From 7e28d0b26759846485978ada860ef4a427e06c8f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 10 Dec 2021 10:48:02 +0530 Subject: drm/i915/adl-n: Enable ADL-N platform Adding PCI device ids and enabling ADL-N platform. ADL-N from i915 point of view is subplatform of ADL-P. BSpec: 68397 Changes since V2: - Added version log history Changes since V1: - replace IS_ALDERLAKE_N with IS_ADLP_N - Jani Nikula Signed-off-by: Tejas Upadhyay Reviewed-by: Anusha Srivatsa Acked-by: Thomas Gleixner Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211210051802.4063958-1-tejaskumarx.surendrakumar.upadhyay@intel.com --- include/drm/i915_pciids.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index baf3d1d3d566..533890dc9da1 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -666,6 +666,12 @@ INTEL_VGA_DEVICE(0x46C2, info), \ INTEL_VGA_DEVICE(0x46C3, info) +/* ADL-N */ +#define INTEL_ADLN_IDS(info) \ + INTEL_VGA_DEVICE(0x46D0, info), \ + INTEL_VGA_DEVICE(0x46D1, info), \ + INTEL_VGA_DEVICE(0x46D2, info) + /* RPL-S */ #define INTEL_RPLS_IDS(info) \ INTEL_VGA_DEVICE(0xA780, info), \ -- cgit v1.2.3 From 9206a3af4fc0cebbefca2d79876d279bdd8d582b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 17 Dec 2021 13:55:58 +0200 Subject: clk: ti: Move dra7 clock devices out of the legacy section I accidentally added some dra7 clock defines to the legacy section that we want to stop using. Let's move the defines to the right location. Note that this is just a cosmetic fix. Cc: linux-clk@vger.kernel.org Cc: Stephen Boyd Cc: Tero Kristo Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- include/dt-bindings/clock/dra7.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h index 7d57063b8a65..29ff6b895848 100644 --- a/include/dt-bindings/clock/dra7.h +++ b/include/dt-bindings/clock/dra7.h @@ -84,17 +84,10 @@ #define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -/* iva clocks */ -#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) - /* dss clocks */ #define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -/* gpu clocks */ -#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - /* l3init clocks */ #define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) @@ -267,10 +260,17 @@ #define DRA7_L3INSTR_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3INSTR_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) +/* iva clocks */ +#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) +#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) + /* dss clocks */ #define DRA7_DSS_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_DSS_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) +/* gpu clocks */ +#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) + /* l3init clocks */ #define DRA7_L3INIT_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_L3INIT_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -- cgit v1.2.3 From 825ca9ed1c9f5516b30292bb1c7ab648c2a01b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 21 Dec 2021 21:37:53 +0200 Subject: drm: Always include the debugfs dentry in drm_crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the counterproductive CONFIG_DEBUG_FS ifdef and just include the debugfs dentry in drm_crtc always. This way we don't need annoying ifdefs in the actual code with DEBUGFS=n. Also we don't have these ifdefs around any of the other debugfs dentries either so can't see why drm_crtc should be special. This fixes the i915 DEBUGFS=n build because I assumed the dentry would always be there. Cc: Jani Nikula Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Fixes: e74c6aa955ca ("drm/i915/fbc: Register per-crtc debugfs files") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211221193754.12287-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula Acked-by: Daniel Vetter --- include/drm/drm_crtc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 13eeba2a750a..4d01b4d89775 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1135,14 +1135,12 @@ struct drm_crtc { */ spinlock_t commit_lock; -#ifdef CONFIG_DEBUG_FS /** * @debugfs_entry: * * Debugfs directory for this CRTC. */ struct dentry *debugfs_entry; -#endif /** * @crc: -- cgit v1.2.3 From c4381d0ee81930097e94e55d1c23f85798ffd093 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 12 Jan 2022 10:34:11 -0500 Subject: drm/amdgpu: Add interface to load SRIOV cap FW - Add interface to load SRIOV cap FW. If the FW does not exist, simply skip this FW loading routine. This FW will only be loaded under SRIOV. Other driver configuration will not be affected. By adding this interface, it will make us easier to prepare SRIOV Linux guest driver for different users. - Update sysfs interface to read cap FW version. - Refactor PSP FW loading routine under SRIOV to use a unified SWITCH statement instead of using IF statement - Remove redundant amdgpu_sriov_vf() check in FW loading routine Acked-by: Monk Liu Acked-by: Guchun Chen Signed-off-by: Bokun Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0b94ec7b73e7..be4f9111f478 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -728,6 +728,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_DMCUB 0x14 /* Subquery id: Query TOC firmware version */ #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f -- cgit v1.2.3 From 5298d4bfe80f6ae6ae2777bcd1357b0022d98573 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Jan 2022 07:56:14 +0100 Subject: unicode: clean up the Kconfig symbol confusion Turn the CONFIG_UNICODE symbol into a tristate that generates some always built in code and remove the confusing CONFIG_UNICODE_UTF8_DATA symbol. Note that a lot of the IS_ENABLED() checks could be turned from cpp statements into normal ifs, but this change is intended to be fairly mechanic, so that should be cleaned up later. Fixes: 2b3d04787012 ("unicode: Add utf8-data module") Reported-by: Linus Torvalds Reviewed-by: Eric Biggers Signed-off-by: Christoph Hellwig Signed-off-by: Gabriel Krisman Bertazi --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c8510da6cc6d..fdac22d16c2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1490,7 +1490,7 @@ struct super_block { #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif -- cgit v1.2.3 From ebb7fb1557b1d03b906b668aa2164b51e6b7d19a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 26 Jan 2022 09:19:20 -0800 Subject: xfs, iomap: limit individual ioend chain lengths in writeback Trond Myklebust reported soft lockups in XFS IO completion such as this: watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [kworker/12:1:3106] CPU: 12 PID: 3106 Comm: kworker/12:1 Not tainted 4.18.0-305.10.2.el8_4.x86_64 #1 Workqueue: xfs-conv/md127 xfs_end_io [xfs] RIP: 0010:_raw_spin_unlock_irqrestore+0x11/0x20 Call Trace: wake_up_page_bit+0x8a/0x110 iomap_finish_ioend+0xd7/0x1c0 iomap_finish_ioends+0x7f/0xb0 xfs_end_ioend+0x6b/0x100 [xfs] xfs_end_io+0xb9/0xe0 [xfs] process_one_work+0x1a7/0x360 worker_thread+0x1fa/0x390 kthread+0x116/0x130 ret_from_fork+0x35/0x40 Ioends are processed as an atomic completion unit when all the chained bios in the ioend have completed their IO. Logically contiguous ioends can also be merged and completed as a single, larger unit. Both of these things can be problematic as both the bio chains per ioend and the size of the merged ioends processed as a single completion are both unbound. If we have a large sequential dirty region in the page cache, write_cache_pages() will keep feeding us sequential pages and we will keep mapping them into ioends and bios until we get a dirty page at a non-sequential file offset. These large sequential runs can will result in bio and ioend chaining to optimise the io patterns. The pages iunder writeback are pinned within these chains until the submission chaining is broken, allowing the entire chain to be completed. This can result in huge chains being processed in IO completion context. We get deep bio chaining if we have large contiguous physical extents. We will keep adding pages to the current bio until it is full, then we'll chain a new bio to keep adding pages for writeback. Hence we can build bio chains that map millions of pages and tens of gigabytes of RAM if the page cache contains big enough contiguous dirty file regions. This long bio chain pins those pages until the final bio in the chain completes and the ioend can iterate all the chained bios and complete them. OTOH, if we have a physically fragmented file, we end up submitting one ioend per physical fragment that each have a small bio or bio chain attached to them. We do not chain these at IO submission time, but instead we chain them at completion time based on file offset via iomap_ioend_try_merge(). Hence we can end up with unbound ioend chains being built via completion merging. XFS can then do COW remapping or unwritten extent conversion on that merged chain, which involves walking an extent fragment at a time and running a transaction to modify the physical extent information. IOWs, we merge all the discontiguous ioends together into a contiguous file range, only to then process them individually as discontiguous extents. This extent manipulation is computationally expensive and can run in a tight loop, so merging logically contiguous but physically discontigous ioends gains us nothing except for hiding the fact the fact we broke the ioends up into individual physical extents at submission and then need to loop over those individual physical extents at completion. Hence we need to have mechanisms to limit ioend sizes and to break up completion processing of large merged ioend chains: 1. bio chains per ioend need to be bound in length. Pure overwrites go straight to iomap_finish_ioend() in softirq context with the exact bio chain attached to the ioend by submission. Hence the only way to prevent long holdoffs here is to bound ioend submission sizes because we can't reschedule in softirq context. 2. iomap_finish_ioends() has to handle unbound merged ioend chains correctly. This relies on any one call to iomap_finish_ioend() being bound in runtime so that cond_resched() can be issued regularly as the long ioend chain is processed. i.e. this relies on mechanism #1 to limit individual ioend sizes to work correctly. 3. filesystems have to loop over the merged ioends to process physical extent manipulations. This means they can loop internally, and so we break merging at physical extent boundaries so the filesystem can easily insert reschedule points between individual extent manipulations. Signed-off-by: Dave Chinner Reported-and-tested-by: Trond Myklebust Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b55bd49e55f5..97a3a2edb585 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -263,9 +263,11 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ + u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ + sector_t io_sector; /* start sector of ioend */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! */ }; -- cgit v1.2.3 From 8cda7a4f96e435be2fd074009d69521d973d7d31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jan 2022 17:57:33 -0500 Subject: drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates Add a new CTX ioctl operation to set stable pstates for profiling. When creating traces for tools like RGP or using SPM or doing performance profiling, it's required to enable a special stable profiling power state on the GPU. These profiling states set fixed clocks and disable certain other power features like powergating which may impact the results. Historically, these profiling pstates were enabled via sysfs, but this adds an interface to enable it via the CTX ioctl from the application. Since the power state is global only one application can set it at a time, so if multiple applications try and use it only the first will get it, the ioctl will return -EBUSY for others. The sysfs interface will override whatever has been set by this interface. Mesa MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 v2: don't default r = 0; v3: rebase on Evan's PM cleanup Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be4f9111f478..76b580d10a52 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -206,6 +206,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -238,10 +240,18 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_PRIORITY_HIGH 512 #define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; - /** For future use, no flags defined so far */ + /** Flags */ __u32 flags; __u32 ctx_id; /** AMDGPU_CTX_PRIORITY_* */ @@ -262,6 +272,11 @@ union drm_amdgpu_ctx_out { /** Reset status since the last call of the ioctl. */ __u32 reset_status; } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; }; union drm_amdgpu_ctx { -- cgit v1.2.3 From d01ffb9eee4af165d83b08dd73ebdf9fe94a519b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 28 Jan 2022 12:47:16 +0800 Subject: ax25: add refcount in ax25_dev to avoid UAF bugs If we dereference ax25_dev after we call kfree(ax25_dev) in ax25_dev_device_down(), it will lead to concurrency UAF bugs. There are eight syscall functions suffer from UAF bugs, include ax25_bind(), ax25_release(), ax25_connect(), ax25_ioctl(), ax25_getname(), ax25_sendmsg(), ax25_getsockopt() and ax25_info_show(). One of the concurrency UAF can be shown as below: (USE) | (FREE) | ax25_device_event | ax25_dev_device_down ax25_bind | ... ... | kfree(ax25_dev) ax25_fillin_cb() | ... ax25_fillin_cb_from_dev() | ... | The root cause of UAF bugs is that kfree(ax25_dev) in ax25_dev_device_down() is not protected by any locks. When ax25_dev, which there are still pointers point to, is released, the concurrency UAF bug will happen. This patch introduces refcount into ax25_dev in order to guarantee that there are no pointers point to it when ax25_dev is released. Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- include/net/ax25.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 526e49589197..50b417df6221 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -239,6 +239,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -293,6 +294,15 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +#define ax25_dev_hold(__ax25_dev) \ + refcount_inc(&((__ax25_dev)->refcount)) + +static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; -- cgit v1.2.3 From 3c75c0ea5da749bd1efebd1387f2e5011b8c7d78 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Jan 2022 16:52:48 +0100 Subject: ASoC: soc-pcm: Fix DPCM lockdep warning due to nested stream locks The recent change for DPCM locking caused spurious lockdep warnings. Actually the warnings are false-positive, as those are triggered due to the nested stream locks for FE and BE. Since both locks belong to the same lock class, lockdep sees it as if a deadlock. For fixing this, we need to take PCM stream locks for BE with the nested lock primitives. Since currently snd_pcm_stream_lock*() helper assumes only the top-level single locking, a new helper function snd_pcm_stream_lock_irqsave_nested() is defined for a single-depth nested lock, which is now used in the BE DAI trigger that is always performed inside a FE stream lock. Fixes: b2ae80663008 ("ASoC: soc-pcm: serialize BE triggers") Reported-and-tested-by: Hans de Goede Reported-and-tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/73018f3c-9769-72ea-0325-b3f8e2381e30@redhat.com Link: https://lore.kernel.org/alsa-devel/9a0abddd-49e9-872d-2f00-a1697340f786@samsung.com Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20220119155249.26754-2-tiwai@suse.de Signed-off-by: Mark Brown --- include/sound/pcm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 33451f8ff755..524220fe1af6 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -614,6 +614,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); +unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream @@ -632,6 +633,20 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); +/** + * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking + * @substream: PCM substream + * @flags: irq flags + * + * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with + * the single-depth lockdep subclass. + */ +#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ + } while (0) + /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator -- cgit v1.2.3 From 06feec6005c9d9500cd286ec440aabf8b2ddd94d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Jan 2022 22:50:39 +0300 Subject: ASoC: hdmi-codec: Fix OOB memory accesses Correct size of iec_status array by changing it to the size of status array of the struct snd_aes_iec958. This fixes out-of-bounds slab read accesses made by memcpy() of the hdmi-codec driver. This problem is reported by KASAN. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20220112195039.1329-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/uapi/sound/asound.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index ff7e638221c5..228279ea0670 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ -- cgit v1.2.3 From f6c6804c43fa18d3cee64b55490dfbd3bef1363a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 28 Jan 2022 15:40:25 +0000 Subject: kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h This way we can more easily find the next free IOCTL number when adding new IOCTLs. Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") Signed-off-by: Janosch Frank Message-Id: <20220128154025.102666-1-frankja@linux.ibm.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b46bcdb0cab1..5191b57e1562 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1624,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2048,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 2af104290da5e4858e8caefa068827d7392c6a09 Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:34 +0900 Subject: drm: introduce fb_modifiers_not_supported flag in mode_config If only linear modifier is advertised, since there are many drivers that only linear supported, the DRM core should handle this rather than open-coding in every driver. However, there are legacy drivers such as radeon that do not support modifiers but infer the actual layout of the underlying buffer. Therefore, a new flag fb_modifiers_not_supported is introduced for these legacy drivers, and allow_fb_modifiers is replaced with this new flag. v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag - add a conditional disable in amdgpu_dm_plane_init() v4: - modify kernel docs v5: - modify kernel docs Signed-off-by: Tomohito Esaki Acked-by: Harry Wentland Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-2-etom@igel.co.jp --- include/drm/drm_mode_config.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 91ca575a78de..4a93dac91cf9 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -933,6 +933,16 @@ struct drm_mode_config { */ bool allow_fb_modifiers; + /** + * @fb_modifiers_not_supported: + * + * When this flag is set, the DRM device will not expose modifier + * support to userspace. This is only used by legacy drivers that infer + * the buffer layout through heuristics without using modifiers. New + * drivers shall not set fhis flag. + */ + bool fb_modifiers_not_supported; + /** * @normalize_zpos: * -- cgit v1.2.3 From 8be576837b6e62b2ad0de2f9ba31cef618fa2891 Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:35 +0900 Subject: drm: add support modifiers for drivers whose planes only support linear layout The LINEAR modifier is advertised as default if a driver doesn't specify modifiers. v2: - rebase to the latest master branch (5.16.0+) + "drm/plane: Make format_mod_supported truly optional" patch [1] [1] https://patchwork.freedesktop.org/patch/467940/?series=98255&rev=3 v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag v5: - change default_modifiers array from non-static to static - remove terminator in default_modifiers array - use ARRAY_SIZE to get the format_modifier_count - update sanity check in plane init func to use the fb_modifiers_not_supported - modify kernel docs Signed-off-by: Tomohito Esaki Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-3-etom@igel.co.jp --- include/drm/drm_plane.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 06759badf99f..2628c7cde2da 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -803,6 +803,9 @@ void *__drmm_universal_plane_alloc(struct drm_device *dev, * * The @drm_plane_funcs.destroy hook must be NULL. * + * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set + * @format_modifiers to NULL. The plane will advertise the linear modifier. + * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ -- cgit v1.2.3 From 3d082157a24216ca084082ce421a37d14ecfcfad Mon Sep 17 00:00:00 2001 From: Tomohito Esaki Date: Fri, 28 Jan 2022 15:08:36 +0900 Subject: drm: remove allow_fb_modifiers The allow_fb_modifiers flag is unnecessary since it has been replaced with fb_modifiers_not_supported flag. v3: - change the order as follows: 1. add fb_modifiers_not_supported flag 2. add default modifiers 3. remove allow_fb_modifiers flag v5: - keep a sanity check in plane init func Signed-off-by: Tomohito Esaki Reviewed-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220128060836.11216-4-etom@igel.co.jp --- include/drm/drm_mode_config.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 4a93dac91cf9..6b5e01295348 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -917,22 +917,6 @@ struct drm_mode_config { */ bool async_page_flip; - /** - * @allow_fb_modifiers: - * - * Whether the driver supports fb modifiers in the ADDFB2.1 ioctl call. - * Note that drivers should not set this directly, it is automatically - * set in drm_universal_plane_init(). - * - * IMPORTANT: - * - * If this is set the driver must fill out the full implicit modifier - * information in their &drm_mode_config_funcs.fb_create hook for legacy - * userspace which does not set modifiers. Otherwise the GETFB2 ioctl is - * broken for modifier aware userspace. - */ - bool allow_fb_modifiers; - /** * @fb_modifiers_not_supported: * -- cgit v1.2.3 From d80976d9ffd9d7f89a26134a299b236910477f3b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Nov 2021 16:27:55 +0100 Subject: dma-resv: some doc polish for iterators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hammer it a bit more in that iterators can be restarted and when that matters, plus suggest to prefer the locked version whenver. Also delete the two leftover kerneldoc for static functions plus sprinkle some more links while at it. v2: Keep some comments (Christian) Reviewed-by: Christian König Signed-off-by: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20211130152756.1388106-1-daniel.vetter@ffwll.ch --- include/linux/dma-resv.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a715df97b31a..afdfdfac729f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -153,6 +153,13 @@ struct dma_resv { * struct dma_resv_iter - current position into the dma_resv fences * * Don't touch this directly in the driver, use the accessor function instead. + * + * IMPORTANT + * + * When using the lockless iterators like dma_resv_iter_next_unlocked() or + * dma_resv_for_each_fence_unlocked() beware that the iterator can be restarted. + * Code which accumulates statistics or similar needs to check for this with + * dma_resv_iter_is_restarted(). */ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ @@ -243,7 +250,11 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * &dma_resv.lock and using RCU instead. The cursor needs to be initialized * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside * the iterator a reference to the dma_fence is held and the RCU lock dropped. - * When the dma_resv is modified the iteration starts over again. + * + * Beware that the iterator can be restarted when the struct dma_resv for + * @cursor is modified. Code which accumulates statistics or similar needs to + * check for this with dma_resv_iter_is_restarted(). For this reason prefer the + * lock iterator dma_resv_for_each_fence() whenever possible. */ #define dma_resv_for_each_fence_unlocked(cursor, fence) \ for (fence = dma_resv_iter_first_unlocked(cursor); \ -- cgit v1.2.3 From ef9989afda73332df566852d6e9ca695c05f10ce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 1 Feb 2022 13:29:22 +0000 Subject: kvm: add guest_state_{enter,exit}_irqoff() When transitioning to/from guest mode, it is necessary to inform lockdep, tracing, and RCU in a specific order, similar to the requirements for transitions to/from user mode. Additionally, it is necessary to perform vtime accounting for a window around running the guest, with RCU enabled, such that timer interrupts taken from the guest can be accounted as guest time. Most architectures don't handle all the necessary pieces, and a have a number of common bugs, including unsafe usage of RCU during the window between guest_enter() and guest_exit(). On x86, this was dealt with across commits: 87fa7f3e98a1310e ("x86/kvm: Move context tracking where it belongs") 0642391e2139a2c1 ("x86/kvm/vmx: Add hardirq tracing to guest enter/exit") 9fc975e9efd03e57 ("x86/kvm/svm: Add hardirq tracing on guest enter/exit") 3ebccdf373c21d86 ("x86/kvm/vmx: Move guest enter/exit into .noinstr.text") 135961e0a7d555fc ("x86/kvm/svm: Move guest enter/exit into .noinstr.text") 160457140187c5fb ("KVM: x86: Defer vtime accounting 'til after IRQ handling") bc908e091b326467 ("KVM: x86: Consolidate guest enter/exit logic to common helpers") ... but those fixes are specific to x86, and as the resulting logic (while correct) is split across generic helper functions and x86-specific helper functions, it is difficult to see that the entry/exit accounting is balanced. This patch adds generic helpers which architectures can use to handle guest entry/exit consistently and correctly. The guest_{enter,exit}() helpers are split into guest_timing_{enter,exit}() to perform vtime accounting, and guest_context_{enter,exit}() to perform the necessary context tracking and RCU management. The existing guest_{enter,exit}() heleprs are left as wrappers of these. Atop this, new guest_state_enter_irqoff() and guest_state_exit_irqoff() helpers are added to handle the ordering of lockdep, tracing, and RCU manageent. These are inteneded to mirror exit_to_user_mode() and enter_from_user_mode(). Subsequent patches will migrate architectures over to the new helpers, following a sequence: guest_timing_enter_irqoff(); guest_state_enter_irqoff(); < run the vcpu > guest_state_exit_irqoff(); < take any pending IRQs > guest_timing_exit_irqoff(); This sequences handles all of the above correctly, and more clearly balances the entry and exit portions, making it easier to understand. The existing helpers are marked as deprecated, and will be removed once all architectures have been converted. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Marc Zyngier Reviewed-by: Paolo Bonzini Reviewed-by: Nicolas Saenz Julienne Message-Id: <20220201132926.3301912-2-mark.rutland@arm.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 112 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f079820f52b5..b3810976a27f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,7 +29,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -368,8 +370,11 @@ struct kvm_vcpu { u64 last_used_slot_gen; }; -/* must be called with irqs disabled */ -static __always_inline void guest_enter_irqoff(void) +/* + * Start accounting time towards a guest. + * Must be called before entering guest context. + */ +static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so its safe to assume that it's the @@ -378,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void) instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); +} +/* + * Enter guest context and enter an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_enter_irqoff(void) +{ /* * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -394,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void) } } -static __always_inline void guest_exit_irqoff(void) +/* + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and + * guest_state_enter_irqoff(). + */ +static __always_inline void guest_enter_irqoff(void) +{ + guest_timing_enter_irqoff(); + guest_context_enter_irqoff(); +} + +/** + * guest_state_enter_irqoff - Fixup state when entering a guest + * + * Entry to a guest will enable interrupts, but the kernel state is interrupts + * disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code before entering a guest. + * Must be called with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_enter_irqoff() before this. + * + * Note: this is analogous to exit_to_user_mode(). + */ +static __always_inline void guest_state_enter_irqoff(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + + guest_context_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +/* + * Exit guest context and exit an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_exit_irqoff(void) { context_tracking_guest_exit(); +} +/* + * Stop accounting time towards a guest. + * Must be called after exiting guest context. + */ +static __always_inline void guest_timing_exit_irqoff(void) +{ instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } +/* + * Deprecated. Architectures should move to guest_state_exit_irqoff() and + * guest_timing_exit_irqoff(). + */ +static __always_inline void guest_exit_irqoff(void) +{ + guest_context_exit_irqoff(); + guest_timing_exit_irqoff(); +} + static inline void guest_exit(void) { unsigned long flags; @@ -413,6 +492,33 @@ static inline void guest_exit(void) local_irq_restore(flags); } +/** + * guest_state_exit_irqoff - Establish state when returning from guest mode + * + * Entry from a guest disables interrupts, but guest mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific code after exiting a guest. + * Must be invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_exit_irqoff() after this. + * + * Note: this is analogous to enter_from_user_mode(). + */ +static __always_inline void guest_state_exit_irqoff(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + guest_context_exit_irqoff(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* -- cgit v1.2.3 From bee9f65523218e3baeeecde9295c8fbe9bc08e0a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Jan 2022 16:02:50 +0000 Subject: netfs, cachefiles: Add a method to query presence of data in the cache Add a netfs_cache_ops method by which a network filesystem can ask the cache about what data it has available and where so that it can make a multipage read more efficient. Signed-off-by: David Howells cc: linux-cachefs@redhat.com Acked-by: Jeff Layton Reviewed-by: Rohith Surabattula Signed-off-by: Steve French --- include/linux/netfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b46c39d98bbd..614f22213e21 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -244,6 +244,13 @@ struct netfs_cache_ops { int (*prepare_write)(struct netfs_cache_resources *cres, loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + + /* Query the occupancy of the cache in a region, returning where the + * next chunk of data starts and how long it is. + */ + int (*query_occupancy)(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len); }; struct readahead_control; -- cgit v1.2.3 From 6d5c900eb64107001e91e1f46bddc254dded8a59 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 09:22:41 -0800 Subject: net/mlx5e: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Use struct_group() in struct vlan_ethhdr around members h_dest and h_source, so they can be referenced together. This will allow memcpy() and sizeof() to more easily reason about sizes, improve readability, and avoid future warnings about writing beyond the end of h_dest. "pahole" shows no size nor member offset changes to struct vlan_ethhdr. "objdump -d" shows no object code changes. Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB") Signed-off-by: Kees Cook Signed-off-by: Saeed Mahameed --- include/linux/if_vlan.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8420fe504927..2be4dd7e90a9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -46,8 +46,10 @@ struct vlan_hdr { * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + struct_group(addrs, + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; -- cgit v1.2.3 From ddecd22878601a606d160680fa85802b75d92eb6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 11:34:07 +0100 Subject: perf: uapi: Document perf_event_attr::sig_data truncation on 32 bit architectures Due to the alignment requirements of siginfo_t, as described in 3ddb3fd8cdb0 ("signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures"), siginfo_t::si_perf_data is limited to an unsigned long. However, perf_event_attr::sig_data is an u64, to avoid having to deal with compat conversions. Due to being an u64, it may not immediately be clear to users that sig_data is truncated on 32 bit architectures. Add a comment to explicitly point this out, and hopefully help some users save time by not having to deduce themselves what's happening. Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20220131103407.1971678-3-elver@google.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..82858b697c05 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; -- cgit v1.2.3 From 1148836fd3226c20de841084aba24184d4fbbe77 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 2 Feb 2022 14:55:29 +0100 Subject: Revert "fbdev: Garbage collect fbdev scrolling acceleration, part 1 (from TODO list)" This reverts commit b3ec8cdf457e5e63d396fe1346cc788cf7c1b578. Revert the second (of 2) commits which disabled scrolling acceleration in fbcon/fbdev. It introduced a regression for fbdev-supported graphic cards because of the performance penalty by doing screen scrolling by software instead of using the existing graphic card 2D hardware acceleration. Console scrolling acceleration was disabled by dropping code which checked at runtime the driver hardware capabilities for the BINFO_HWACCEL_COPYAREA or FBINFO_HWACCEL_FILLRECT flags and if set, it enabled scrollmode SCROLL_MOVE which uses hardware acceleration to move screen contents. After dropping those checks scrollmode was hard-wired to SCROLL_REDRAW instead, which forces all graphic cards to redraw every character at the new screen position when scrolling. This change effectively disabled all hardware-based scrolling acceleration for ALL drivers, because now all kind of 2D hardware acceleration (bitblt, fillrect) in the drivers isn't used any longer. The original commit message mentions that only 3 DRM drivers (nouveau, omapdrm and gma500) used hardware acceleration in the past and thus code for checking and using scrolling acceleration is obsolete. This statement is NOT TRUE, because beside the DRM drivers there are around 35 other fbdev drivers which depend on fbdev/fbcon and still provide hardware acceleration for fbdev/fbcon. The original commit message also states that syzbot found lots of bugs in fbcon and thus it's "often the solution to just delete code and remove features". This is true, and the bugs - which actually affected all users of fbcon, including DRM - were fixed, or code was dropped like e.g. the support for software scrollback in vgacon (commit 973c096f6a85). So to further analyze which bugs were found by syzbot, I've looked through all patches in drivers/video which were tagged with syzbot or syzkaller back to year 2005. The vast majority fixed the reported issues on a higher level, e.g. when screen is to be resized, or when font size is to be changed. The few ones which touched driver code fixed a real driver bug, e.g. by adding a check. But NONE of those patches touched code of either the SCROLL_MOVE or the SCROLL_REDRAW case. That means, there was no real reason why SCROLL_MOVE had to be ripped-out and just SCROLL_REDRAW had to be used instead. The only reason I can imagine so far was that SCROLL_MOVE wasn't used by DRM and as such it was assumed that it could go away. That argument completely missed the fact that SCROLL_MOVE is still heavily used by fbdev (non-DRM) drivers. Some people mention that using memcpy() instead of the hardware acceleration is pretty much the same speed. But that's not true, at least not for older graphic cards and machines where we see speed decreases by factor 10 and more and thus this change leads to console responsiveness way worse than before. That's why the original commit is to be reverted. By reverting we reintroduce hardware-based scrolling acceleration and fix the performance regression for fbdev drivers. There isn't any impact on DRM when reverting those patches. Signed-off-by: Helge Deller Acked-by: Geert Uytterhoeven Acked-by: Sven Schnelle Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Helge Deller Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220202135531.92183-2-deller@gmx.de --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 3da95842b207..02f362c661c8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -262,7 +262,7 @@ struct fb_ops { /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); - /* Copy data from area to another. Obsolete. */ + /* Copy data from area to another */ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); /* Draws a image to the display */ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); -- cgit v1.2.3 From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", offset of all subsequent members of struct smc_diag_lgrinfo was changed by that change. As result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is SMC_GEN_NETLINK interface which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { -- cgit v1.2.3 From e1d2699b96793d19388e302fa095e0da2c145701 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Jan 2022 22:10:52 -0500 Subject: NFS: Avoid duplicate uncached readdir calls on eof If we've reached the end of the directory, then cache that information in the context so that we don't need to do an uncached readdir in order to rediscover that fact. Fixes: 794092c57f89 ("NFS: Do uncached readdir when we're seeking a cookie in an empty page cache") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 02aa49323d1d..68f81d8d36de 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -107,6 +107,7 @@ struct nfs_open_dir_context { __u64 dup_cookie; pgoff_t page_index; signed char duped; + bool eof; }; /* -- cgit v1.2.3 From 2ea88716369ac9a7486a8cb309d6bf1239ea156c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 23 Jan 2022 17:27:47 +0100 Subject: libceph: make recv path in secure mode work the same as send path The recv path of secure mode is intertwined with that of crc mode. While it's slightly more efficient that way (the ciphertext is read into the destination buffer and decrypted in place, thus avoiding two potentially heavy memory allocations for the bounce buffer and the corresponding sg array), it isn't really amenable to changes. Sacrifice that edge and align with the send path which always uses a full-sized bounce buffer (currently there is no other way -- if the kernel crypto API ever grows support for streaming (piecewise) en/decryption for GCM [1], we would be able to easily take advantage of that on both sides). [1] https://lore.kernel.org/all/20141225202830.GA18794@gondor.apana.org.au/ Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- include/linux/ceph/messenger.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ff99ce094cfa..6c6b6ea52bb8 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -383,6 +383,10 @@ struct ceph_connection_v2_info { struct ceph_gcm_nonce in_gcm_nonce; struct ceph_gcm_nonce out_gcm_nonce; + struct page **in_enc_pages; + int in_enc_page_cnt; + int in_enc_resid; + int in_enc_i; struct page **out_enc_pages; int out_enc_page_cnt; int out_enc_resid; -- cgit v1.2.3 From 038b8d1d1ab1cce11a158d30bf080ff41a2cfd15 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 30 Dec 2021 15:13:32 +0100 Subject: libceph: optionally use bounce buffer on recv path in crc mode Both msgr1 and msgr2 in crc mode are zero copy in the sense that message data is read from the socket directly into the destination buffer. We assume that the destination buffer is stable (i.e. remains unchanged while it is being read to) though. Otherwise, CRC errors ensue: libceph: read_partial_message 0000000048edf8ad data crc 1063286393 != exp. 228122706 libceph: osd1 (1)192.168.122.1:6843 bad crc/signature libceph: bad data crc, calculated 57958023, expected 1805382778 libceph: osd2 (2)192.168.122.1:6876 integrity error, bad crc Introduce rxbounce option to enable use of a bounce buffer when receiving message data. In particular this is needed if a mapped image is a Windows VM disk, passed to QEMU. Windows has a system-wide "dummy" page that may be mapped into the destination buffer (potentially more than once into the same buffer) by the Windows Memory Manager in an effort to generate a single large I/O [1][2]. QEMU makes a point of preserving overlap relationships when cloning I/O vectors, so krbd gets exposed to this behaviour. [1] "What Is Really in That MDL?" https://docs.microsoft.com/en-us/previous-versions/windows/hardware/design/dn614012(v=vs.85) [2] https://blogs.msmvps.com/kernelmustard/2005/05/04/dummy-pages/ URL: https://bugzilla.redhat.com/show_bug.cgi?id=1973317 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6a89ea410e43..edf62eaa6285 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -35,6 +35,7 @@ #define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */ +#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 6c6b6ea52bb8..e7f2fb2fc207 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -461,6 +461,7 @@ struct ceph_connection { struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ + struct page *bounce_page; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ -- cgit v1.2.3 From bfb1a7c91fb7758273b4a8d735313d9cc388b502 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 2 Feb 2022 12:55:53 -0800 Subject: x86/bug: Merge annotate_reachable() into _BUG_FLAGS() asm In __WARN_FLAGS(), we had two asm statements (abbreviated): asm volatile("ud2"); asm volatile(".pushsection .discard.reachable"); These pair of statements are used to trigger an exception, but then help objtool understand that for warnings, control flow will be restored immediately afterwards. The problem is that volatile is not a compiler barrier. GCC explicitly documents this: > Note that the compiler can move even volatile asm instructions > relative to other code, including across jump instructions. Also, no clobbers are specified to prevent instructions from subsequent statements from being scheduled by compiler before the second asm statement. This can lead to instructions from subsequent statements being emitted by the compiler before the second asm statement. Providing a scheduling model such as via -march= options enables the compiler to better schedule instructions with known latencies to hide latencies from data hazards compared to inline asm statements in which latencies are not estimated. If an instruction gets scheduled by the compiler between the two asm statements, then objtool will think that it is not reachable, producing a warning. To prevent instructions from being scheduled in between the two asm statements, merge them. Also remove an unnecessary unreachable() asm annotation from BUG() in favor of __builtin_unreachable(). objtool is able to track that the ud2 from BUG() terminates control flow within the function. Link: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile Link: https://github.com/ClangBuiltLinux/linux/issues/1483 Signed-off-by: Nick Desaulniers Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220202205557.2260694-1-ndesaulniers@google.com --- include/linux/compiler.h | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 429dcebe2b99..0f7fd205ab7e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -117,14 +117,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -133,24 +125,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else -#define annotate_reachable() #define annotate_unreachable() +# define ASM_REACHABLE #define __annotate_jump_table #endif -#ifndef ASM_UNREACHABLE -# define ASM_UNREACHABLE -#endif #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ -- cgit v1.2.3 From 4a81f6da9cb2d1ef911131a6fd8bd15cb61fc772 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 1 Feb 2022 20:39:42 +0100 Subject: net, neigh: Do not trigger immediate probes on NUD_FAILED from neigh_managed_work syzkaller was able to trigger a deadlock for NTF_MANAGED entries [0]: kworker/0:16/14617 is trying to acquire lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 [...] but task is already holding lock: ffffffff8d4dd370 (&tbl->lock){++-.}-{2:2}, at: neigh_managed_work+0x35/0x250 net/core/neighbour.c:1572 The neighbor entry turned to NUD_FAILED state, where __neigh_event_send() triggered an immediate probe as per commit cd28ca0a3dd1 ("neigh: reduce arp latency") via neigh_probe() given table lock was held. One option to fix this situation is to defer the neigh_probe() back to the neigh_timer_handler() similarly as pre cd28ca0a3dd1. For the case of NTF_MANAGED, this deferral is acceptable given this only happens on actual failure state and regular / expected state is NUD_VALID with the entry already present. The fix adds a parameter to __neigh_event_send() in order to communicate whether immediate probe is allowed or disallowed. Existing call-sites of neigh_event_send() default as-is to immediate probe. However, the neigh_managed_work() disables it via use of neigh_event_send_probe(). [0] __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain kernel/locking/lockdep.c:3788 [inline] __lock_acquire.cold+0x149/0x3ab kernel/locking/lockdep.c:5027 lock_acquire kernel/locking/lockdep.c:5639 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5604 __raw_write_lock_bh include/linux/rwlock_api_smp.h:202 [inline] _raw_write_lock_bh+0x2f/0x40 kernel/locking/spinlock.c:334 ___neigh_create+0x9e1/0x2990 net/core/neighbour.c:652 ip6_finish_output2+0x1070/0x14f0 net/ipv6/ip6_output.c:123 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170 ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ndisc_send_skb+0xa99/0x17f0 net/ipv6/ndisc.c:508 ndisc_send_ns+0x3a9/0x840 net/ipv6/ndisc.c:650 ndisc_solicit+0x2cd/0x4f0 net/ipv6/ndisc.c:742 neigh_probe+0xc2/0x110 net/core/neighbour.c:1040 __neigh_event_send+0x37d/0x1570 net/core/neighbour.c:1201 neigh_event_send include/net/neighbour.h:470 [inline] neigh_managed_work+0x162/0x250 net/core/neighbour.c:1574 process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307 worker_thread+0x657/0x1110 kernel/workqueue.c:2454 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: 7482e3841d52 ("net, neigh: Add NTF_MANAGED flag for managed neighbor entries") Reported-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Roopa Prabhu Tested-by: syzbot+5239d0e1778a500d477a@syzkaller.appspotmail.com Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220201193942.5055-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 937389e04c8e..87419f7f5421 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -350,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -460,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { -- cgit v1.2.3 From 4564661af6ee321942ec1ab012191d7adedd3e00 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Jan 2022 11:17:05 -0800 Subject: xen: xenbus_dev.h: delete incorrect file name It is better/preferred not to include file names in source files because (a) they are not needed and (b) they can be incorrect, so just delete this incorrect file name. Signed-off-by: Randy Dunlap Reviewed-by: Juergen Gross Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: xen-devel@lists.xenproject.org Link: https://lore.kernel.org/r/20220130191705.24971-1-rdunlap@infradead.org Signed-off-by: Juergen Gross --- include/xen/xenbus_dev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h index bbee8c6a349d..4dc45a51c044 100644 --- a/include/xen/xenbus_dev.h +++ b/include/xen/xenbus_dev.h @@ -1,6 +1,4 @@ /****************************************************************************** - * evtchn.h - * * Interface to /dev/xen/xenbus_backend. * * Copyright (c) 2011 Bastian Blank -- cgit v1.2.3 From 164666fa66669d437bdcc8d5f1744a2aee73be41 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Mon, 31 Jan 2022 12:23:07 -0500 Subject: Improve docs for IOCTL_GNTDEV_MAP_GRANT_REF --------------cKY3Ggs6VDUCSn4I6iN78sHA Content-Type: multipart/mixed; boundary="------------g0T69ASidFiPhh4eOY4XzIg1" --------------g0T69ASidFiPhh4eOY4XzIg1 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable The current implementation of gntdev guarantees that the first call to IOCTL_GNTDEV_MAP_GRANT_REF will set @index to 0. This is required to use gntdev for Wayland, which is a future desire of Qubes OS. Additionally, requesting zero grants results in an error, but this was not documented either. Document both of these. Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/f66c5a4e-2034-00b5-a635-6983bd999c07@gmail.com Signed-off-by: Juergen Gross --- include/uapi/xen/gntdev.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 9ac5515b9bc2..7a7145395c09 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. + * until mmap() is called with @index as the offset. @index should be + * considered opaque to userspace, with one exception: if no grant + * references have ever been inserted into the mapping table of this + * instance, @index will be set to 0. This is necessary to use gntdev + * with userspace APIs that expect a file descriptor that can be + * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this + * ioctl will fail. */ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -- cgit v1.2.3 From dca384a3bf5af1c781cfa6aec63904bdb5018c36 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 2 Feb 2022 10:43:40 +0100 Subject: drm/connector: Fix typo in documentation Commit 4adc33f36d80 ("drm/edid: Split deep color modes between RGB and YUV444") introduced two new variables in struct drm_display_info and their documentation, but the documentation part had a typo resulting in a doc build warning. Fixes: 4adc33f36d80 ("drm/edid: Split deep color modes between RGB and YUV444") Reported-by: Stephen Rothwell Signed-off-by: Maxime Ripard Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20220202094340.875190-1-maxime@cerno.tech --- include/drm/drm_connector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 64cf5f88c05b..5e36eb3df66f 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -592,13 +592,13 @@ struct drm_display_info { bool rgb_quant_range_selectable; /** - * @edid_hdmi_dc_rgb444_modes: Mask of supported hdmi deep color modes + * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes * in RGB 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_rgb444_dc_modes; /** - * @edid_hdmi_dc_ycbcr444_modes: Mask of supported hdmi deep color + * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. */ u8 edid_hdmi_ycbcr444_dc_modes; -- cgit v1.2.3 From e85c81ba8859a4c839bcd69c5d83b32954133a5b Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Mon, 17 Jan 2022 17:36:54 +0800 Subject: ext4: fast commit may not fallback for ineligible commit For the follow scenario: 1. jbd start commit transaction n 2. task A get new handle for transaction n+1 3. task A do some ineligible actions and mark FC_INELIGIBLE 4. jbd complete transaction n and clean FC_INELIGIBLE 5. task A call fsync In this case fast commit will not fallback to full commit and transaction n+1 also not handled by jbd. Make ext4_fc_mark_ineligible() also record transaction tid for latest ineligible case, when call ext4_fc_cleanup() check current transaction tid, if small than latest ineligible tid do not clear the EXT4_MF_FC_INELIGIBLE. Reported-by: kernel test robot Reported-by: Dan Carpenter Reported-by: Ritesh Harjani Suggested-by: Harshad Shirwadkar Signed-off-by: Xin Yin Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c45281a..d63b8106796e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1295,7 +1295,7 @@ struct journal_s * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ - void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: -- cgit v1.2.3 From 3ca40c0d329113a9f76f6aa01abe73d9f16ace9d Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 17 Jan 2022 17:41:50 +0530 Subject: jbd2: cleanup unused functions declarations from jbd2.h During code review found no references of few of these below function declarations. This patch cleans those up from jbd2.h Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/30d1fc327becda197a4136cf9cdc73d9baa3b7b9.1642416995.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d63b8106796e..afc5572e7b8a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1419,9 +1419,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); @@ -1486,9 +1484,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr); -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); extern int __init jbd2_journal_init_transaction_cache(void); @@ -1774,8 +1769,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -extern int jbd_blocks_per_page(struct inode *inode); - /* JBD uses a CRC32 checksum */ #define JBD_MAX_CHECKSUM_SIZE 4 -- cgit v1.2.3 From 4f98186848707f530669238d90e0562d92a78aab Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Mon, 17 Jan 2022 17:41:51 +0530 Subject: jbd2: refactor wait logic for transaction updates into a common function No functionality change as such in this patch. This only refactors the common piece of code which waits for t_updates to finish into a common function named as jbd2_journal_wait_updates(journal_t *) Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/8c564f70f4b2591171677a2a74fccb22a7b6c3a4.1642416995.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index afc5572e7b8a..9c3ada74ffb1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -594,7 +594,7 @@ struct transaction_s */ unsigned long t_log_start; - /* + /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ @@ -1538,6 +1538,8 @@ extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *); +void jbd2_journal_wait_updates(journal_t *); + extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int bsize); -- cgit v1.2.3 From 67d6212afda218d564890d1674bab28e8612170f Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jan 2022 15:39:53 -0800 Subject: Revert "module, async: async_synchronize_full() on module init iff async is used" This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f5b2be39a78c..75ba8aa60248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1680,7 +1680,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ -- cgit v1.2.3 From 87563a043cef044fed5db7967a75741cc16ad2b1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 3 Feb 2022 23:08:11 +0800 Subject: ax25: fix reference count leaks of ax25_dev The previous commit d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") introduces refcount into ax25_dev, but there are reference leak paths in ax25_ctl_ioctl(), ax25_fwd_ioctl(), ax25_rt_add(), ax25_rt_del() and ax25_rt_opt(). This patch uses ax25_dev_put() and adjusts the position of ax25_addr_ax25dev() to fix reference cout leaks of ax25_dev. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220203150811.42256-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ax25.h b/include/net/ax25.h index 50b417df6221..8221af1811df 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -294,10 +294,12 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -#define ax25_dev_hold(__ax25_dev) \ - refcount_inc(&((__ax25_dev)->refcount)) +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} -static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +static inline void ax25_dev_put(ax25_dev *ax25_dev) { if (refcount_dec_and_test(&ax25_dev->refcount)) { kfree(ax25_dev); -- cgit v1.2.3 From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: netfilter: ctnetlink: disable helper autoassign When userspace, e.g. conntrackd, inserts an entry with a specified helper, its possible that the helper is lost immediately after its added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set IPS_HELPER bit to bypass auto-assign logic, its unwanted, just like when helper is assigned via ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15..26071021e986 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), -- cgit v1.2.3 From ac9f0c810684a1b161c18eb4b91ce84cbc13c91d Mon Sep 17 00:00:00 2001 From: Anton Lundin Date: Thu, 3 Feb 2022 10:41:35 +0100 Subject: ata: libata-core: Introduce ATA_HORKAGE_NO_LOG_DIR horkage 06f6c4c6c3e8 ("ata: libata: add missing ata_identify_page_supported() calls") introduced additional calls to ata_identify_page_supported(), thus also adding indirectly accesses to the device log directory log page through ata_log_supported(). Reading this log page causes SATADOM-ML 3ME devices to lock up. Introduce the horkage flag ATA_HORKAGE_NO_LOG_DIR to prevent accesses to the log directory in ata_log_supported() and add a blacklist entry with this flag for "SATADOM-ML 3ME" devices. Fixes: 636f6e2af4fb ("libata: add horkage for missing Identify Device log") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Anton Lundin Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 605756f645be..7f99b4d78822 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -380,6 +380,7 @@ enum { ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 80110bbfbba6f0078d5a1cbc8df004506db8ffe5 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 3 Feb 2022 20:49:24 -0800 Subject: mm/page_table_check: check entries at pmd levels syzbot detected a case where the page table counters were not properly updated. syzkaller login: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:162! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3099 Comm: pasha Not tainted 5.16.0+ #48 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIO4 RIP: 0010:__page_table_check_zero+0x159/0x1a0 Call Trace: free_pcp_prepare+0x3be/0xaa0 free_unref_page+0x1c/0x650 free_compound_page+0xec/0x130 free_transhuge_page+0x1be/0x260 __put_compound_page+0x90/0xd0 release_pages+0x54c/0x1060 __pagevec_release+0x7c/0x110 shmem_undo_range+0x85e/0x1250 ... The repro involved having a huge page that is split due to uprobe event temporarily replacing one of the pages in the huge page. Later the huge page was combined again, but the counters were off, as the PTE level was not properly updated. Make sure that when PMD is cleared and prior to freeing the level the PTEs are updated. Link: https://lkml.kernel.org/r/20220131203249.2832273-5-pasha.tatashin@soleen.com Fixes: df4e817b7108 ("mm: page table check") Signed-off-by: Pasha Tatashin Acked-by: David Rientjes Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Dave Hansen Cc: Greg Thelen Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jiri Slaby Cc: Mike Rapoport Cc: Muchun Song Cc: Paul Turner Cc: Wei Xu Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_table_check.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 38cace1da7b6..01e16c7696ec 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { @@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, __page_table_check_pud_set(mm, addr, pudp, pud); } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear_range(mm, addr, pmd); +} + #else static inline void page_table_check_alloc(struct page *page, unsigned int order) @@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, { } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ +} + #endif /* CONFIG_PAGE_TABLE_CHECK */ #endif /* __LINUX_PAGE_TABLE_CHECK_H */ -- cgit v1.2.3 From 314c459a6fe0957b5885fbc65c53d51444092880 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 3 Feb 2022 20:49:29 -0800 Subject: mm/pgtable: define pte_index so that preprocessor could recognize it Since commit 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") pte_index is a static inline and there is no define for it that can be recognized by the preprocessor. As a result, vm_insert_pages() uses slower loop over vm_insert_page() instead of insert_pages() that amortizes the cost of spinlock operations when inserting multiple pages. Link: https://lkml.kernel.org/r/20220111145457.20748-1-rppt@kernel.org Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") Signed-off-by: Mike Rapoport Reported-by: Christian Dietrich Reviewed-by: Khalid Aziz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index bc8713a76e03..f4f4077b97aa 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) -- cgit v1.2.3 From d2a02e3c8bb6b347818518edff5a4b40ff52d6d8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 19 Jan 2022 14:35:06 +0100 Subject: lib/crypto: blake2s: avoid indirect calls to compression function for Clang CFI blake2s_compress_generic is weakly aliased by blake2s_compress. The current harness for function selection uses a function pointer, which is ordinarily inlined and resolved at compile time. But when Clang's CFI is enabled, CFI still triggers when making an indirect call via a weak symbol. This seems like a bug in Clang's CFI, as though it's bucketing weak symbols and strong symbols differently. It also only seems to trigger when "full LTO" mode is used, rather than "thin LTO". [ 0.000000][ T0] Kernel panic - not syncing: CFI failure (target: blake2s_compress_generic+0x0/0x1444) [ 0.000000][ T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-mainline-06981-g076c855b846e #1 [ 0.000000][ T0] Hardware name: MT6873 (DT) [ 0.000000][ T0] Call trace: [ 0.000000][ T0] dump_backtrace+0xfc/0x1dc [ 0.000000][ T0] dump_stack_lvl+0xa8/0x11c [ 0.000000][ T0] panic+0x194/0x464 [ 0.000000][ T0] __cfi_check_fail+0x54/0x58 [ 0.000000][ T0] __cfi_slowpath_diag+0x354/0x4b0 [ 0.000000][ T0] blake2s_update+0x14c/0x178 [ 0.000000][ T0] _extract_entropy+0xf4/0x29c [ 0.000000][ T0] crng_initialize_primary+0x24/0x94 [ 0.000000][ T0] rand_initialize+0x2c/0x6c [ 0.000000][ T0] start_kernel+0x2f8/0x65c [ 0.000000][ T0] __primary_switched+0xc4/0x7be4 [ 0.000000][ T0] Rebooting in 5 seconds.. Nonetheless, the function pointer method isn't so terrific anyway, so this patch replaces it with a simple boolean, which also gets inlined away. This successfully works around the Clang bug. In general, I'm not too keen on all of the indirection involved here; it clearly does more harm than good. Hopefully the whole thing can get cleaned up down the road when lib/crypto is overhauled more comprehensively. But for now, we go with a simple bandaid. Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in") Link: https://github.com/ClangBuiltLinux/linux/issues/1567 Reported-by: Miles Chen Tested-by: Miles Chen Tested-by: Nathan Chancellor Tested-by: John Stultz Acked-by: Nick Desaulniers Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- include/crypto/internal/blake2s.h | 40 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index d39cfa0d333e..52363eee2b20 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } -typedef void (*blake2s_compress_t)(struct blake2s_state *state, - const u8 *block, size_t nblocks, u32 inc); - /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static inline void __blake2s_update(struct blake2s_state *state, - const u8 *in, size_t inlen, - blake2s_compress_t compress) +static __always_inline void +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, + bool force_generic) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); } @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, state->buflen += inlen; } -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, - blake2s_compress_t compress) +static __always_inline void +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, state->buflen); + else + blake2s_compress(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, compress); + __blake2s_update(state, in, inlen, force_generic); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, compress); + __blake2s_final(state, out, force_generic); return 0; } -- cgit v1.2.3 From a3574119826d9a4ef807fb973cf5150c3b90da43 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 3 Feb 2022 13:38:06 +0000 Subject: drm: document struct drm_mode_fb_cmd2 Follow-up for the DRM_IOCTL_MODE_GETFB2 docs. v2: (Daniel Stone) - Replace fourcc.org with drm_fourcc.h because this is the authoritative source and the website may have mismatches. - Drop assumption that offsets will generally be 0. - Mention that unused entries must be zero'ed out. v3: (Pekka) - Mention that a handle can be re-used - Add unit for pitches/offsets Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Pekka Paalanen Cc: Daniel Stone Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20220203133753.261507-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 88 ++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e1e351682872..0a0d56a6158e 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -663,41 +663,73 @@ struct drm_mode_fb_cmd { #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ #define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order is dictated by the + * format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8 bit Y samples followed by an + * interleaved U/V plane containing 8 bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ __u32 fb_id; + /** @width: Width of the frame-buffer. */ __u32 width; + /** @height: Height of the frame-buffer. */ __u32 height; - __u32 pixel_format; /* fourcc code from drm_fourcc.h */ - __u32 flags; /* see above flags */ + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; - /* - * In case of planar formats, this ioctl allows up to 4 - * buffer objects with offsets and pitches per plane. - * The pitch and offset order is dictated by the fourcc, - * e.g. NV12 (https://fourcc.org/yuv.php#NV12) is described as: - * - * YUV 4:2:0 image with a plane of 8 bit Y samples - * followed by an interleaved U/V plane containing - * 8 bit 2x2 subsampled colour difference samples. - * - * So it would consist of Y as offsets[0] and UV as - * offsets[1]. Note that offsets[0] will generally - * be 0 (but this is not required). - * - * To accommodate tiled, compressed, etc formats, a - * modifier can be specified. The default value of zero - * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. Note that even though - * it looks like we have a modifier per-plane, we in fact - * do not. The modifier for each plane must be identical. - * Thus all combinations of different data layouts for - * multi plane formats must be enumerated as separate - * modifiers. + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. */ __u32 handles[4]; - __u32 pitches[4]; /* pitch for each plane */ - __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compress */ + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -- cgit v1.2.3 From fda17afc6166e975bec1197bd94cd2a3317bce3f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 7 Feb 2022 11:27:53 +0900 Subject: ata: libata-core: Fix ata_dev_config_cpr() The concurrent positioning ranges log page 47h is a general purpose log page and not a subpage of the indentify device log. Using ata_identify_page_supported() to test for concurrent positioning ranges support is thus wrong. ata_log_supported() must be used. Furthermore, unlike other advanced ATA features (e.g. NCQ priority), accesses to the concurrent positioning ranges log page are not gated by a feature bit from the device IDENTIFY data. Since many older drives react badly to the READ LOG EXT and/or READ LOG DMA EXT commands isued to read device log pages, avoid problems with older drives by limiting the concurrent positioning ranges support detection to drives implementing at least the ACS-4 ATA standard (major version 11). This additional condition effectively turns ata_dev_config_cpr() into a nop for older drives, avoiding problems in the field. Fixes: fe22e1c2f705 ("libata: support concurrent positioning ranges log") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215519 Cc: stable@vger.kernel.org Reviewed-by: Hannes Reinecke Tested-by: Abderraouf Adjal Signed-off-by: Damien Le Moal --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index 199e47e97d64..21292b5bbb55 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -324,12 +324,12 @@ enum { ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, -- cgit v1.2.3 From cb1f65c1e1424a4b5e4a86da8aa3b8fd8459c8ec Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Feb 2022 18:35:22 +0100 Subject: PM: s2idle: ACPI: Fix wakeup interrupts handling After commit e3728b50cd9b ("ACPI: PM: s2idle: Avoid possible race related to the EC GPE") wakeup interrupts occurring immediately after the one discarded by acpi_s2idle_wake() may be missed. Moreover, if the SCI triggers again immediately after the rearming in acpi_s2idle_wake(), that wakeup may be missed too. The problem is that pm_system_irq_wakeup() only calls pm_system_wakeup() when pm_wakeup_irq is 0, but that's not the case any more after the interrupt causing acpi_s2idle_wake() to run until pm_wakeup_irq is cleared by the pm_wakeup_clear() call in s2idle_loop(). However, there may be wakeup interrupts occurring in that time frame and if that happens, they will be missed. To address that issue first move the clearing of pm_wakeup_irq to the point at which it is known that the interrupt causing acpi_s2idle_wake() to tun will be discarded, before rearming the SCI for wakeup. Moreover, because that only reduces the size of the time window in which the issue may manifest itself, allow pm_system_irq_wakeup() to register two second wakeup interrupts in a row and, when discarding the first one, replace it with the second one. [Of course, this assumes that only one wakeup interrupt can be discarded in one go, but currently that is the case and I am not aware of any plans to change that.] Fixes: e3728b50cd9b ("ACPI: PM: s2idle: Avoid possible race related to the EC GPE") Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3e8ecdebe601..300273ff40cc 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -497,14 +497,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); -- cgit v1.2.3 From ea4692c75e1c63926e4fb0728f5775ef0d733888 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jan 2022 01:39:41 -0800 Subject: lib/string_helpers: Consolidate string helpers implementation There are a few implementations of string helpers in the tree like yesno() that just returns "yes" or "no" depending on a boolean argument. Those are helpful to output strings to the user or log. In order to consolidate them, prefix all of them str_ prefix to make it clear what they are about and avoid symbol clashes. Taking the commoon `val ? "yes" : "no"` implementation, quite a few users of open coded yesno() could later be converted to the new function: $ git grep '?\s*"yes"\s*' | wc -l 286 $ git grep '?\s*"no"\s*' | wc -l 20 The inlined function should keep the const strings local to each compilation unit, the same way it's now, thus not changing the current behavior. Signed-off-by: Lucas De Marchi Reviewed-by: Andy Shevchenko Acked-by: Jani Nikula Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220126093951.1470898-2-lucas.demarchi@intel.com --- include/linux/string_helpers.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 7a22921c9db7..4d72258d42fd 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -106,4 +106,24 @@ void kfree_strarray(char **array, size_t n); char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); +static inline const char *str_yes_no(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *str_on_off(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *str_enable_disable(bool v) +{ + return v ? "enable" : "disable"; +} + +static inline const char *str_enabled_disabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif -- cgit v1.2.3 From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 24 Aug 2021 16:13:41 -0400 Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on same or a remote machine but expects the processes that have a device file (e.g. GPU) associated with them, provide necessary driver support to assist CRIU and its extensible plugin interface. Thus, In order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver, needs to provide a set of new APIs that provide necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in form of image files. This introduces some new ioctls which will be used to checkpoint-Restore any KFD bound user process. KFD only allows ioctl calls from the same process that opened the KFD file descriptor. Since these ioctls are expected to be called from a KFD criu plugin which has elevated ptrace attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with the file descriptors so modify KFD to allow such calls. (API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. + * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- cgit v1.2.3 From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 7938f4218168ae9fc4bdddb15976f9ebbae41999 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 4 Feb 2022 09:05:41 -0800 Subject: dma-buf-map: Rename to iosys-map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename struct dma_buf_map to struct iosys_map and corresponding APIs. Over time dma-buf-map grew up to more functionality than the one used by dma-buf: in fact it's just a shim layer to abstract system memory, that can be accessed via regular load and store, from IO memory that needs to be acessed via arch helpers. The idea is to extend this API so it can fulfill other needs, internal to a single driver. Example: in the i915 driver it's desired to share the implementation for integrated graphics, which uses mostly system memory, with discrete graphics, which may need to access IO memory. The conversion was mostly done with the following semantic patch: @r1@ @@ - struct dma_buf_map + struct iosys_map @r2@ @@ ( - DMA_BUF_MAP_INIT_VADDR + IOSYS_MAP_INIT_VADDR | - dma_buf_map_set_vaddr + iosys_map_set_vaddr | - dma_buf_map_set_vaddr_iomem + iosys_map_set_vaddr_iomem | - dma_buf_map_is_equal + iosys_map_is_equal | - dma_buf_map_is_null + iosys_map_is_null | - dma_buf_map_is_set + iosys_map_is_set | - dma_buf_map_clear + iosys_map_clear | - dma_buf_map_memcpy_to + iosys_map_memcpy_to | - dma_buf_map_incr + iosys_map_incr ) @@ @@ - #include + #include Then some files had their includes adjusted and some comments were update to remove mentions to dma-buf-map. Since this is not specific to dma-buf anymore, move the documentation to the "Bus-Independent Device Accesses" section. v2: - Squash patches v3: - Fix wrong removal of dma-buf.h from MAINTAINERS - Move documentation from dma-buf.rst to device-io.rst v4: - Change documentation title and level Signed-off-by: Lucas De Marchi Acked-by: Christian König Acked-by: Sumit Semwal Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220204170541.829227-1-lucas.demarchi@intel.com --- include/drm/drm_cache.h | 6 +- include/drm/drm_client.h | 7 +- include/drm/drm_gem.h | 6 +- include/drm/drm_gem_atomic_helper.h | 6 +- include/drm/drm_gem_cma_helper.h | 6 +- include/drm/drm_gem_framebuffer_helper.h | 8 +- include/drm/drm_gem_shmem_helper.h | 12 +- include/drm/drm_gem_ttm_helper.h | 6 +- include/drm/drm_gem_vram_helper.h | 9 +- include/drm/drm_prime.h | 6 +- include/drm/ttm/ttm_bo_api.h | 10 +- include/drm/ttm/ttm_kmap_iter.h | 10 +- include/drm/ttm/ttm_resource.h | 6 +- include/linux/dma-buf-map.h | 266 ------------------------------- include/linux/dma-buf.h | 12 +- include/linux/iosys-map.h | 257 +++++++++++++++++++++++++++++ 16 files changed, 316 insertions(+), 317 deletions(-) delete mode 100644 include/linux/dma-buf-map.h create mode 100644 include/linux/iosys-map.h (limited to 'include') diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index cc9de1632dd3..22deb216b59c 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,7 +35,7 @@ #include -struct dma_buf_map; +struct iosys_map; void drm_clflush_pages(struct page *pages[], unsigned long num_pages); void drm_clflush_sg(struct sg_table *st); @@ -74,7 +74,7 @@ static inline bool drm_arch_can_wc_memory(void) void drm_memcpy_init_early(void); -void drm_memcpy_from_wc(struct dma_buf_map *dst, - const struct dma_buf_map *src, +void drm_memcpy_from_wc(struct iosys_map *dst, + const struct iosys_map *src, unsigned long len); #endif diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index f07f2fb02e75..4fc8018eddda 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -3,7 +3,7 @@ #ifndef _DRM_CLIENT_H_ #define _DRM_CLIENT_H_ -#include +#include #include #include #include @@ -144,7 +144,7 @@ struct drm_client_buffer { /** * @map: Virtual address for the buffer */ - struct dma_buf_map map; + struct iosys_map map; /** * @fb: DRM framebuffer @@ -156,7 +156,8 @@ struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format); void drm_client_framebuffer_delete(struct drm_client_buffer *buffer); int drm_client_framebuffer_flush(struct drm_client_buffer *buffer, struct drm_rect *rect); -int drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map); +int drm_client_buffer_vmap(struct drm_client_buffer *buffer, + struct iosys_map *map); void drm_client_buffer_vunmap(struct drm_client_buffer *buffer); int drm_client_modeset_create(struct drm_client_dev *client); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 35e7f44c2a75..e2941cee14b6 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -39,7 +39,7 @@ #include -struct dma_buf_map; +struct iosys_map; struct drm_gem_object; /** @@ -139,7 +139,7 @@ struct drm_gem_object_funcs { * * This callback is optional. */ - int (*vmap)(struct drm_gem_object *obj, struct dma_buf_map *map); + int (*vmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @vunmap: @@ -149,7 +149,7 @@ struct drm_gem_object_funcs { * * This callback is optional. */ - void (*vunmap)(struct drm_gem_object *obj, struct dma_buf_map *map); + void (*vunmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @mmap: diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index 0b1e2dd2ac3f..6e3319e9001a 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -3,7 +3,7 @@ #ifndef __DRM_GEM_ATOMIC_HELPER_H__ #define __DRM_GEM_ATOMIC_HELPER_H__ -#include +#include #include #include @@ -59,7 +59,7 @@ struct drm_shadow_plane_state { * The memory mappings stored in map should be established in the plane's * prepare_fb callback and removed in the cleanup_fb callback. */ - struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct iosys_map map[DRM_FORMAT_MAX_PLANES]; /** * @data: Address of each framebuffer BO's data @@ -67,7 +67,7 @@ struct drm_shadow_plane_state { * The address of the data stored in each mapping. This is different * for framebuffers with non-zero offset fields. */ - struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; + struct iosys_map data[DRM_FORMAT_MAX_PLANES]; }; /** diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index adb507a9dbf0..fbda4ce5d5fb 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -38,7 +38,8 @@ void drm_gem_cma_free(struct drm_gem_cma_object *cma_obj); void drm_gem_cma_print_info(const struct drm_gem_cma_object *cma_obj, struct drm_printer *p, unsigned int indent); struct sg_table *drm_gem_cma_get_sg_table(struct drm_gem_cma_object *cma_obj); -int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, struct dma_buf_map *map); +int drm_gem_cma_vmap(struct drm_gem_cma_object *cma_obj, + struct iosys_map *map); int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct *vma); extern const struct vm_operations_struct drm_gem_cma_vm_ops; @@ -106,7 +107,8 @@ static inline struct sg_table *drm_gem_cma_object_get_sg_table(struct drm_gem_ob * Returns: * 0 on success or a negative error code on failure. */ -static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline int drm_gem_cma_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 905727719ead..1091e4fa08cb 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -2,7 +2,7 @@ #define __DRM_GEM_FB_HELPER_H__ #include -#include +#include #include @@ -40,10 +40,10 @@ drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_gem_fb_vmap(struct drm_framebuffer *fb, - struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES], - struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]); + struct iosys_map map[static DRM_FORMAT_MAX_PLANES], + struct iosys_map data[DRM_FORMAT_MAX_PLANES]); void drm_gem_fb_vunmap(struct drm_framebuffer *fb, - struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES]); + struct iosys_map map[static DRM_FORMAT_MAX_PLANES]); int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 311d66c9cf4b..68347b63fc71 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -113,8 +113,10 @@ int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); +int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); +void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); @@ -226,7 +228,8 @@ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_ * Returns: * 0 on success or a negative error code on failure. */ -static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); @@ -241,7 +244,8 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct d * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should * use it as their &drm_gem_object_funcs.vunmap handler. */ -static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) +static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, + struct iosys_map *map) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); diff --git a/include/drm/drm_gem_ttm_helper.h b/include/drm/drm_gem_ttm_helper.h index 78040f6cc6f3..4c003b4f173e 100644 --- a/include/drm/drm_gem_ttm_helper.h +++ b/include/drm/drm_gem_ttm_helper.h @@ -10,7 +10,7 @@ #include #include -struct dma_buf_map; +struct iosys_map; #define drm_gem_ttm_of_gem(gem_obj) \ container_of(gem_obj, struct ttm_buffer_object, base) @@ -18,9 +18,9 @@ struct dma_buf_map; void drm_gem_ttm_print_info(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *gem); int drm_gem_ttm_vmap(struct drm_gem_object *gem, - struct dma_buf_map *map); + struct iosys_map *map); void drm_gem_ttm_vunmap(struct drm_gem_object *gem, - struct dma_buf_map *map); + struct iosys_map *map); int drm_gem_ttm_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma); diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index b4ce27a72773..c083a1d71cf4 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -12,7 +12,7 @@ #include #include -#include +#include struct drm_mode_create_dumb; struct drm_plane; @@ -51,7 +51,7 @@ struct vm_area_struct; */ struct drm_gem_vram_object { struct ttm_buffer_object bo; - struct dma_buf_map map; + struct iosys_map map; /** * @vmap_use_count: @@ -97,8 +97,9 @@ void drm_gem_vram_put(struct drm_gem_vram_object *gbo); s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo); int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag); int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo); -int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct dma_buf_map *map); -void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, struct dma_buf_map *map); +int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct iosys_map *map); +void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo, + struct iosys_map *map); int drm_gem_vram_fill_create_dumb(struct drm_file *file, struct drm_device *dev, diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 54f2c58305d2..2a1d01e5b56b 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -54,7 +54,7 @@ struct device; struct dma_buf_export_info; struct dma_buf; struct dma_buf_attachment; -struct dma_buf_map; +struct iosys_map; enum dma_data_direction; @@ -83,8 +83,8 @@ struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir); -int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map); -void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct dma_buf_map *map); +int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map); +void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map); int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index c17b2df9178b..155b19ee12fb 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -47,7 +47,7 @@ struct ttm_global; struct ttm_device; -struct dma_buf_map; +struct iosys_map; struct drm_mm_node; @@ -481,17 +481,17 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); * ttm_bo_vmap * * @bo: The buffer object. - * @map: pointer to a struct dma_buf_map representing the map. + * @map: pointer to a struct iosys_map representing the map. * * Sets up a kernel virtual mapping, using ioremap or vmap to the * data in the buffer object. The parameter @map returns the virtual - * address as struct dma_buf_map. Unmap the buffer with ttm_bo_vunmap(). + * address as struct iosys_map. Unmap the buffer with ttm_bo_vunmap(). * * Returns * -ENOMEM: Out of memory. * -EINVAL: Invalid range. */ -int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); +int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); /** * ttm_bo_vunmap @@ -501,7 +501,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); * * Unmaps a kernel map set up by ttm_bo_vmap(). */ -void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct dma_buf_map *map); +void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); /** * ttm_bo_mmap_obj - mmap memory backed by a ttm buffer object. diff --git a/include/drm/ttm/ttm_kmap_iter.h b/include/drm/ttm/ttm_kmap_iter.h index 8bb00fd39d6c..cc5c09a211b4 100644 --- a/include/drm/ttm/ttm_kmap_iter.h +++ b/include/drm/ttm/ttm_kmap_iter.h @@ -8,7 +8,7 @@ #include struct ttm_kmap_iter; -struct dma_buf_map; +struct iosys_map; /** * struct ttm_kmap_iter_ops - Ops structure for a struct @@ -24,22 +24,22 @@ struct ttm_kmap_iter_ops { * kmap_local semantics. * @res_iter: Pointer to the struct ttm_kmap_iter representing * the resource. - * @dmap: The struct dma_buf_map holding the virtual address after + * @dmap: The struct iosys_map holding the virtual address after * the operation. * @i: The location within the resource to map. PAGE_SIZE granularity. */ void (*map_local)(struct ttm_kmap_iter *res_iter, - struct dma_buf_map *dmap, pgoff_t i); + struct iosys_map *dmap, pgoff_t i); /** * unmap_local() - Unmap a PAGE_SIZE part of the resource previously * mapped using kmap_local. * @res_iter: Pointer to the struct ttm_kmap_iter representing * the resource. - * @dmap: The struct dma_buf_map holding the virtual address after + * @dmap: The struct iosys_map holding the virtual address after * the operation. */ void (*unmap_local)(struct ttm_kmap_iter *res_iter, - struct dma_buf_map *dmap); + struct iosys_map *dmap); bool maps_tt; }; diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 5952051091cd..6eae09e382d2 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include @@ -41,7 +41,7 @@ struct ttm_resource; struct ttm_place; struct ttm_buffer_object; struct ttm_placement; -struct dma_buf_map; +struct iosys_map; struct io_mapping; struct sg_table; struct scatterlist; @@ -207,7 +207,7 @@ struct ttm_kmap_iter_iomap { */ struct ttm_kmap_iter_linear_io { struct ttm_kmap_iter base; - struct dma_buf_map dmap; + struct iosys_map dmap; bool needs_unmap; }; diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h deleted file mode 100644 index 278d489e4bdd..000000000000 --- a/include/linux/dma-buf-map.h +++ /dev/null @@ -1,266 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Pointer to dma-buf-mapped memory, plus helpers. - */ - -#ifndef __DMA_BUF_MAP_H__ -#define __DMA_BUF_MAP_H__ - -#include -#include - -/** - * DOC: overview - * - * Calling dma-buf's vmap operation returns a pointer to the buffer's memory. - * Depending on the location of the buffer, users may have to access it with - * I/O operations or memory load/store operations. For example, copying to - * system memory could be done with memcpy(), copying to I/O memory would be - * done with memcpy_toio(). - * - * .. code-block:: c - * - * void *vaddr = ...; // pointer to system memory - * memcpy(vaddr, src, len); - * - * void *vaddr_iomem = ...; // pointer to I/O memory - * memcpy_toio(vaddr, _iomem, src, len); - * - * When using dma-buf's vmap operation, the returned pointer is encoded as - * :c:type:`struct dma_buf_map `. - * :c:type:`struct dma_buf_map ` stores the buffer's address in - * system or I/O memory and a flag that signals the required method of - * accessing the buffer. Use the returned instance and the helper functions - * to access the buffer's memory in the correct way. - * - * The type :c:type:`struct dma_buf_map ` and its helpers are - * actually independent from the dma-buf infrastructure. When sharing buffers - * among devices, drivers have to know the location of the memory to access - * the buffers in a safe way. :c:type:`struct dma_buf_map ` - * solves this problem for dma-buf and its users. If other drivers or - * sub-systems require similar functionality, the type could be generalized - * and moved to a more prominent header file. - * - * Open-coding access to :c:type:`struct dma_buf_map ` is - * considered bad style. Rather then accessing its fields directly, use one - * of the provided helper functions, or implement your own. For example, - * instances of :c:type:`struct dma_buf_map ` can be initialized - * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with - * dma_buf_map_set_vaddr(). These helpers will set an address in system memory. - * - * .. code-block:: c - * - * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); - * - * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); - * - * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). - * - * .. code-block:: c - * - * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); - * - * Instances of struct dma_buf_map do not have to be cleaned up, but - * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings - * always refer to system memory. - * - * .. code-block:: c - * - * dma_buf_map_clear(&map); - * - * Test if a mapping is valid with either dma_buf_map_is_set() or - * dma_buf_map_is_null(). - * - * .. code-block:: c - * - * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map)) - * // always true - * - * Instances of :c:type:`struct dma_buf_map ` can be compared - * for equality with dma_buf_map_is_equal(). Mappings the point to different - * memory spaces, system or I/O, are never equal. That's even true if both - * spaces are located in the same address space, both mappings contain the - * same address value, or both mappings refer to NULL. - * - * .. code-block:: c - * - * struct dma_buf_map sys_map; // refers to system memory - * struct dma_buf_map io_map; // refers to I/O memory - * - * if (dma_buf_map_is_equal(&sys_map, &io_map)) - * // always false - * - * A set up instance of struct dma_buf_map can be used to access or manipulate - * the buffer memory. Depending on the location of the memory, the provided - * helpers will pick the correct operations. Data can be copied into the memory - * with dma_buf_map_memcpy_to(). The address can be manipulated with - * dma_buf_map_incr(). - * - * .. code-block:: c - * - * const void *src = ...; // source buffer - * size_t len = ...; // length of src - * - * dma_buf_map_memcpy_to(&map, src, len); - * dma_buf_map_incr(&map, len); // go to first byte after the memcpy - */ - -/** - * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. - * @vaddr_iomem: The buffer's address if in I/O memory - * @vaddr: The buffer's address if in system memory - * @is_iomem: True if the dma-buf memory is located in I/O - * memory, or false otherwise. - */ -struct dma_buf_map { - union { - void __iomem *vaddr_iomem; - void *vaddr; - }; - bool is_iomem; -}; - -/** - * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory - * @vaddr_: A system-memory address - */ -#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ - { \ - .vaddr = (vaddr_), \ - .is_iomem = false, \ - } - -/** - * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory - * @map: The dma-buf mapping structure - * @vaddr: A system-memory address - * - * Sets the address and clears the I/O-memory flag. - */ -static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) -{ - map->vaddr = vaddr; - map->is_iomem = false; -} - -/** - * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory - * @map: The dma-buf mapping structure - * @vaddr_iomem: An I/O-memory address - * - * Sets the address and the I/O-memory flag. - */ -static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map, - void __iomem *vaddr_iomem) -{ - map->vaddr_iomem = vaddr_iomem; - map->is_iomem = true; -} - -/** - * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality - * @lhs: The dma-buf mapping structure - * @rhs: A dma-buf mapping structure to compare with - * - * Two dma-buf mapping structures are equal if they both refer to the same type of memory - * and to the same address within that memory. - * - * Returns: - * True is both structures are equal, or false otherwise. - */ -static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs, - const struct dma_buf_map *rhs) -{ - if (lhs->is_iomem != rhs->is_iomem) - return false; - else if (lhs->is_iomem) - return lhs->vaddr_iomem == rhs->vaddr_iomem; - else - return lhs->vaddr == rhs->vaddr; -} - -/** - * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL - * @map: The dma-buf mapping structure - * - * Depending on the state of struct dma_buf_map.is_iomem, tests if the - * mapping is NULL. - * - * Returns: - * True if the mapping is NULL, or false otherwise. - */ -static inline bool dma_buf_map_is_null(const struct dma_buf_map *map) -{ - if (map->is_iomem) - return !map->vaddr_iomem; - return !map->vaddr; -} - -/** - * dma_buf_map_is_set - Tests is the dma-buf mapping has been set - * @map: The dma-buf mapping structure - * - * Depending on the state of struct dma_buf_map.is_iomem, tests if the - * mapping has been set. - * - * Returns: - * True if the mapping is been set, or false otherwise. - */ -static inline bool dma_buf_map_is_set(const struct dma_buf_map *map) -{ - return !dma_buf_map_is_null(map); -} - -/** - * dma_buf_map_clear - Clears a dma-buf mapping structure - * @map: The dma-buf mapping structure - * - * Clears all fields to zero; including struct dma_buf_map.is_iomem. So - * mapping structures that were set to point to I/O memory are reset for - * system memory. Pointers are cleared to NULL. This is the default. - */ -static inline void dma_buf_map_clear(struct dma_buf_map *map) -{ - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } -} - -/** - * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping - * @dst: The dma-buf mapping structure - * @src: The source buffer - * @len: The number of byte in src - * - * Copies data into a dma-buf mapping. The source buffer is in system - * memory. Depending on the buffer's location, the helper picks the correct - * method of accessing the memory. - */ -static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) -{ - if (dst->is_iomem) - memcpy_toio(dst->vaddr_iomem, src, len); - else - memcpy(dst->vaddr, src, len); -} - -/** - * dma_buf_map_incr - Increments the address stored in a dma-buf mapping - * @map: The dma-buf mapping structure - * @incr: The number of bytes to increment - * - * Increments the address stored in a dma-buf mapping. Depending on the - * buffer's location, the correct value will be updated. - */ -static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) -{ - if (map->is_iomem) - map->vaddr_iomem += incr; - else - map->vaddr += incr; -} - -#endif /* __DMA_BUF_MAP_H__ */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7ab50076e7a6..2097760e8e95 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -13,7 +13,7 @@ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ -#include +#include #include #include #include @@ -283,8 +283,8 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); - int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); - void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); + int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); + void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; /** @@ -347,7 +347,7 @@ struct dma_buf { * @vmap_ptr: * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. */ - struct dma_buf_map vmap_ptr; + struct iosys_map vmap_ptr; /** * @exp_name: @@ -628,6 +628,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); -int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); -void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); +void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h new file mode 100644 index 000000000000..f4186f91caa6 --- /dev/null +++ b/include/linux/iosys-map.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer abstraction for IO/system memory + */ + +#ifndef __IOSYS_MAP_H__ +#define __IOSYS_MAP_H__ + +#include +#include + +/** + * DOC: overview + * + * When accessing a memory region, depending on its location, users may have to + * access it with I/O operations or memory load/store operations. For example, + * copying to system memory could be done with memcpy(), copying to I/O memory + * would be done with memcpy_toio(). + * + * .. code-block:: c + * + * void *vaddr = ...; // pointer to system memory + * memcpy(vaddr, src, len); + * + * void *vaddr_iomem = ...; // pointer to I/O memory + * memcpy_toio(vaddr, _iomem, src, len); + * + * The user of such pointer may not have information about the mapping of that + * region or may want to have a single code path to handle operations on that + * buffer, regardless if it's located in system or IO memory. The type + * :c:type:`struct iosys_map ` and its helpers abstract that so the + * buffer can be passed around to other drivers or have separate duties inside + * the same driver for allocation, read and write operations. + * + * Open-coding access to :c:type:`struct iosys_map ` is considered + * bad style. Rather then accessing its fields directly, use one of the provided + * helper functions, or implement your own. For example, instances of + * :c:type:`struct iosys_map ` can be initialized statically with + * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These + * helpers will set an address in system memory. + * + * .. code-block:: c + * + * struct iosys_map map = IOSYS_MAP_INIT_VADDR(0xdeadbeaf); + * + * iosys_map_set_vaddr(&map, 0xdeadbeaf); + * + * To set an address in I/O memory, use iosys_map_set_vaddr_iomem(). + * + * .. code-block:: c + * + * iosys_map_set_vaddr_iomem(&map, 0xdeadbeaf); + * + * Instances of struct iosys_map do not have to be cleaned up, but + * can be cleared to NULL with iosys_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * iosys_map_clear(&map); + * + * Test if a mapping is valid with either iosys_map_is_set() or + * iosys_map_is_null(). + * + * .. code-block:: c + * + * if (iosys_map_is_set(&map) != iosys_map_is_null(&map)) + * // always true + * + * Instances of :c:type:`struct iosys_map ` can be compared for + * equality with iosys_map_is_equal(). Mappings that point to different memory + * spaces, system or I/O, are never equal. That's even true if both spaces are + * located in the same address space, both mappings contain the same address + * value, or both mappings refer to NULL. + * + * .. code-block:: c + * + * struct iosys_map sys_map; // refers to system memory + * struct iosys_map io_map; // refers to I/O memory + * + * if (iosys_map_is_equal(&sys_map, &io_map)) + * // always false + * + * A set up instance of struct iosys_map can be used to access or manipulate the + * buffer memory. Depending on the location of the memory, the provided helpers + * will pick the correct operations. Data can be copied into the memory with + * iosys_map_memcpy_to(). The address can be manipulated with iosys_map_incr(). + * + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * iosys_map_memcpy_to(&map, src, len); + * iosys_map_incr(&map, len); // go to first byte after the memcpy + */ + +/** + * struct iosys_map - Pointer to IO/system memory + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the buffer is located in I/O memory, or false + * otherwise. + */ +struct iosys_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/** + * IOSYS_MAP_INIT_VADDR - Initializes struct iosys_map to an address in system memory + * @vaddr_: A system-memory address + */ +#define IOSYS_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + +/** + * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory + * @map: The iosys_map structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr(struct iosys_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + +/** + * iosys_map_set_vaddr_iomem - Sets a iosys mapping structure to an address in I/O memory + * @map: The iosys_map structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr_iomem(struct iosys_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + +/** + * iosys_map_is_equal - Compares two iosys mapping structures for equality + * @lhs: The iosys_map structure + * @rhs: A iosys_map structure to compare with + * + * Two iosys mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True is both structures are equal, or false otherwise. + */ +static inline bool iosys_map_is_equal(const struct iosys_map *lhs, + const struct iosys_map *rhs) +{ + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; +} + +/** + * iosys_map_is_null - Tests for a iosys mapping to be NULL + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool iosys_map_is_null(const struct iosys_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * iosys_map_is_set - Tests if the iosys mapping has been set + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping is been set, or false otherwise. + */ +static inline bool iosys_map_is_set(const struct iosys_map *map) +{ + return !iosys_map_is_null(map); +} + +/** + * iosys_map_clear - Clears a iosys mapping structure + * @map: The iosys_map structure + * + * Clears all fields to zero, including struct iosys_map.is_iomem, so + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void iosys_map_clear(struct iosys_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +/** + * iosys_map_memcpy_to - Memcpy into iosys mapping + * @dst: The iosys_map structure + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a iosys mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. + */ +static inline void iosys_map_memcpy_to(struct iosys_map *dst, const void *src, + size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * iosys_map_incr - Increments the address stored in a iosys mapping + * @map: The iosys_map structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a iosys mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void iosys_map_incr(struct iosys_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + +#endif /* __IOSYS_MAP_H__ */ -- cgit v1.2.3 From 976b6d97c62347df3e686f60a5f455bb8ed6ea23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 19 Jan 2022 11:17:32 +0100 Subject: dma-buf: consolidate dma_fence subclass checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the wrapper functions to check for dma_fence subclasses in the dma_fence header. This makes it easier to document and also check the different requirements for fence containers in the subclasses. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-2-christian.koenig@amd.com --- include/linux/dma-fence-array.h | 15 +-------------- include/linux/dma-fence-chain.h | 3 +-- include/linux/dma-fence.h | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 303dd712220f..fec374f69e12 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -45,19 +45,6 @@ struct dma_fence_array { struct irq_work work; }; -extern const struct dma_fence_ops dma_fence_array_ops; - -/** - * dma_fence_is_array - check if a fence is from the array subsclass - * @fence: fence to test - * - * Return true if it is a dma_fence_array and false otherwise. - */ -static inline bool dma_fence_is_array(struct dma_fence *fence) -{ - return fence->ops == &dma_fence_array_ops; -} - /** * to_dma_fence_array - cast a fence to a dma_fence_array * @fence: fence to cast to a dma_fence_array @@ -68,7 +55,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) static inline struct dma_fence_array * to_dma_fence_array(struct dma_fence *fence) { - if (fence->ops != &dma_fence_array_ops) + if (!fence || !dma_fence_is_array(fence)) return NULL; return container_of(fence, struct dma_fence_array, base); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 54fe3443fd2c..ee906b659694 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -49,7 +49,6 @@ struct dma_fence_chain { spinlock_t lock; }; -extern const struct dma_fence_ops dma_fence_chain_ops; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain @@ -61,7 +60,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops; static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { - if (!fence || fence->ops != &dma_fence_chain_ops) + if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 1ea691753bd3..775cdc0b4f24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,4 +587,42 @@ struct dma_fence *dma_fence_get_stub(void); struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); +extern const struct dma_fence_ops dma_fence_array_ops; +extern const struct dma_fence_ops dma_fence_chain_ops; + +/** + * dma_fence_is_array - check if a fence is from the array subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_array and false otherwise. + */ +static inline bool dma_fence_is_array(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_array_ops; +} + +/** + * dma_fence_is_chain - check if a fence is from the chain subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_chain and false otherwise. + */ +static inline bool dma_fence_is_chain(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +/** + * dma_fence_is_container - check if a fence is a container for other fences + * @fence: the fence to test + * + * Return true if this fence is a container for other fences, false otherwise. + * This is important since we can't build up large fence structure or otherwise + * we run into recursion during operation on those fences. + */ +static inline bool dma_fence_is_container(struct dma_fence *fence) +{ + return dma_fence_is_array(fence) || dma_fence_is_chain(fence); +} + #endif /* __LINUX_DMA_FENCE_H */ -- cgit v1.2.3 From 18f5fad275efef015226ee4f90eae34d8f44aa5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 20 Jan 2022 11:42:40 +0100 Subject: dma-buf: add dma_fence_chain_contained helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a reoccurring pattern that we need to extract the fence from a dma_fence_chain object. Add a helper for this. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-6-christian.koenig@amd.com --- include/linux/dma-fence-chain.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index ee906b659694..10d51bcdf7b7 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -66,6 +66,21 @@ to_dma_fence_chain(struct dma_fence *fence) return container_of(fence, struct dma_fence_chain, base); } +/** + * dma_fence_chain_contained - return the contained fence + * @fence: the fence to test + * + * If the fence is a dma_fence_chain the function returns the fence contained + * inside the chain object, otherwise it returns the fence itself. + */ +static inline struct dma_fence * +dma_fence_chain_contained(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + + return chain ? chain->fence : fence; +} + /** * dma_fence_chain_alloc * -- cgit v1.2.3 From cfc56f85e72f5b9c5c5be26dc2b16518d36a7868 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 7 Feb 2022 18:13:18 +0100 Subject: net: do not keep the dst cache when uncloning an skb dst and its metadata When uncloning an skb dst and its associated metadata a new dst+metadata is allocated and the tunnel information from the old metadata is copied over there. The issue is the tunnel metadata has references to cached dst, which are copied along the way. When a dst+metadata refcount drops to 0 the metadata is freed including the cached dst entries. As they are also referenced in the initial dst+metadata, this ends up in UaFs. In practice the above did not happen because of another issue, the dst+metadata was never freed because its refcount never dropped to 0 (this will be fixed in a subsequent patch). Fix this by initializing the dst cache after copying the tunnel information from the old metadata to also unshare the dst cache. Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Cc: Paolo Abeni Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75d..b997e0c1e362 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,6 +123,19 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); -- cgit v1.2.3 From 9eeabdf17fa0ab75381045c867c370f4cc75a613 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 7 Feb 2022 18:13:19 +0100 Subject: net: fix a memleak when uncloning an skb dst and its metadata When uncloning an skb dst and its associated metadata, a new dst+metadata is allocated and later replaces the old one in the skb. This is helpful to have a non-shared dst+metadata attached to a specific skb. The issue is the uncloned dst+metadata is initialized with a refcount of 1, which is increased to 2 before attaching it to the skb. When tun_dst_unclone returns, the dst+metadata is only referenced from a single place (the skb) while its refcount is 2. Its refcount will never drop to 0 (when the skb is consumed), leading to a memory leak. Fix this by removing the call to dst_hold in tun_dst_unclone, as the dst+metadata refcount is already 1. Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.") Cc: Pravin B Shelar Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index b997e0c1e362..adab27ba1ecb 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -137,7 +137,6 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) #endif skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } -- cgit v1.2.3 From c306d737691ef84305d4ed0d302c63db2932f0bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jan 2022 15:57:45 -0500 Subject: NFSD: Deprecate NFS_OFFSET_MAX NFS_OFFSET_MAX was introduced way back in Linux v2.3.y before there was a kernel-wide OFFSET_MAX value. As a clean up, replace the last few uses of it with its generic equivalent, and get rid of it. Signed-off-by: Chuck Lever --- include/linux/nfs.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 0dc7ad38a0da..b06375e88e58 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } - -/* - * This is really a general kernel constant, but since nothing like - * this is defined in the kernel headers, I have to do it here. - */ -#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) - - enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, -- cgit v1.2.3 From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3 From 0f4b58423f3500ee3e3159fbbd6c41a6e6f920d4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:50 +0200 Subject: drm/dp: add drm_dp_128b132b_read_aux_rd_interval() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP 2.0 errata changes DP_128B132B_TRAINING_AUX_RD_INTERVAL (DPCD 0x2216) completely. Add a new function to read that. Follow-up will need to clean up existing functions. v2: fix reversed interpretation of bit 7 meaning (Uma) Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/22f6637194c9edb22b6a84be82dd385550dbb958.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 30359e434c3f..629ac2272e55 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1115,6 +1115,7 @@ struct drm_panel; # define DP_UHBR13_5 (1 << 2) #define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ +# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_1MS_UNIT (1 << 7) # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK 0x7f # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US 0x00 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS 0x01 @@ -1552,6 +1553,8 @@ void drm_dp_link_train_channel_eq_delay(const struct drm_dp_aux *aux, void drm_dp_lttpr_link_train_channel_eq_delay(const struct drm_dp_aux *aux, const u8 caps[DP_LTTPR_PHY_CAP_SIZE]); +int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux); + u8 drm_dp_link_rate_to_bw_code(int link_rate); int drm_dp_bw_code_to_link_rate(u8 link_bw); -- cgit v1.2.3 From 0192c25c03cd2feaeaadae375fe6aadff788939a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:51 +0200 Subject: drm/dp: add 128b/132b link status helpers from DP 2.0 E11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DP 2.0 errata redefines link training. There are some new status bits, and some of the old ones need to be checked independently. Add helpers to do this. Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/5a46260d1f171fed46d0ab8fe4b6499abd65ce24.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 629ac2272e55..c9ca38941514 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -738,11 +738,13 @@ struct drm_panel; DP_LANE_CHANNEL_EQ_DONE | \ DP_LANE_SYMBOL_LOCKED) -#define DP_LANE_ALIGN_STATUS_UPDATED 0x204 - -#define DP_INTERLANE_ALIGN_DONE (1 << 0) -#define DP_DOWNSTREAM_PORT_STATUS_CHANGED (1 << 6) -#define DP_LINK_STATUS_UPDATED (1 << 7) +#define DP_LANE_ALIGN_STATUS_UPDATED 0x204 +#define DP_INTERLANE_ALIGN_DONE (1 << 0) +#define DP_128B132B_DPRX_EQ_INTERLANE_ALIGN_DONE (1 << 2) /* 2.0 E11 */ +#define DP_128B132B_DPRX_CDS_INTERLANE_ALIGN_DONE (1 << 3) /* 2.0 E11 */ +#define DP_128B132B_LT_FAILED (1 << 4) /* 2.0 E11 */ +#define DP_DOWNSTREAM_PORT_STATUS_CHANGED (1 << 6) +#define DP_LINK_STATUS_UPDATED (1 << 7) #define DP_SINK_STATUS 0x205 # define DP_RECEIVE_PORT_0_STATUS (1 << 0) @@ -1554,6 +1556,13 @@ void drm_dp_lttpr_link_train_channel_eq_delay(const struct drm_dp_aux *aux, const u8 caps[DP_LTTPR_PHY_CAP_SIZE]); int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux); +bool drm_dp_128b132b_lane_channel_eq_done(const u8 link_status[DP_LINK_STATUS_SIZE], + int lane_count); +bool drm_dp_128b132b_lane_symbol_locked(const u8 link_status[DP_LINK_STATUS_SIZE], + int lane_count); +bool drm_dp_128b132b_eq_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]); +bool drm_dp_128b132b_cds_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]); +bool drm_dp_128b132b_link_training_failed(const u8 link_status[DP_LINK_STATUS_SIZE]); u8 drm_dp_link_rate_to_bw_code(int link_rate); int drm_dp_bw_code_to_link_rate(u8 link_bw); -- cgit v1.2.3 From 427153ef63a82a4d51c6046e2457787127f4d6d7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 3 Feb 2022 11:03:52 +0200 Subject: drm/dp: add some new DPCD macros from DP 2.0 E11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some of the new additions from DP 2.0 E11. Cc: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/ec9c1b94858de36b9f4ef6c197effa4ca667afc3.1643878928.git.jani.nikula@intel.com --- include/drm/drm_dp_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c9ca38941514..c48bf958a967 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -560,6 +560,7 @@ struct drm_panel; # define DP_TRAINING_PATTERN_DISABLE 0 # define DP_TRAINING_PATTERN_1 1 # define DP_TRAINING_PATTERN_2 2 +# define DP_TRAINING_PATTERN_2_CDS 3 /* 2.0 E11 */ # define DP_TRAINING_PATTERN_3 3 /* 1.2 */ # define DP_TRAINING_PATTERN_4 7 /* 1.4 */ # define DP_TRAINING_PATTERN_MASK 0x3 @@ -1353,6 +1354,7 @@ struct drm_panel; # define DP_PHY_REPEATER_128B132B_SUPPORTED (1 << 0) /* See DP_128B132B_SUPPORTED_LINK_RATES for values */ #define DP_PHY_REPEATER_128B132B_RATES 0xf0007 /* 2.0 */ +#define DP_PHY_REPEATER_EQ_DONE 0xf0008 /* 2.0 E11 */ enum drm_dp_phy { DP_PHY_DPRX, -- cgit v1.2.3 From 0764db9b49c932b89ee4d9e3236dff4bb07b4a66 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 11 Feb 2022 16:32:32 -0800 Subject: mm: memcg: synchronize objcg lists with a dedicated spinlock Alexander reported a circular lock dependency revealed by the mmap1 ltp test: LOCKDEP_CIRCULAR (suite: ltp, case: mtest06 (mmap1)) WARNING: possible circular locking dependency detected 5.17.0-20220113.rc0.git0.f2211f194038.300.fc35.s390x+debug #1 Not tainted ------------------------------------------------------ mmap1/202299 is trying to acquire lock: 00000001892c0188 (css_set_lock){..-.}-{2:2}, at: obj_cgroup_release+0x4a/0xe0 but task is already holding lock: 00000000ca3b3818 (&sighand->siglock){-.-.}-{2:2}, at: force_sig_info_to_task+0x38/0x180 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sighand->siglock){-.-.}-{2:2}: __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 __lock_task_sighand+0x90/0x190 cgroup_freeze_task+0x2e/0x90 cgroup_migrate_execute+0x11c/0x608 cgroup_update_dfl_csses+0x246/0x270 cgroup_subtree_control_write+0x238/0x518 kernfs_fop_write_iter+0x13e/0x1e0 new_sync_write+0x100/0x190 vfs_write+0x22c/0x2d8 ksys_write+0x6c/0xf8 __do_syscall+0x1da/0x208 system_call+0x82/0xb0 -> #0 (css_set_lock){..-.}-{2:2}: check_prev_add+0xe0/0xed8 validate_chain+0x736/0xb20 __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 obj_cgroup_release+0x4a/0xe0 percpu_ref_put_many.constprop.0+0x150/0x168 drain_obj_stock+0x94/0xe8 refill_obj_stock+0x94/0x278 obj_cgroup_charge+0x164/0x1d8 kmem_cache_alloc+0xac/0x528 __sigqueue_alloc+0x150/0x308 __send_signal+0x260/0x550 send_signal+0x7e/0x348 force_sig_info_to_task+0x104/0x180 force_sig_fault+0x48/0x58 __do_pgm_check+0x120/0x1f0 pgm_check_handler+0x11e/0x180 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sighand->siglock); lock(css_set_lock); lock(&sighand->siglock); lock(css_set_lock); *** DEADLOCK *** 2 locks held by mmap1/202299: #0: 00000000ca3b3818 (&sighand->siglock){-.-.}-{2:2}, at: force_sig_info_to_task+0x38/0x180 #1: 00000001892ad560 (rcu_read_lock){....}-{1:2}, at: percpu_ref_put_many.constprop.0+0x0/0x168 stack backtrace: CPU: 15 PID: 202299 Comm: mmap1 Not tainted 5.17.0-20220113.rc0.git0.f2211f194038.300.fc35.s390x+debug #1 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: dump_stack_lvl+0x76/0x98 check_noncircular+0x136/0x158 check_prev_add+0xe0/0xed8 validate_chain+0x736/0xb20 __lock_acquire+0x604/0xbd8 lock_acquire.part.0+0xe2/0x238 lock_acquire+0xb0/0x200 _raw_spin_lock_irqsave+0x6a/0xd8 obj_cgroup_release+0x4a/0xe0 percpu_ref_put_many.constprop.0+0x150/0x168 drain_obj_stock+0x94/0xe8 refill_obj_stock+0x94/0x278 obj_cgroup_charge+0x164/0x1d8 kmem_cache_alloc+0xac/0x528 __sigqueue_alloc+0x150/0x308 __send_signal+0x260/0x550 send_signal+0x7e/0x348 force_sig_info_to_task+0x104/0x180 force_sig_fault+0x48/0x58 __do_pgm_check+0x120/0x1f0 pgm_check_handler+0x11e/0x180 INFO: lockdep is turned off. In this example a slab allocation from __send_signal() caused a refilling and draining of a percpu objcg stock, resulted in a releasing of another non-related objcg. Objcg release path requires taking the css_set_lock, which is used to synchronize objcg lists. This can create a circular dependency with the sighandler lock, which is taken with the locked css_set_lock by the freezer code (to freeze a task). In general it seems that using css_set_lock to synchronize objcg lists makes any slab allocations and deallocation with the locked css_set_lock and any intervened locks risky. To fix the problem and make the code more robust let's stop using css_set_lock to synchronize objcg lists and use a new dedicated spinlock instead. Link: https://lkml.kernel.org/r/Yfm1IHmoGdyUR81T@carbon.dhcp.thefacebook.com Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API") Signed-off-by: Roman Gushchin Reported-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Reviewed-by: Waiman Long Acked-by: Tejun Heo Reviewed-by: Shakeel Butt Reviewed-by: Jeremy Linton Tested-by: Jeremy Linton Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b72d75141e12..0abbd685703b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -219,7 +219,7 @@ struct obj_cgroup { struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { - struct list_head list; + struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; @@ -315,7 +315,8 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; struct obj_cgroup __rcu *objcg; - struct list_head objcg_list; /* list of inherited objcgs */ + /* list of inherited objcgs, protected by objcg_lock */ + struct list_head objcg_list; #endif MEMCG_PADDING(_pad2_); -- cgit v1.2.3 From 8913c61001482378d4ed8cc577b17c1ba3e847e4 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Fri, 11 Feb 2022 16:32:35 -0800 Subject: kfence: make test case compatible with run time set sample interval The parameter kfence_sample_interval can be set via boot parameter and late shell command, which is convenient for automated tests and KFENCE parameter optimization. However, KFENCE test case just uses compile-time CONFIG_KFENCE_SAMPLE_INTERVAL, which will make KFENCE test case not run as users desired. Export kfence_sample_interval, so that KFENCE test case can use run-time-set sample interval. Link: https://lkml.kernel.org/r/20220207034432.185532-1-liupeng256@huawei.com Signed-off-by: Peng Liu Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Sumit Semwal Cc: Christian Knig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfence.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 4b5e3679a72c..f49e64222628 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -17,6 +17,8 @@ #include #include +extern unsigned long kfence_sample_interval; + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an -- cgit v1.2.3 From d315bdbfebd517cf5efabf666c8099e027ef666f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 9 Feb 2022 16:56:33 +0100 Subject: drm/gem-shmem: Set vm_ops in static initializer Initialize default vm_ops in static initialization of the GEM SHMEM funcs, instead of the mmap code. It's simply better style. GEM helpers will later set a VMA's vm_ops from the default automatically. v2: * also update the drivers that build upon GEM SHMEM Signed-off-by: Thomas Zimmermann Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patchwork.freedesktop.org/patch/msgid/20220209155634.3994-2-tzimmermann@suse.de --- include/drm/drm_gem_shmem_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 311d66c9cf4b..08e7846e8abc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -135,6 +135,8 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem) void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, struct drm_printer *p, unsigned int indent); +extern const struct vm_operations_struct drm_gem_shmem_vm_ops; + /* * GEM object functions */ -- cgit v1.2.3 From 90d4aa20c8cc76f5baecd423b5dc289b899ebc42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Feb 2022 07:28:38 +0100 Subject: drm/ttm: fix resource manager size type and description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leave the man->size units as driver defined. Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220214093439.2989-1-christian.koenig@amd.com Reviewed-by: Matthew Auld --- include/drm/ttm/ttm_resource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69eea9d6399b..555a11fb8a7f 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -278,7 +278,7 @@ void ttm_resource_set_bo(struct ttm_resource *res, void ttm_resource_manager_init(struct ttm_resource_manager *man, struct ttm_device *bdev, - unsigned long p_size); + uint64_t size); int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager *man); -- cgit v1.2.3 From 0e05fc49c358cb49e59ce8d6ecda652951335e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 12 Jul 2021 14:05:01 +0200 Subject: drm/ttm: add common accounting to the resource mgr v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes sense to have this in the common manager for debugging and accounting of how much resources are used. v2: cleanup kerneldoc a bit v3: drop the atomic, update counter under lock instead Signed-off-by: Christian König Reviewed-by: Huang Rui (v1) Reviewed-by: Matthew Auld Tested-by: Bas Nieuwenhuizen Link: https://patchwork.freedesktop.org/patch/msgid/20220214093439.2989-2-christian.koenig@amd.com --- include/drm/ttm/ttm_resource.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 555a11fb8a7f..323c14a30c6b 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -130,10 +131,15 @@ struct ttm_resource_manager { struct dma_fence *move; /* - * Protected by the global->lru_lock. + * Protected by the bdev->lru_lock. */ - struct list_head lru[TTM_MAX_BO_PRIORITY]; + + /** + * @usage: How much of the resources are used, protected by the + * bdev->lru_lock. + */ + uint64_t usage; }; /** @@ -283,6 +289,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man, int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager *man); +uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man); void ttm_resource_manager_debug(struct ttm_resource_manager *man, struct drm_printer *p); -- cgit v1.2.3 From 2509969a9862b522d2208e8663057fb227556687 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 4 Feb 2022 16:13:41 -0800 Subject: drm: Plumb debugfs_init through to panels We'd like panels to be able to add things to debugfs underneath the connector's directory. Let's plumb it through. A panel will be able to put things in a "panel" directory under the connector's directory. Note that debugfs is not ABI and so it's always possible that the location that the panel gets for its debugfs could change in the future. NOTE: this currently only works if you're using a modern architecture. Specifically the plumbing relies on _both_ drm_bridge_connector and drm_panel_bridge. If you're not using one or both of these things then things won't be plumbed through. As a side effect of this change, drm_bridges can also get callbacks to put stuff underneath the connector's debugfs directory. At the moment all bridges in the chain have their debugfs_init() called with the connector's root directory. Signed-off-by: Douglas Anderson Reviewed-by: Javier Martinez Canillas Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20220204161245.v2.2.Ib0bd5346135cbb0b63006b69b61d4c8af6484740@changeid --- include/drm/drm_bridge.h | 7 +++++++ include/drm/drm_connector.h | 7 +++++++ include/drm/drm_panel.h | 8 ++++++++ 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 061d87313fac..f27b4060faa2 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -649,6 +649,13 @@ struct drm_bridge_funcs { * the DRM_BRIDGE_OP_HPD flag in their &drm_bridge->ops. */ void (*hpd_disable)(struct drm_bridge *bridge); + + /** + * @debugfs_init: + * + * Allows bridges to create bridge-specific debugfs files. + */ + void (*debugfs_init)(struct drm_bridge *bridge, struct dentry *root); }; /** diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5e36eb3df66f..5166186146f4 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1142,6 +1142,13 @@ struct drm_connector_funcs { * has been received from a source outside the display driver / device. */ void (*oob_hotplug_event)(struct drm_connector *connector); + + /** + * @debugfs_init: + * + * Allows connectors to create connector-specific debugfs files. + */ + void (*debugfs_init)(struct drm_connector *connector, struct dentry *root); }; /** diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 4602f833eb51..1ba2d424a53f 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -29,6 +29,7 @@ #include struct backlight_device; +struct dentry; struct device_node; struct drm_connector; struct drm_device; @@ -125,6 +126,13 @@ struct drm_panel_funcs { */ int (*get_timings)(struct drm_panel *panel, unsigned int num_timings, struct display_timing *timings); + + /** + * @debugfs_init: + * + * Allows panels to create panels-specific debugfs files. + */ + void (*debugfs_init)(struct drm_panel *panel, struct dentry *root); }; /** -- cgit v1.2.3 From bcf8b616deb8794179e3e9c6233a53f42664afb2 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 14 Feb 2022 14:37:06 +0100 Subject: drm/format-helper: Add drm_fb_xrgb8888_to_mono_reversed() Add support to convert from XR24 to reversed monochrome for drivers that control monochromatic display panels, that only have 1 bit per pixel. The function does a line-by-line conversion doing an intermediate step first from XR24 to 8-bit grayscale and then to reversed monochrome. The drm_fb_gray8_to_mono_reversed_line() helper was based on code from drivers/gpu/drm/tiny/repaper.c driver. Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Andy Shevchenko Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220214133710.3278506-3-javierm@redhat.com --- include/drm/drm_format_helper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index b30ed5de0a33..0b0937c0b2f6 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -43,4 +43,8 @@ int drm_fb_blit_toio(void __iomem *dst, unsigned int dst_pitch, uint32_t dst_for const void *vmap, const struct drm_framebuffer *fb, const struct drm_rect *rect); +void drm_fb_xrgb8888_to_mono_reversed(void *dst, unsigned int dst_pitch, const void *src, + const struct drm_framebuffer *fb, + const struct drm_rect *clip); + #endif /* __LINUX_DRM_FORMAT_HELPER_H */ -- cgit v1.2.3 From 8c30e2d81bfddc5ab9f6b04db1c0f7d6ca7bdf46 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 11 Feb 2022 10:46:40 +0100 Subject: fbdev: Don't sort deferred-I/O pages by default Fbdev's deferred I/O sorts all dirty pages by default, which incurs a significant overhead. Make the sorting step optional and update the few drivers that require it. Use a FIFO list by default. Most fbdev drivers with deferred I/O build a bounding rectangle around the dirty pages or simply flush the whole screen. The only two affected DRM drivers, generic fbdev and vmwgfx, both use a bounding rectangle. In those cases, the exact order of the pages doesn't matter. The other drivers look at the page index or handle pages one-by-one. The patch sets the sort_pagelist flag for those, even though some of them would probably work correctly without sorting. Driver maintainers should update their driver accordingly. Sorting pages by memory offset for deferred I/O performs an implicit bubble-sort step on the list of dirty pages. The algorithm goes through the list of dirty pages and inserts each new page according to its index field. Even worse, list traversal always starts at the first entry. As video memory is most likely updated scanline by scanline, the algorithm traverses through the complete list for each updated page. For example, with 1024x768x32bpp each page covers exactly one scanline. Writing a single screen update from top to bottom requires updating 768 pages. With an average list length of 384 entries, a screen update creates (768 * 384 =) 294912 compare operation. Fix this by making the sorting step opt-in and update the few drivers that require it. All other drivers work with unsorted page lists. Pages are appended to the list. Therefore, in the common case of writing the framebuffer top to bottom, pages are still sorted by offset, which may have a positive effect on performance. Playing a video [1] in mplayer's benchmark mode shows the difference (i7-4790, FullHD, simpledrm, kernel with debugging). mplayer -benchmark -nosound -vo fbdev ./big_buck_bunny_720p_stereo.ogg With sorted page lists: BENCHMARKs: VC: 32.960s VO: 73.068s A: 0.000s Sys: 2.413s = 108.441s BENCHMARK%: VC: 30.3947% VO: 67.3802% A: 0.0000% Sys: 2.2251% = 100.0000% With unsorted page lists: BENCHMARKs: VC: 31.005s VO: 42.889s A: 0.000s Sys: 2.256s = 76.150s BENCHMARK%: VC: 40.7156% VO: 56.3219% A: 0.0000% Sys: 2.9625% = 100.0000% VC shows the overhead of video decoding, VO shows the overhead of the video output. Using unsorted page lists reduces the benchmark's run time by ~32s/~25%. v2: * Make sorted pagelists the special case (Sam) * Comment on drivers' use of pagelist (Sam) * Warn about the overhead in comment Signed-off-by: Thomas Zimmermann Acked-by: Sam Ravnborg Acked-by: Andy Shevchenko Link: https://download.blender.org/peach/bigbuckbunny_movies/big_buck_bunny_720p_stereo.ogg # [1] Link: https://patchwork.freedesktop.org/patch/msgid/20220211094640.21632-3-tzimmermann@suse.de --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 9a14f3f8a329..39baa9a70779 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -204,6 +204,7 @@ struct fb_pixmap { struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; + bool sort_pagelist; /* sort pagelist by offset */ struct mutex lock; /* mutex that protects the page list */ struct list_head pagelist; /* list of touched pages */ /* callback */ -- cgit v1.2.3 From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Feb 2022 18:54:38 -0500 Subject: drm/amdkfd: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays CC: Changcheng Deng Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e4268f5e482..baec5a41de3e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args { __u32 op; __u32 nattr; /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[0]; + struct kfd_ioctl_svm_attribute attrs[]; }; /** -- cgit v1.2.3 From a65dbf7cded724a5ed4a5e1a718616b048ca0c34 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 23 Dec 2021 09:35:37 +0800 Subject: drm/amdgpu/gfx10: Add GC 10.3.7 Support Needed to properly initialize GC 10.3.7. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 76b580d10a52..55fa660e35f4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) } -- cgit v1.2.3 From 874bfdfa4735cbb1b0d6e0c6157c712a312371a1 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Feb 2022 14:42:09 -0500 Subject: drm/amdgpu: add gc 10.3.6 support this patch adds gc 10.3.6 support. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 55fa660e35f4..1d65c1fbc4ec 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) -- cgit v1.2.3 From cccd73d607fee52f35b4b030408fa5f6c21ef503 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:32 -0800 Subject: iosys-map: Add offset to iosys_map_memcpy_to() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In certain situations it's useful to be able to write to an offset of the mapping. Add a dst_offset to iosys_map_memcpy_to(). Cc: Sumit Semwal Cc: Christian König Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Christian König Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-2-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index f4186f91caa6..edd730b1e899 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -220,22 +220,23 @@ static inline void iosys_map_clear(struct iosys_map *map) } /** - * iosys_map_memcpy_to - Memcpy into iosys mapping + * iosys_map_memcpy_to - Memcpy into offset of iosys_map * @dst: The iosys_map structure + * @dst_offset: The offset from which to copy * @src: The source buffer * @len: The number of byte in src * - * Copies data into a iosys mapping. The source buffer is in system - * memory. Depending on the buffer's location, the helper picks the correct - * method of accessing the memory. + * Copies data into a iosys_map with an offset. The source buffer is in + * system memory. Depending on the buffer's location, the helper picks the + * correct method of accessing the memory. */ -static inline void iosys_map_memcpy_to(struct iosys_map *dst, const void *src, - size_t len) +static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, + const void *src, size_t len) { if (dst->is_iomem) - memcpy_toio(dst->vaddr_iomem, src, len); + memcpy_toio(dst->vaddr_iomem + dst_offset, src, len); else - memcpy(dst->vaddr, src, len); + memcpy(dst->vaddr + dst_offset, src, len); } /** -- cgit v1.2.3 From e62f25e8b3cdd29224c27938addba817aedd4b54 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 16 Feb 2022 09:41:33 -0800 Subject: iosys-map: Add a few more helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First the simplest ones: - iosys_map_memset(): when abstracting system and I/O memory, just like the memcpy() use case, memset() also has dedicated functions to be called for using IO memory. - iosys_map_memcpy_from(): we may need to copy data from I/O memory, not only to. In certain situations it's useful to be able to read or write to an offset that is calculated by having the memory layout given by a struct declaration. Usually we are going to read/write a u8, u16, u32 or u64. As a pre-requisite for the implementation, add iosys_map_memcpy_from() to be the equivalent of iosys_map_memcpy_to(), but in the other direction. Then add 2 pairs of macros: - iosys_map_rd() / iosys_map_wr() - iosys_map_rd_field() / iosys_map_wr_field() The first pair takes the C-type and offset to read/write. The second pair uses a struct describing the layout of the mapping in order to calculate the offset and size being read/written. We could use readb, readw, readl, readq and the write* counterparts, however due to alignment issues this may not work on all architectures. If alignment needs to be checked to call the right function, it's not possible to decide at compile-time which function to call: so just leave the decision to the memcpy function that will do exactly that. Finally, in order to use the above macros with a map derived from another, add another initializer: IOSYS_MAP_INIT_OFFSET(). v2: - Rework IOSYS_MAP_INIT_OFFSET() so it doesn't rely on aliasing rules within the union - Add offset to both iosys_map_rd_field() and iosys_map_wr_field() to allow the struct itself to be at an offset from the mapping - Add documentation to iosys_map_rd_field() with example and expected memory layout v3: - Drop kernel.h include as it's not needed anymore Cc: Sumit Semwal Cc: Christian König Cc: Thomas Zimmermann Cc: Mauro Carvalho Chehab Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Matt Atwood Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220216174147.3073235-3-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 201 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) (limited to 'include') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index edd730b1e899..e69a002d5aa4 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -120,6 +120,45 @@ struct iosys_map { .is_iomem = false, \ } +/** + * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map + * @map_: The dma-buf mapping structure to copy from + * @offset_: Offset to add to the other mapping + * + * Initializes a new iosys_map struct based on another passed as argument. It + * does a shallow copy of the struct so it's possible to update the back storage + * without changing where the original map points to. It is the equivalent of + * doing: + * + * .. code-block:: c + * + * iosys_map map = other_map; + * iosys_map_incr(&map, &offset); + * + * Example usage: + * + * .. code-block:: c + * + * void foo(struct device *dev, struct iosys_map *base_map) + * { + * ... + * struct iosys_map map = IOSYS_MAP_INIT_OFFSET(base_map, FIELD_OFFSET); + * ... + * } + * + * The advantage of using the initializer over just increasing the offset with + * iosys_map_incr() like above is that the new map will always point to the + * right place of the buffer during its scope. It reduces the risk of updating + * the wrong part of the buffer and having no compiler warning about that. If + * the assignment to IOSYS_MAP_INIT_OFFSET() is forgotten, the compiler can warn + * about the use of uninitialized variable. + */ +#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \ + struct iosys_map copy = *map_; \ + iosys_map_incr(©, offset_); \ + copy; \ +}) + /** * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory * @map: The iosys_map structure @@ -239,6 +278,26 @@ static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, memcpy(dst->vaddr + dst_offset, src, len); } +/** + * iosys_map_memcpy_from - Memcpy from iosys_map into system memory + * @dst: Destination in system memory + * @src: The iosys_map structure + * @src_offset: The offset from which to copy + * @len: The number of byte in src + * + * Copies data from a iosys_map with an offset. The dest buffer is in + * system memory. Depending on the mapping location, the helper picks the + * correct method of accessing the memory. + */ +static inline void iosys_map_memcpy_from(void *dst, const struct iosys_map *src, + size_t src_offset, size_t len) +{ + if (src->is_iomem) + memcpy_fromio(dst, src->vaddr_iomem + src_offset, len); + else + memcpy(dst, src->vaddr + src_offset, len); +} + /** * iosys_map_incr - Increments the address stored in a iosys mapping * @map: The iosys_map structure @@ -255,4 +314,146 @@ static inline void iosys_map_incr(struct iosys_map *map, size_t incr) map->vaddr += incr; } +/** + * iosys_map_memset - Memset iosys_map + * @dst: The iosys_map structure + * @offset: Offset from dst where to start setting value + * @value: The value to set + * @len: The number of bytes to set in dst + * + * Set value in iosys_map. Depending on the buffer's location, the helper + * picks the correct method of accessing the memory. + */ +static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + if (dst->is_iomem) + memset_io(dst->vaddr_iomem + offset, value, len); + else + memset(dst->vaddr + offset, value, len); +} + +/** + * iosys_map_rd - Read a C-type value from the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from which to read + * @type__: Type of the value being read + * + * Read a C type value from iosys_map, handling possible un-aligned accesses to + * the mapping. + * + * Returns: + * The value read from the mapping. + */ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val; \ + iosys_map_memcpy_from(&val, map__, offset__, sizeof(val)); \ + val; \ +}) + +/** + * iosys_map_wr - Write a C-type value to the iosys_map + * + * @map__: The iosys_map structure + * @offset__: The offset from the mapping to write to + * @type__: Type of the value being written + * @val__: Value to write + * + * Write a C-type value to the iosys_map, handling possible un-aligned accesses + * to the mapping. + */ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val = (val__); \ + iosys_map_memcpy_to(map__, offset__, &val, sizeof(val)); \ +}) + +/** + * iosys_map_rd_field - Read a member from a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * + * Read a value from iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and its + * value read handling possible un-aligned memory accesses. For example: suppose + * there is a @struct foo defined as below and the value ``foo.field2.inner2`` + * needs to be read from the iosys_map: + * + * .. code-block:: c + * + * struct foo { + * int field1; + * struct { + * int inner1; + * int inner2; + * } field2; + * int field3; + * } __packed; + * + * This is the expected memory layout of a buffer using iosys_map_rd_field(): + * + * +------------------------------+--------------------------+ + * | Address | Content | + * +==============================+==========================+ + * | buffer + 0000 | start of mmapped buffer | + * | | pointed by iosys_map | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + ``struct_offset__`` | start of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + wwww | ``foo.field2.inner2`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + yyyy | end of ``struct foo`` | + * +------------------------------+--------------------------+ + * | ... | ... | + * +------------------------------+--------------------------+ + * | buffer + zzzz | end of mmaped buffer | + * +------------------------------+--------------------------+ + * + * Values automatically calculated by this macro or not needed are denoted by + * wwww, yyyy and zzzz. This is the code to read that value: + * + * .. code-block:: c + * + * x = iosys_map_rd_field(&map, offset, struct foo, field2.inner2); + * + * Returns: + * The value read from the mapping. + */ +#define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \ + struct_type__ *s; \ + iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__)); \ +}) + +/** + * iosys_map_wr_field - Write to a member of a struct in the iosys_map + * + * @map__: The iosys_map structure + * @struct_offset__: Offset from the beggining of the map, where the struct + * is located + * @struct_type__: The struct describing the layout of the mapping + * @field__: Member of the struct to read + * @val__: Value to write + * + * Write a value to the iosys_map considering its layout is described by a C struct + * starting at @struct_offset__. The field offset and size is calculated and the + * @val__ is written handling possible un-aligned memory accesses. Refer to + * iosys_map_rd_field() for expected usage and memory layout. + */ +#define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct_type__ *s; \ + iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \ + typeof(s->field__), val__); \ +}) + #endif /* __IOSYS_MAP_H__ */ -- cgit v1.2.3 From afea229fe10282da14595870b44f82792451dfb2 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:48 +0530 Subject: drm: improve drm_buddy_alloc function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make drm_buddy_alloc a single function to handle range allocation and non-range allocation demands - Implemented a new function alloc_range() which allocates the requested power-of-two block comply with range limitations - Moved order computation and memory alignment logic from i915 driver to drm buddy v2: merged below changes to keep the build unbroken - drm_buddy_alloc_range() becomes obsolete and may be removed - enable ttm range allocation (fpfn / lpfn) support in i915 driver - apply enhanced drm_buddy_alloc() function to i915 driver v3(Matthew Auld): - Fix alignment issues and remove unnecessary list_empty check - add more validation checks for input arguments - make alloc_range() block allocations as bottom-up - optimize order computation logic - replace uint64_t with u64, which is preferred in the kernel v4(Matthew Auld): - keep drm_buddy_alloc_range() function implementation for generic actual range allocations - keep alloc_range() implementation for end bias allocations v5(Matthew Auld): - modify drm_buddy_alloc() passing argument place->lpfn to lpfn as place->lpfn will currently always be zero for i915 v6(Matthew Auld): - fixup potential uaf - If we are unlucky and can't allocate enough memory when splitting blocks, where we temporarily end up with the given block and its buddy on the respective free list, then we need to ensure we delete both blocks, and no just the buddy, before potentially freeing them - fix warnings reported by kernel test robot v7(Matthew Auld): - revert fixup potential uaf - keep __alloc_range() add node to the list logic same as drm_buddy_alloc_blocks() by having a temporary list variable - at drm_buddy_alloc_blocks() keep i915 range_overflows macro and add a new check for end variable v8: - fix warnings reported by kernel test robot v9(Matthew Auld): - remove DRM_BUDDY_RANGE_ALLOCATION flag - remove unnecessary function description v10: - keep DRM_BUDDY_RANGE_ALLOCATION flag as removing the flag and replacing with (end < size) logic fails amdgpu driver load Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-1-Arunpravin.PaneerSelvam@amd.com --- include/drm/drm_buddy.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index f524db152413..54f25a372f27 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -22,6 +22,8 @@ start__ >= max__ || size__ > max__ - start__; \ }) +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0) + struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) @@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); void drm_buddy_fini(struct drm_buddy *mm); -struct drm_buddy_block * -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order); - -int drm_buddy_alloc_range(struct drm_buddy *mm, - struct list_head *blocks, - u64 start, u64 size); +int drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, u64 size, + u64 min_page_size, + struct list_head *blocks, + unsigned long flags); void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); -- cgit v1.2.3 From 476e4063022787b5720758239ee4c22fa2495e82 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:49 +0530 Subject: drm: implement top-down allocation method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented a function which walk through the order list, compares the offset and returns the maximum offset block, this method is unpredictable in obtaining the high range address blocks which depends on allocation and deallocation. for instance, if driver requests address at a low specific range, allocator traverses from the root block and splits the larger blocks until it reaches the specific block and in the process of splitting, lower orders in the freelist are occupied with low range address blocks and for the subsequent TOPDOWN memory request we may return the low range blocks.To overcome this issue, we may go with the below approach. The other approach, sorting each order list entries in ascending order and compares the last entry of each order list in the freelist and return the max block. This creates sorting overhead on every drm_buddy_free() request and split up of larger blocks for a single page request. v2: - Fix alignment issues(Matthew Auld) - Remove unnecessary list_empty check(Matthew Auld) - merged the below patch to see the feature in action - add top-down alloc support to i915 driver Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-2-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- include/drm/drm_buddy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 54f25a372f27..f0378fb48d06 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -23,6 +23,7 @@ }) #define DRM_BUDDY_RANGE_ALLOCATION (1 << 0) +#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) -- cgit v1.2.3 From 95ee2a8b4b3cd1fb25f7e14e2202da4045030173 Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Mon, 21 Feb 2022 22:15:50 +0530 Subject: drm: implement a method to free unused pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On contiguous allocation, we round up the size to the *next* power of 2, implement a function to free the unused pages after the newly allocate block. v2(Matthew Auld): - replace function name 'drm_buddy_free_unused_pages' with drm_buddy_block_trim - replace input argument name 'actual_size' with 'new_size' - add more validation checks for input arguments - add overlaps check to avoid needless searching and splitting - merged the below patch to see the feature in action - add free unused pages support to i915 driver - lock drm_buddy_block_trim() function as it calls mark_free/mark_split are all globally visible v3(Matthew Auld): - remove trim method error handling as we address the failure case at drm_buddy_block_trim() function v4: - in case of trim, at __alloc_range() split_block failure path marks the block as free and removes it from the original list, potentially also freeing it, to overcome this problem, we turn the drm_buddy_block_trim() input node into a temporary node to prevent recursively freeing itself, but still retain the un-splitting/freeing of the other nodes(Matthew Auld) - modify the drm_buddy_block_trim() function return type v5(Matthew Auld): - revert drm_buddy_block_trim() function return type changes in v4 - modify drm_buddy_block_trim() passing argument n_pages to original_size as n_pages has already been rounded up to the next power-of-two and passing n_pages results noop v6: - fix warnings reported by kernel test robot v7: - modify drm_buddy_block_trim() function doc description - at drm_buddy_block_trim() handle non-allocated block as a serious programmer error - fix a typo Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220221164552.2434-3-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- include/drm/drm_buddy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index f0378fb48d06..bd21f9dfd15e 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -140,6 +140,10 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, struct list_head *blocks, unsigned long flags); +int drm_buddy_block_trim(struct drm_buddy *mm, + u64 new_size, + struct list_head *blocks); + void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects); -- cgit v1.2.3 From 8ab62eda177bc350f34fea4fcea23603b8184bfd Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Tue, 22 Feb 2022 17:42:52 +0800 Subject: drm/sched: Add device pointer to drm_gpu_scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device pointer so scheduler's printing can use DRM_DEV_ERROR() instead, which makes life easier under multiple GPU scenario. v2: amend all calls of drm_sched_init() v3: fill dev pointer for all drm_sched_init() calls Signed-off-by: Jiawei Gu Reviewed-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220221095705.5290-1-Jiawei.Gu@amd.com --- include/drm/gpu_scheduler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index bbc22fad8d80..944f83ef9f2e 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -457,13 +457,14 @@ struct drm_gpu_scheduler { atomic_t _score; bool ready; bool free_guilty; + struct device *dev; }; int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, uint32_t hw_submission, unsigned hang_limit, long timeout, struct workqueue_struct *timeout_wq, - atomic_t *score, const char *name); + atomic_t *score, const char *name, struct device *dev); void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_job_init(struct drm_sched_job *job, -- cgit v1.2.3 From 92937f170d3f49f41d7acb86243ee691a98eb2be Mon Sep 17 00:00:00 2001 From: Arunpravin Date: Tue, 22 Feb 2022 23:18:41 +0530 Subject: drm/selftests: add drm buddy alloc range testcase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add a test to check the range allocation - export get_buddy() function in drm_buddy.c - export drm_prandom_u32_max_state() in lib/drm_random.c - include helper functions - include prime number header file v2: - add drm_get_buddy() function description (Matthew Auld) - removed unnecessary test succeeded print Signed-off-by: Arunpravin Reviewed-by: Matthew Auld Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220222174845.2175-3-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- include/drm/drm_buddy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index bd21f9dfd15e..572077ff8ae7 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -134,6 +134,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size); void drm_buddy_fini(struct drm_buddy *mm); +struct drm_buddy_block * +drm_get_buddy(struct drm_buddy_block *block); + int drm_buddy_alloc_blocks(struct drm_buddy *mm, u64 start, u64 end, u64 size, u64 min_page_size, -- cgit v1.2.3