From 7283f862bd991c8657e9bf1c02db772fcf018f13 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 22 Jun 2022 16:01:33 +0200 Subject: drm: Implement DRM aperture helpers under video/ Implement DRM's aperture helpers under video/ for sharing with other sub-systems. Remove DRM-isms from the interface. The helpers track the ownership of framebuffer apertures and provide hand-over from firmware, such as EFI and VESA, to native graphics drivers. Other subsystems, such as fbdev and vfio, also have to maintain ownership of framebuffer apertures. Moving DRM's aperture helpers to a more public location allows all subsystems to interact with each other and share a common implementation. The aperture helpers are selected by the various firmware drivers within DRM and fbdev, and the VGA text-console driver. The original DRM interface is kept in place for use by DRM drivers. v3: * prefix all interfaces with aperture_ (Javier) * rework and simplify documentation (Javier) * rename struct dev_aperture to struct aperture_range * rebase onto latest DRM * update MAINTAINERS entry Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Tested-by: Laszlo Ersek Link: https://patchwork.freedesktop.org/patch/msgid/20220622140134.12763-3-tzimmermann@suse.de --- include/linux/aperture.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/linux/aperture.h (limited to 'include/linux') diff --git a/include/linux/aperture.h b/include/linux/aperture.h new file mode 100644 index 000000000000..442f15a57cad --- /dev/null +++ b/include/linux/aperture.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef _LINUX_APERTURE_H_ +#define _LINUX_APERTURE_H_ + +#include + +struct pci_dev; +struct platform_device; + +#if defined(CONFIG_APERTURE_HELPERS) +int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, + resource_size_t base, + resource_size_t size); + +int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, + bool primary, const char *name); + +int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name); +#else +static inline int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, + resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, + bool primary, const char *name) +{ + return 0; +} + +static inline int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name) +{ + return 0; +} +#endif + +/** + * aperture_remove_all_conflicting_devices - remove all existing framebuffers + * @primary: also kick vga16fb if present; only relevant for VGA devices + * @name: a descriptive name of the requesting driver + * + * This function removes all graphics device drivers. Use this function on systems + * that can have their framebuffer located anywhere in memory. + * + * Returns: + * 0 on success, or a negative errno code otherwise + */ +static inline int aperture_remove_all_conflicting_devices(bool primary, const char *name) +{ + return aperture_remove_conflicting_devices(0, (resource_size_t)-1, primary, name); +} + +#endif -- cgit v1.2.3 From 5f278dbd540b7548bc5193552e6d478255c14c2d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 28 Jun 2022 12:10:15 -0700 Subject: iosys-map: Add per-word read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of always falling back to memcpy_fromio() for any size, prefer using read{b,w,l}(). When reading struct members it's common to read individual integer variables individually. Going through memcpy_fromio() for each of them poses a high penalty. Employ a similar trick as __seqprop() by using _Generic() to generate only the specific call based on a type-compatible variable. For a pariticular i915 workload producing GPU context switches, __get_engine_usage_record() is particularly hot since the engine usage is read from device local memory with dgfx, possibly multiple times since it's racy. Test execution time for this test shows a ~12.5% improvement with DG2: Before: nrepeats = 1000; min = 7.63243e+06; max = 1.01817e+07; median = 9.52548e+06; var = 526149; After: nrepeats = 1000; min = 7.03402e+06; max = 8.8832e+06; median = 8.33955e+06; var = 333113; Other things attempted that didn't prove very useful: 1) Change the _Generic() on x86 to just dereference the memory address 2) Change __get_engine_usage_record() to do just 1 read per loop, comparing with the previous value read 3) Change __get_engine_usage_record() to access the fields directly as it was before the conversion to iosys-map (3) did gave a small improvement (~3%), but doesn't seem to scale well to other similar cases in the driver. Additional test by Chris Wilson using gem_create from igt with some changes to track object creation time. This happens to accidentally stress this code path: Pre iosys_map conversion of engine busyness: lmem0: Creating 262144 4KiB objects took 59274.2ms Unpatched: lmem0: Creating 262144 4KiB objects took 108830.2ms With readl (this patch): lmem0: Creating 262144 4KiB objects took 61348.6ms s/readl/READ_ONCE/ lmem0: Creating 262144 4KiB objects took 61333.2ms So we do take a little bit more time than before the conversion, but that is due to other factors: bringing the READ_ONCE back would be as good as just doing this conversion. v2: - Remove default from _Generic() - callers wanting to read more than u64 should use iosys_map_memcpy_from() - Add READ_ONCE() cases dereferencing the pointer when using system memory v3: - Fix precedence issue when casting inside READ_ONCE(). By not using () around vaddr__ the offset was not part of the cast, but rather added to it, producing a wrong address - Remove compiletime_assert() as READ_ONCE() already contains it Signed-off-by: Lucas De Marchi Reviewed-by: Christian König # v1 Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220628191016.3899428-1-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4b8406ee8bc4..d19977e011ae 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -6,6 +6,7 @@ #ifndef __IOSYS_MAP_H__ #define __IOSYS_MAP_H__ +#include #include #include @@ -333,6 +334,23 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, memset(dst->vaddr + offset, value, len); } +#ifdef CONFIG_64BIT +#define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ + u64: val_ = readq(vaddr_iomem_) +#else +#define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ + u64: memcpy_fromio(&(val_), vaddr_iomem_, sizeof(u64)) +#endif + +#define __iosys_map_rd_io(val__, vaddr_iomem__, type__) _Generic(val__, \ + u8: val__ = readb(vaddr_iomem__), \ + u16: val__ = readw(vaddr_iomem__), \ + u32: val__ = readl(vaddr_iomem__), \ + __iosys_map_rd_io_u64_case(val__, vaddr_iomem__)) + +#define __iosys_map_rd_sys(val__, vaddr__, type__) \ + val__ = READ_ONCE(*(type__ *)(vaddr__)) + /** * iosys_map_rd - Read a C-type value from the iosys_map * @@ -340,16 +358,21 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @offset__: The offset from which to read * @type__: Type of the value being read * - * Read a C type value from iosys_map, handling possible un-aligned accesses to - * the mapping. + * Read a C type value (u8, u16, u32 and u64) from iosys_map. For other types or + * if pointer may be unaligned (and problematic for the architecture supported), + * use iosys_map_memcpy_from(). * * Returns: * The value read from the mapping. */ -#define iosys_map_rd(map__, offset__, type__) ({ \ - type__ val; \ - iosys_map_memcpy_from(&val, map__, offset__, sizeof(val)); \ - val; \ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val; \ + if ((map__)->is_iomem) { \ + __iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\ + } else { \ + __iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__); \ + } \ + val; \ }) /** @@ -379,9 +402,10 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * * Read a value from iosys_map considering its layout is described by a C struct * starting at @struct_offset__. The field offset and size is calculated and its - * value read handling possible un-aligned memory accesses. For example: suppose - * there is a @struct foo defined as below and the value ``foo.field2.inner2`` - * needs to be read from the iosys_map: + * value read. If the field access would incur in un-aligned access, then either + * iosys_map_memcpy_from() needs to be used or the architecture must support it. + * For example: suppose there is a @struct foo defined as below and the value + * ``foo.field2.inner2`` needs to be read from the iosys_map: * * .. code-block:: c * -- cgit v1.2.3 From 6fb5ee7cec06266a29f25ecc01a23b9d107f64e1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 28 Jun 2022 12:10:16 -0700 Subject: iosys-map: Add per-word write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like was done for read, provide the equivalent for write. Even if current users are not in the hot path, this should future-proof it. v2: - Remove default from _Generic() - callers wanting to write more than u64 should use iosys_map_memcpy_to() - Add WRITE_ONCE() cases dereferencing the pointer when using system memory v3: - Fix precedence issue when casting inside WRITE_ONCE(). By not using () around vaddr__ the offset was not part of the cast, but rather added to it, producing a wrong address - Remove compiletime_assert() as WRITE_ONCE() already contains it Signed-off-by: Lucas De Marchi Reviewed-by: Reviewed-by: Christian König # v1 Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220628191016.3899428-2-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index d19977e011ae..a533cae189d7 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -337,9 +337,13 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, #ifdef CONFIG_64BIT #define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ u64: val_ = readq(vaddr_iomem_) +#define __iosys_map_wr_io_u64_case(val_, vaddr_iomem_) \ + u64: writeq(val_, vaddr_iomem_) #else #define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ u64: memcpy_fromio(&(val_), vaddr_iomem_, sizeof(u64)) +#define __iosys_map_wr_io_u64_case(val_, vaddr_iomem_) \ + u64: memcpy_toio(vaddr_iomem_, &(val_), sizeof(u64)) #endif #define __iosys_map_rd_io(val__, vaddr_iomem__, type__) _Generic(val__, \ @@ -351,6 +355,15 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, #define __iosys_map_rd_sys(val__, vaddr__, type__) \ val__ = READ_ONCE(*(type__ *)(vaddr__)) +#define __iosys_map_wr_io(val__, vaddr_iomem__, type__) _Generic(val__, \ + u8: writeb(val__, vaddr_iomem__), \ + u16: writew(val__, vaddr_iomem__), \ + u32: writel(val__, vaddr_iomem__), \ + __iosys_map_wr_io_u64_case(val__, vaddr_iomem__)) + +#define __iosys_map_wr_sys(val__, vaddr__, type__) \ + WRITE_ONCE(*(type__ *)(vaddr__), val__) + /** * iosys_map_rd - Read a C-type value from the iosys_map * @@ -383,12 +396,17 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @type__: Type of the value being written * @val__: Value to write * - * Write a C-type value to the iosys_map, handling possible un-aligned accesses - * to the mapping. + * Write a C type value (u8, u16, u32 and u64) to the iosys_map. For other types + * or if pointer may be unaligned (and problematic for the architecture + * supported), use iosys_map_memcpy_to() */ -#define iosys_map_wr(map__, offset__, type__, val__) ({ \ - type__ val = (val__); \ - iosys_map_memcpy_to(map__, offset__, &val, sizeof(val)); \ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val = (val__); \ + if ((map__)->is_iomem) { \ + __iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\ + } else { \ + __iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__); \ + } \ }) /** @@ -469,10 +487,12 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @field__: Member of the struct to read * @val__: Value to write * - * Write a value to the iosys_map considering its layout is described by a C struct - * starting at @struct_offset__. The field offset and size is calculated and the - * @val__ is written handling possible un-aligned memory accesses. Refer to - * iosys_map_rd_field() for expected usage and memory layout. + * Write a value to the iosys_map considering its layout is described by a C + * struct starting at @struct_offset__. The field offset and size is calculated + * and the @val__ is written. If the field access would incur in un-aligned + * access, then either iosys_map_memcpy_to() needs to be used or the + * architecture must support it. Refer to iosys_map_rd_field() for expected + * usage and memory layout. */ #define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ struct_type__ *s; \ -- cgit v1.2.3