From 6d89ead19946181df1e41d38917fddc951dbd95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Jul 2024 11:35:23 +0200 Subject: UAPI/ioctl: Improve parameter name of ioctl request definition helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The third parameter to _IOR et al is a type name, not a size. So the parameter being named "size" is irritating. Rename it to "argtype" instead to reduce confusion. There is a very minor chance that this breaks stuff. It only hurts however if there is a variable (or macro) in userspace that is called "argtype" *and* it's used in the parameters of _IOR and friends. IMHO this is negligible because usually definitions making use of these macros are provided by kernel headers (i.e. us) or if they are replicated in userspace code, they are replicated and so supposed to match the kernel definitions (e.g. to make them usable by programs without the need to update the kernel headers used to compile the program). Signed-off-by: Uwe Kleine-König Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/ioctl.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h index a84f4db8a250..e3290a5824c9 100644 --- a/include/uapi/asm-generic/ioctl.h +++ b/include/uapi/asm-generic/ioctl.h @@ -82,13 +82,13 @@ * NOTE: _IOW means userland is writing and kernel is reading. _IOR * means userland is reading and kernel is writing. */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOW(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOWR(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOR_BAD(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),sizeof(argtype)) +#define _IOW_BAD(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),sizeof(argtype)) +#define _IOWR_BAD(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(argtype)) /* used to decode ioctl numbers.. */ #define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -- cgit v1.2.3 From 7c7e6c8924e7bf98db4e5b2edb202842003c00c2 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 24 Oct 2024 19:54:43 +0200 Subject: tty: serial: handle HAS_IOPORT dependencies In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at compile time. We thus need to add HAS_IOPORT as dependency for those drivers using them unconditionally. Some 8250 serial drivers support MMIO only use, so fence only the parts requiring I/O ports and print an error message if a device can't be supported with the current configuration. Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Acked-by: Greg Kroah-Hartman Reviewed-by: Maciej W. Rozycki Signed-off-by: Niklas Schnelle Signed-off-by: Arnd Bergmann --- include/linux/serial_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4ab65874a850..743b4afaad4c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -505,7 +505,11 @@ struct uart_port { * The remaining bits are serial-core specific and not modifiable by * userspace. */ +#ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) +#else +#define UPF_FOURPORT 0 +#endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) -- cgit v1.2.3 From 6f043e75744596968b6547c4bd43e4d30bbb6d6e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 24 Oct 2024 19:54:44 +0200 Subject: asm-generic/io.h: Remove I/O port accessors for HAS_IOPORT=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With all subsystems and drivers either declaring their dependence on HAS_IOPORT or fencing I/O port specific code sections we can finally make inb()/outb() and friends compile-time dependent on HAS_IOPORT as suggested by Linus in the linked mail. The main benefit of this is that on platforms such as s390 which have no meaningful way of implementing inb()/outb() their use without the proper HAS_IOPORT dependency will result in easy to catch and fix compile-time errors instead of compiling code that can never work. Link: https://lore.kernel.org/lkml/CAHk-=wg80je=K7madF4e7WrRNp37e3qh6y10Svhdc7O8SZ_-8g@mail.gmail.com/ Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Reviewed-by: Takashi Iwai Reviewed-by: Ville Syrjälä Reviewed-by: Jarkko Sakkinen Acked-by: Lucas De Marchi Acked-by: Maciej W. Rozycki Acked-by: Damien Le Moal Acked-by: Jaroslav Kysela Acked-by: Greg Kroah-Hartman Acked-by: Rafael J. Wysocki Acked-by: Johannes Berg # for ARCH=um Acked-by: Geert Uytterhoeven Acked-by: Kalle Valo Acked-by: Jakub Kicinski Acked-by: Corey Minyard Acked-by: Guenter Roeck Acked-by: Damien Le Moal Acked-by: Sebastian Reichel Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 80de699bf6af..1027be6a62bc 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -540,6 +540,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #if !defined(inb) && !defined(_inb) #define _inb _inb +#ifdef CONFIG_HAS_IOPORT static inline u8 _inb(unsigned long addr) { u8 val; @@ -549,10 +550,15 @@ static inline u8 _inb(unsigned long addr) __io_par(val); return val; } +#else +u8 _inb(unsigned long addr) + __compiletime_error("inb()) requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inw) && !defined(_inw) #define _inw _inw +#ifdef CONFIG_HAS_IOPORT static inline u16 _inw(unsigned long addr) { u16 val; @@ -562,10 +568,15 @@ static inline u16 _inw(unsigned long addr) __io_par(val); return val; } +#else +u16 _inw(unsigned long addr) + __compiletime_error("inw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inl) && !defined(_inl) #define _inl _inl +#ifdef CONFIG_HAS_IOPORT static inline u32 _inl(unsigned long addr) { u32 val; @@ -575,36 +586,55 @@ static inline u32 _inl(unsigned long addr) __io_par(val); return val; } +#else +u32 _inl(unsigned long addr) + __compiletime_error("inl() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outb) && !defined(_outb) #define _outb _outb +#ifdef CONFIG_HAS_IOPORT static inline void _outb(u8 value, unsigned long addr) { __io_pbw(); __raw_writeb(value, PCI_IOBASE + addr); __io_paw(); } +#else +void _outb(u8 value, unsigned long addr) + __compiletime_error("outb() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outw) && !defined(_outw) #define _outw _outw +#ifdef CONFIG_HAS_IOPORT static inline void _outw(u16 value, unsigned long addr) { __io_pbw(); __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outw(u16 value, unsigned long addr) + __compiletime_error("outw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outl) && !defined(_outl) #define _outl _outl +#ifdef CONFIG_HAS_IOPORT static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outl(u32 value, unsigned long addr) + __compiletime_error("outl() requires CONFIG_HAS_IOPORT"); +#endif #endif #include @@ -688,53 +718,83 @@ static inline void outl_p(u32 value, unsigned long addr) #ifndef insb #define insb insb +#ifdef CONFIG_HAS_IOPORT static inline void insb(unsigned long addr, void *buffer, unsigned int count) { readsb(PCI_IOBASE + addr, buffer, count); } +#else +void insb(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insb() requires HAS_IOPORT"); +#endif #endif #ifndef insw #define insw insw +#ifdef CONFIG_HAS_IOPORT static inline void insw(unsigned long addr, void *buffer, unsigned int count) { readsw(PCI_IOBASE + addr, buffer, count); } +#else +void insw(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insw() requires HAS_IOPORT"); +#endif #endif #ifndef insl #define insl insl +#ifdef CONFIG_HAS_IOPORT static inline void insl(unsigned long addr, void *buffer, unsigned int count) { readsl(PCI_IOBASE + addr, buffer, count); } +#else +void insl(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insl() requires HAS_IOPORT"); +#endif #endif #ifndef outsb #define outsb outsb +#ifdef CONFIG_HAS_IOPORT static inline void outsb(unsigned long addr, const void *buffer, unsigned int count) { writesb(PCI_IOBASE + addr, buffer, count); } +#else +void outsb(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsb() requires HAS_IOPORT"); +#endif #endif #ifndef outsw #define outsw outsw +#ifdef CONFIG_HAS_IOPORT static inline void outsw(unsigned long addr, const void *buffer, unsigned int count) { writesw(PCI_IOBASE + addr, buffer, count); } +#else +void outsw(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsw() requires HAS_IOPORT"); +#endif #endif #ifndef outsl #define outsl outsl +#ifdef CONFIG_HAS_IOPORT static inline void outsl(unsigned long addr, const void *buffer, unsigned int count) { writesl(PCI_IOBASE + addr, buffer, count); } +#else +void outsl(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsl() requires HAS_IOPORT"); +#endif #endif #ifndef insb_p -- cgit v1.2.3 From c5c3238d9b8cee58cd4b08bbbe9347a94a566390 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:36:36 +0200 Subject: asm-generic: provide generic page_to_phys and phys_to_page implementations page_to_phys is duplicated by all architectures, and from some strange reason placed in where it doesn't fit at all. phys_to_page is only provided by a few architectures despite having a lot of open coded users. Provide generic versions in to make these helpers more easily usable. Note with this patch powerpc loses the CONFIG_DEBUG_VIRTUAL pfn_valid check. It will be added back in a generic version later. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann --- include/asm-generic/memory_model.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6796abe1900e..a73a140cbecd 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,6 +64,9 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) + #endif /* __ASSEMBLY__ */ #endif -- cgit v1.2.3 From 3e25d5a49f99b75be2c6cfb165e4f77dc6d739a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Oct 2024 07:36:37 +0200 Subject: asm-generic: add an optional pfn_valid check to page_to_phys page_to_pfn is usually implemented by pointer arithmetics on the memory map, which means that bogus input can lead to even more bogus output. Powerpc had a pfn_valid check on the intermediate pfn in the page_to_phys implementation when CONFIG_DEBUG_VIRTUAL is defined, which seems generally useful, so add that to the generic version. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/asm-generic/memory_model.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index a73a140cbecd..6d1fb6162ac1 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,7 +64,17 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#ifdef CONFIG_DEBUG_VIRTUAL +#define page_to_phys(page) \ +({ \ + unsigned long __pfn = page_to_pfn(page); \ + \ + WARN_ON_ONCE(!pfn_valid(__pfn)); \ + PFN_PHYS(__pfn); \ +}) +#else #define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#endif /* CONFIG_DEBUG_VIRTUAL */ #define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 00a31dd3acea0f88f947fc71e268ebb34b59f218 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 3 Oct 2024 17:16:14 -0400 Subject: asm-generic/div64: optimize/simplify __div64_const32() Several years later I just realized that this code could be greatly simplified. First, let's formalize the need for overflow handling in __arch_xprod64(). Assuming n = UINT64_MAX, there are 2 cases where an overflow may occur: 1) If a bias must be added, we have m_lo * n_lo + m or m_lo * 0xffffffff + ((m_hi << 32) + m_lo) or ((m_lo << 32) - m_lo) + ((m_hi << 32) + m_lo) or (m_lo + m_hi) << 32 which must be < (1 << 64). So the criteria for no overflow is m_lo + m_hi < (1 << 32). 2) The cross product m_lo * n_hi + m_hi * n_lo or m_lo * 0xffffffff + m_hi * 0xffffffff or ((m_lo << 32) - m_lo) + ((m_hi << 32) - m_hi). Assuming the top result from the previous step (m_lo + m_hi) that must be added to this, we get (m_lo + m_hi) << 32 again. So let's have a straight and simpler version when this is true. Otherwise some reordering allows for taking care of possible overflows without any actual conditionals. And prevent from generating both code variants by making sure this is considered only if m is perceived as constant by the compiler. This, in turn, allows for greatly simplifying __div64_const32(). The "special case" may go as well as the regular case works just fine without needing a bias. Then reduction should be applied all the time as minimizing m is the key. Signed-off-by: Nicolas Pitre Signed-off-by: Arnd Bergmann --- include/asm-generic/div64.h | 114 ++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 13f5aa68a455..5d59cf7e73d9 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -74,7 +74,8 @@ * do the trick here). \ */ \ uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ - uint32_t ___p, ___bias; \ + uint32_t ___p; \ + bool ___bias = false; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ @@ -87,22 +88,14 @@ ___x = ~0ULL / ___b * ___b - 1; \ \ /* test our ___m with res = m * x / (p << 64) */ \ - ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ - ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ - ___res += (___x & 0xffffffff) * (___m >> 32); \ - ___t = (___res < ___t) ? (1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - ___res += (___m >> 32) * (___x >> 32); \ - ___res /= ___p; \ + ___res = (___m & 0xffffffff) * (___x & 0xffffffff); \ + ___t = (___m & 0xffffffff) * (___x >> 32) + (___res >> 32); \ + ___res = (___m >> 32) * (___x >> 32) + (___t >> 32); \ + ___t = (___m >> 32) * (___x & 0xffffffff) + (___t & 0xffffffff);\ + ___res = (___res + (___t >> 32)) / ___p; \ \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (___b / (___b & -___b)) == 0) { \ - /* special case, can be simplified to ... */ \ - ___n /= (___b & -___b); \ - ___m = ~0ULL / (___b / (___b & -___b)); \ - ___p = 1; \ - ___bias = 1; \ - } else if (___res != ___x / ___b) { \ + /* Now validate what we've got. */ \ + if (___res != ___x / ___b) { \ /* \ * We can't get away without a bias to compensate \ * for bit truncation errors. To avoid it we'd need an \ @@ -111,45 +104,18 @@ * \ * Instead we do m = p / b and n / b = (n * m + m) / p. \ */ \ - ___bias = 1; \ + ___bias = true; \ /* Compute m = (p << 64) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ - } else { \ - /* \ - * Reduce m / p, and try to clear bit 31 of m when \ - * possible, otherwise that'll need extra overflow \ - * handling later. \ - */ \ - uint32_t ___bits = -(___m & -___m); \ - ___bits |= ___m >> 32; \ - ___bits = (~___bits) << 1; \ - /* \ - * If ___bits == 0 then setting bit 31 is unavoidable. \ - * Simply apply the maximum possible reduction in that \ - * case. Otherwise the MSB of ___bits indicates the \ - * best reduction we should apply. \ - */ \ - if (!___bits) { \ - ___p /= (___m & -___m); \ - ___m /= (___m & -___m); \ - } else { \ - ___p >>= ilog2(___bits); \ - ___m >>= ilog2(___bits); \ - } \ - /* No bias needed. */ \ - ___bias = 0; \ } \ \ + /* Reduce m / p to help avoid overflow handling later. */ \ + ___p /= (___m & -___m); \ + ___m /= (___m & -___m); \ + \ /* \ - * Now we have a combination of 2 conditions: \ - * \ - * 1) whether or not we need to apply a bias, and \ - * \ - * 2) whether or not there might be an overflow in the cross \ - * product determined by (___m & ((1 << 63) | (1 << 31))). \ - * \ - * Select the best way to do (m_bias + m * n) / (1 << 64). \ + * Perform (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ ___res = __arch_xprod_64(___m, ___n, ___bias); \ @@ -165,7 +131,7 @@ * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 * * The product is a 128-bit value, scaled down to 64 bits. - * Assuming constant propagation to optimize away unused conditional code. + * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) @@ -174,38 +140,28 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) uint32_t m_hi = m >> 32; uint32_t n_lo = n; uint32_t n_hi = n >> 32; - uint64_t res; - uint32_t res_lo, res_hi, tmp; - - if (!bias) { - res = ((uint64_t)m_lo * n_lo) >> 32; - } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res = (m + (uint64_t)m_lo * n_lo) >> 32; - } else { - res = m + (uint64_t)m_lo * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < m_hi); - res = res_lo | ((uint64_t)res_hi << 32); - } - - if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res += (uint64_t)m_lo * n_hi; - res += (uint64_t)m_hi * n_lo; - res >>= 32; + uint64_t x, y; + + /* Determine if overflow handling can be dispensed with. */ + bool no_ovf = __builtin_constant_p(m) && + ((m >> 32) + (m & 0xffffffff) < 0x100000000); + + if (no_ovf) { + x = (uint64_t)m_lo * n_lo + (bias ? m : 0); + x >>= 32; + x += (uint64_t)m_lo * n_hi; + x += (uint64_t)m_hi * n_lo; + x >>= 32; + x += (uint64_t)m_hi * n_hi; } else { - res += (uint64_t)m_lo * n_hi; - tmp = res >> 32; - res += (uint64_t)m_hi * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < tmp); - res = res_lo | ((uint64_t)res_hi << 32); + x = (uint64_t)m_lo * n_lo + (bias ? m_lo : 0); + y = (uint64_t)m_lo * n_hi + (uint32_t)(x >> 32) + (bias ? m_hi : 0); + x = (uint64_t)m_hi * n_hi + (uint32_t)(y >> 32); + y = (uint64_t)m_hi * n_lo + (uint32_t)y; + x += (uint32_t)(y >> 32); } - res += (uint64_t)m_hi * n_hi; - - return res; + return x; } #endif -- cgit v1.2.3 From d533cb2d2af400f4689d8c5ca41db6d83ff173be Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 3 Oct 2024 17:16:16 -0400 Subject: __arch_xprod64(): make __always_inline when optimizing for performance Recent gcc versions started not systematically inline __arch_xprod64() and that has performance implications. Give the compiler the freedom to decide only when optimizing for size. Here's some timing numbers from lib/math/test_div64.c Using __always_inline: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.048285584s elapsed ``` Without __always_inline: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.053023584s elapsed ``` Forcing constant base through the non-constant base code path: ``` test_div64: Starting 64bit/32bit division and modulo test test_div64: Completed 64bit/32bit division and modulo test, 0.103263776s elapsed ``` It is worth noting that test_div64 does half the test with non constant divisors already so the impact is greater than what those numbers show. And for what it is worth, those numbers were obtained using QEMU. The gcc version is 14.1.0. Signed-off-by: Nicolas Pitre Signed-off-by: Arnd Bergmann --- include/asm-generic/div64.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 5d59cf7e73d9..25e7b4b58dcf 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -134,7 +134,12 @@ * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ -static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE +static __always_inline +#else +static inline +#endif +uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) { uint32_t m_lo = m; uint32_t m_hi = m >> 32; -- cgit v1.2.3 From b660d0a2acb9053d627befbb259a397d26aa9582 Mon Sep 17 00:00:00 2001 From: Julian Vetter Date: Mon, 28 Oct 2024 14:42:24 +0100 Subject: New implementation for IO memcpy and IO memset The IO memcpy and IO memset functions in asm-generic/io.h simply call memcpy and memset. This can lead to alignment problems or faults on architectures that do not define their own version and fall back to these defaults. This patch introduces new implementations for IO memcpy and IO memset, that use read{l,q} accessor functions, align accesses to machine word size, and resort to byte accesses when the target memory is not aligned. For new architectures and existing ones that were using the old fallbacks these functions are save to use, because IO memory constraints are taken into account. Moreover, architectures with similar implementations can now use these new versions, not needing to implement their own. Reviewed-by: Yann Sionneau Signed-off-by: Julian Vetter Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 1027be6a62bc..a5cbbf3e26ec 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1211,7 +1211,6 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) #endif #ifndef memset_io -#define memset_io memset_io /** * memset_io Set a range of I/O memory to a constant value * @addr: The beginning of the I/O-memory range to set @@ -1220,15 +1219,10 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) * * Set a range of I/O memory to a given value. */ -static inline void memset_io(volatile void __iomem *addr, int value, - size_t size) -{ - memset(__io_virt(addr), value, size); -} +void memset_io(volatile void __iomem *addr, int val, size_t count); #endif #ifndef memcpy_fromio -#define memcpy_fromio memcpy_fromio /** * memcpy_fromio Copy a block of data from I/O memory * @dst: The (RAM) destination for the copy @@ -1237,16 +1231,10 @@ static inline void memset_io(volatile void __iomem *addr, int value, * * Copy a block of data from I/O memory. */ -static inline void memcpy_fromio(void *buffer, - const volatile void __iomem *addr, - size_t size) -{ - memcpy(buffer, __io_virt(addr), size); -} +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count); #endif #ifndef memcpy_toio -#define memcpy_toio memcpy_toio /** * memcpy_toio Copy a block of data into I/O memory * @dst: The (I/O memory) destination for the copy @@ -1255,11 +1243,7 @@ static inline void memcpy_fromio(void *buffer, * * Copy a block of data to I/O memory. */ -static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, - size_t size) -{ - memcpy(__io_virt(addr), buffer, size); -} +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count); #endif extern int devmem_is_allowed(unsigned long pfn); -- cgit v1.2.3 From fb56007c9bc38550755a53f7afd71a287340b724 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Nov 2024 06:10:19 +0000 Subject: vt_buffer.h: get rid of dead code in default scr_...() instances Only 4 architectures define VT_BUF_HAVE_RW (alpha, mips, powerpc, sparc) and all of them define VT_BUF_HAVE_MEM{SET,CPY,MOVE}W. In other words, the code under #ifdef VT_BUF_HAVE_RW in default scr_mem...w() instances won't be compiled anyway. Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann --- include/linux/vt_buffer.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index 919d999a8c1d..b6eeb8cb6070 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -28,45 +28,21 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(c, s++); -#else memset16(s, c, count / 2); -#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(scr_readw(s++), d++); -#else memcpy(d, s, count); -#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - if (d < s) - scr_memcpyw(d, s, count); - else { - count /= 2; - d += count; - s += count; - while (count--) - scr_writew(scr_readw(--s), --d); - } -#else memmove(d, s, count); -#endif } #endif -- cgit v1.2.3 From 0af8e32343f8d0db31f593464fc140eaef25a281 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Nov 2024 06:11:47 +0000 Subject: empty include/asm-generic/vga.h all places that use anything defined in it (vgacon, mdacon and vga16fb) are built only on architectures that have all that stuff in their native asm/vga.h allows to kill stub asm/vga.h on sh, while we are at it... Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann --- include/asm-generic/vga.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h index adf91a783b5c..5dcaf4ae904a 100644 --- a/include/asm-generic/vga.h +++ b/include/asm-generic/vga.h @@ -1,25 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares - */ #ifndef __ASM_GENERIC_VGA_H #define __ASM_GENERIC_VGA_H - -/* - * On most architectures that support VGA, we can just - * recalculate addresses and then access the videoram - * directly without any black magic. - * - * Everyone else needs to ioremap the address and use - * proper I/O accesses. - */ -#ifndef VGA_MAP_MEM -#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) -#endif - -#define vga_readb(x) (*(x)) -#define vga_writeb(x, y) (*(y) = (x)) - -#endif /* _ASM_GENERIC_VGA_H */ +#endif /* __ASM_GENERIC_VGA_H */ -- cgit v1.2.3