diff options
Diffstat (limited to 'include/asm-generic')
| -rw-r--r-- | include/asm-generic/audit_change_attr.h | 6 | ||||
| -rw-r--r-- | include/asm-generic/codetag.lds.h | 19 | ||||
| -rw-r--r-- | include/asm-generic/delay.h | 96 | ||||
| -rw-r--r-- | include/asm-generic/div64.h | 121 | ||||
| -rw-r--r-- | include/asm-generic/hugetlb.h | 15 | ||||
| -rw-r--r-- | include/asm-generic/io.h | 82 | ||||
| -rw-r--r-- | include/asm-generic/memory_model.h | 13 | ||||
| -rw-r--r-- | include/asm-generic/qspinlock.h | 2 | ||||
| -rw-r--r-- | include/asm-generic/spinlock.h | 87 | ||||
| -rw-r--r-- | include/asm-generic/spinlock_types.h | 12 | ||||
| -rw-r--r-- | include/asm-generic/text-patching.h | 5 | ||||
| -rw-r--r-- | include/asm-generic/ticket_spinlock.h | 105 | ||||
| -rw-r--r-- | include/asm-generic/vdso/vsyscall.h | 3 | ||||
| -rw-r--r-- | include/asm-generic/vga.h | 23 | ||||
| -rw-r--r-- | include/asm-generic/vmlinux.lds.h | 53 |
15 files changed, 376 insertions, 266 deletions
diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index 331670807cf0..cc840537885f 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -11,9 +11,15 @@ __NR_lchown, __NR_fchown, #endif __NR_setxattr, +#ifdef __NR_setxattrat +__NR_setxattrat, +#endif __NR_lsetxattr, __NR_fsetxattr, __NR_removexattr, +#ifdef __NR_removexattrat +__NR_removexattrat, +#endif __NR_lremovexattr, __NR_fremovexattr, #ifdef __NR_fchownat diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index 64f536b80380..372c320c5043 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -11,4 +11,23 @@ #define CODETAG_SECTIONS() \ SECTION_WITH_BOUNDARIES(alloc_tags) +/* + * Module codetags which aren't used after module unload, therefore have the + * same lifespan as the module and can be safely unloaded with the module. + */ +#define MOD_CODETAG_SECTIONS() + +#define MOD_SEPARATE_CODETAG_SECTION(_name) \ + .codetag.##_name : { \ + SECTION_WITH_BOUNDARIES(_name) \ + } + +/* + * For codetags which might be used after module unload, therefore might stay + * longer in memory. Each such codetag type has its own section so that we can + * unload them individually once unused. + */ +#define MOD_SEPARATE_CODETAG_SECTIONS() \ + MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + #endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h index e448ac61430c..03b0ec7afca6 100644 --- a/include/asm-generic/delay.h +++ b/include/asm-generic/delay.h @@ -2,6 +2,9 @@ #ifndef __ASM_GENERIC_DELAY_H #define __ASM_GENERIC_DELAY_H +#include <linux/math.h> +#include <vdso/time64.h> + /* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -12,34 +15,73 @@ extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); /* - * The weird n/20000 thing suppresses a "comparison is always false due to - * limited range of data type" warning with non-const 8-bit arguments. + * The microseconds/nanosecond delay multiplicators are used to convert a + * constant microseconds/nanoseconds value to a value which can be used by the + * architectures specific implementation to transform it into loops. + */ +#define UDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, USEC_PER_SEC)) +#define NDELAY_CONST_MULT ((unsigned long)DIV_ROUND_UP(1ULL << 32, NSEC_PER_SEC)) + +/* + * The maximum constant udelay/ndelay value picked out of thin air to prevent + * too long constant udelays/ndelays. */ +#define DELAY_CONST_MAX 20000 -/* 0x10c7 is 2**32 / 1000000 (rounded up) */ -#define udelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_udelay(); \ - else \ - __const_udelay((n) * 0x10c7ul); \ - } else { \ - __udelay(n); \ - } \ - }) - -/* 0x5 is 2**32 / 1000000000 (rounded up) */ -#define ndelay(n) \ - ({ \ - if (__builtin_constant_p(n)) { \ - if ((n) / 20000 >= 1) \ - __bad_ndelay(); \ - else \ - __const_udelay((n) * 5ul); \ - } else { \ - __ndelay(n); \ - } \ - }) +/** + * udelay - Inserting a delay based on microseconds with busy waiting + * @usec: requested delay in microseconds + * + * When delaying in an atomic context ndelay(), udelay() and mdelay() are the + * only valid variants of delaying/sleeping to go with. + * + * When inserting delays in non atomic context which are shorter than the time + * which is required to queue e.g. an hrtimer and to enter then the scheduler, + * it is also valuable to use udelay(). But it is not simple to specify a + * generic threshold for this which will fit for all systems. An approximation + * is a threshold for all delays up to 10 microseconds. + * + * When having a delay which is larger than the architecture specific + * %MAX_UDELAY_MS value, please make sure mdelay() is used. Otherwise a overflow + * risk is given. + * + * Please note that ndelay(), udelay() and mdelay() may return early for several + * reasons (https://lists.openwall.net/linux-kernel/2011/01/09/56): + * + * #. computed loops_per_jiffy too low (due to the time taken to execute the + * timer interrupt.) + * #. cache behaviour affecting the time it takes to execute the loop function. + * #. CPU clock rate changes. + */ +static __always_inline void udelay(unsigned long usec) +{ + if (__builtin_constant_p(usec)) { + if (usec >= DELAY_CONST_MAX) + __bad_udelay(); + else + __const_udelay(usec * UDELAY_CONST_MULT); + } else { + __udelay(usec); + } +} + +/** + * ndelay - Inserting a delay based on nanoseconds with busy waiting + * @nsec: requested delay in nanoseconds + * + * See udelay() for basic information about ndelay() and it's variants. + */ +static __always_inline void ndelay(unsigned long nsec) +{ + if (__builtin_constant_p(nsec)) { + if (nsec >= DELAY_CONST_MAX) + __bad_ndelay(); + else + __const_udelay(nsec * NDELAY_CONST_MULT); + } else { + __ndelay(nsec); + } +} +#define ndelay(x) ndelay(x) #endif /* __ASM_GENERIC_DELAY_H */ diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 13f5aa68a455..25e7b4b58dcf 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -74,7 +74,8 @@ * do the trick here). \ */ \ uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ - uint32_t ___p, ___bias; \ + uint32_t ___p; \ + bool ___bias = false; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ @@ -87,22 +88,14 @@ ___x = ~0ULL / ___b * ___b - 1; \ \ /* test our ___m with res = m * x / (p << 64) */ \ - ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ - ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ - ___res += (___x & 0xffffffff) * (___m >> 32); \ - ___t = (___res < ___t) ? (1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - ___res += (___m >> 32) * (___x >> 32); \ - ___res /= ___p; \ + ___res = (___m & 0xffffffff) * (___x & 0xffffffff); \ + ___t = (___m & 0xffffffff) * (___x >> 32) + (___res >> 32); \ + ___res = (___m >> 32) * (___x >> 32) + (___t >> 32); \ + ___t = (___m >> 32) * (___x & 0xffffffff) + (___t & 0xffffffff);\ + ___res = (___res + (___t >> 32)) / ___p; \ \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (___b / (___b & -___b)) == 0) { \ - /* special case, can be simplified to ... */ \ - ___n /= (___b & -___b); \ - ___m = ~0ULL / (___b / (___b & -___b)); \ - ___p = 1; \ - ___bias = 1; \ - } else if (___res != ___x / ___b) { \ + /* Now validate what we've got. */ \ + if (___res != ___x / ___b) { \ /* \ * We can't get away without a bias to compensate \ * for bit truncation errors. To avoid it we'd need an \ @@ -111,45 +104,18 @@ * \ * Instead we do m = p / b and n / b = (n * m + m) / p. \ */ \ - ___bias = 1; \ + ___bias = true; \ /* Compute m = (p << 64) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ - } else { \ - /* \ - * Reduce m / p, and try to clear bit 31 of m when \ - * possible, otherwise that'll need extra overflow \ - * handling later. \ - */ \ - uint32_t ___bits = -(___m & -___m); \ - ___bits |= ___m >> 32; \ - ___bits = (~___bits) << 1; \ - /* \ - * If ___bits == 0 then setting bit 31 is unavoidable. \ - * Simply apply the maximum possible reduction in that \ - * case. Otherwise the MSB of ___bits indicates the \ - * best reduction we should apply. \ - */ \ - if (!___bits) { \ - ___p /= (___m & -___m); \ - ___m /= (___m & -___m); \ - } else { \ - ___p >>= ilog2(___bits); \ - ___m >>= ilog2(___bits); \ - } \ - /* No bias needed. */ \ - ___bias = 0; \ } \ \ + /* Reduce m / p to help avoid overflow handling later. */ \ + ___p /= (___m & -___m); \ + ___m /= (___m & -___m); \ + \ /* \ - * Now we have a combination of 2 conditions: \ - * \ - * 1) whether or not we need to apply a bias, and \ - * \ - * 2) whether or not there might be an overflow in the cross \ - * product determined by (___m & ((1 << 63) | (1 << 31))). \ - * \ - * Select the best way to do (m_bias + m * n) / (1 << 64). \ + * Perform (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ ___res = __arch_xprod_64(___m, ___n, ___bias); \ @@ -165,47 +131,42 @@ * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 * * The product is a 128-bit value, scaled down to 64 bits. - * Assuming constant propagation to optimize away unused conditional code. + * Hoping for compile-time optimization of conditional code. * Architectures may provide their own optimized assembly implementation. */ -static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE +static __always_inline +#else +static inline +#endif +uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) { uint32_t m_lo = m; uint32_t m_hi = m >> 32; uint32_t n_lo = n; uint32_t n_hi = n >> 32; - uint64_t res; - uint32_t res_lo, res_hi, tmp; - - if (!bias) { - res = ((uint64_t)m_lo * n_lo) >> 32; - } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res = (m + (uint64_t)m_lo * n_lo) >> 32; + uint64_t x, y; + + /* Determine if overflow handling can be dispensed with. */ + bool no_ovf = __builtin_constant_p(m) && + ((m >> 32) + (m & 0xffffffff) < 0x100000000); + + if (no_ovf) { + x = (uint64_t)m_lo * n_lo + (bias ? m : 0); + x >>= 32; + x += (uint64_t)m_lo * n_hi; + x += (uint64_t)m_hi * n_lo; + x >>= 32; + x += (uint64_t)m_hi * n_hi; } else { - res = m + (uint64_t)m_lo * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < m_hi); - res = res_lo | ((uint64_t)res_hi << 32); + x = (uint64_t)m_lo * n_lo + (bias ? m_lo : 0); + y = (uint64_t)m_lo * n_hi + (uint32_t)(x >> 32) + (bias ? m_hi : 0); + x = (uint64_t)m_hi * n_hi + (uint32_t)(y >> 32); + y = (uint64_t)m_hi * n_lo + (uint32_t)y; + x += (uint32_t)(y >> 32); } - if (!(m & ((1ULL << 63) | (1ULL << 31)))) { - /* there can't be any overflow here */ - res += (uint64_t)m_lo * n_hi; - res += (uint64_t)m_hi * n_lo; - res >>= 32; - } else { - res += (uint64_t)m_lo * n_hi; - tmp = res >> 32; - res += (uint64_t)m_hi * n_lo; - res_lo = res >> 32; - res_hi = (res_lo < tmp); - res = res_lo | ((uint64_t)res_hi << 32); - } - - res += (uint64_t)m_hi * n_hi; - - return res; + return x; } #endif diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 594d5905f615..f42133dae68e 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -42,20 +42,26 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) return pte_modify(pte, newprot); } +#ifndef __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { return huge_pte_wrprotect(pte_mkuffd_wp(pte)); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP static inline pte_t huge_pte_clear_uffd_wp(pte_t pte) { return pte_clear_uffd_wp(pte); } +#endif +#ifndef __HAVE_ARCH_HUGE_PTE_UFFD_WP static inline int huge_pte_uffd_wp(pte_t pte) { return pte_uffd_wp(pte); } +#endif #ifndef __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, @@ -106,22 +112,17 @@ static inline int huge_pte_none(pte_t pte) #endif /* Please refer to comments above pte_none_mostly() for the usage */ +#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { return huge_pte_none(pte) || is_pte_marker(pte); } +#endif #ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - struct hstate *h = hstate_file(file); - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; } #endif diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 80de699bf6af..a5cbbf3e26ec 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -540,6 +540,7 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #if !defined(inb) && !defined(_inb) #define _inb _inb +#ifdef CONFIG_HAS_IOPORT static inline u8 _inb(unsigned long addr) { u8 val; @@ -549,10 +550,15 @@ static inline u8 _inb(unsigned long addr) __io_par(val); return val; } +#else +u8 _inb(unsigned long addr) + __compiletime_error("inb()) requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inw) && !defined(_inw) #define _inw _inw +#ifdef CONFIG_HAS_IOPORT static inline u16 _inw(unsigned long addr) { u16 val; @@ -562,10 +568,15 @@ static inline u16 _inw(unsigned long addr) __io_par(val); return val; } +#else +u16 _inw(unsigned long addr) + __compiletime_error("inw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(inl) && !defined(_inl) #define _inl _inl +#ifdef CONFIG_HAS_IOPORT static inline u32 _inl(unsigned long addr) { u32 val; @@ -575,36 +586,55 @@ static inline u32 _inl(unsigned long addr) __io_par(val); return val; } +#else +u32 _inl(unsigned long addr) + __compiletime_error("inl() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outb) && !defined(_outb) #define _outb _outb +#ifdef CONFIG_HAS_IOPORT static inline void _outb(u8 value, unsigned long addr) { __io_pbw(); __raw_writeb(value, PCI_IOBASE + addr); __io_paw(); } +#else +void _outb(u8 value, unsigned long addr) + __compiletime_error("outb() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outw) && !defined(_outw) #define _outw _outw +#ifdef CONFIG_HAS_IOPORT static inline void _outw(u16 value, unsigned long addr) { __io_pbw(); __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outw(u16 value, unsigned long addr) + __compiletime_error("outw() requires CONFIG_HAS_IOPORT"); +#endif #endif #if !defined(outl) && !defined(_outl) #define _outl _outl +#ifdef CONFIG_HAS_IOPORT static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } +#else +void _outl(u32 value, unsigned long addr) + __compiletime_error("outl() requires CONFIG_HAS_IOPORT"); +#endif #endif #include <linux/logic_pio.h> @@ -688,53 +718,83 @@ static inline void outl_p(u32 value, unsigned long addr) #ifndef insb #define insb insb +#ifdef CONFIG_HAS_IOPORT static inline void insb(unsigned long addr, void *buffer, unsigned int count) { readsb(PCI_IOBASE + addr, buffer, count); } +#else +void insb(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insb() requires HAS_IOPORT"); +#endif #endif #ifndef insw #define insw insw +#ifdef CONFIG_HAS_IOPORT static inline void insw(unsigned long addr, void *buffer, unsigned int count) { readsw(PCI_IOBASE + addr, buffer, count); } +#else +void insw(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insw() requires HAS_IOPORT"); +#endif #endif #ifndef insl #define insl insl +#ifdef CONFIG_HAS_IOPORT static inline void insl(unsigned long addr, void *buffer, unsigned int count) { readsl(PCI_IOBASE + addr, buffer, count); } +#else +void insl(unsigned long addr, void *buffer, unsigned int count) + __compiletime_error("insl() requires HAS_IOPORT"); +#endif #endif #ifndef outsb #define outsb outsb +#ifdef CONFIG_HAS_IOPORT static inline void outsb(unsigned long addr, const void *buffer, unsigned int count) { writesb(PCI_IOBASE + addr, buffer, count); } +#else +void outsb(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsb() requires HAS_IOPORT"); +#endif #endif #ifndef outsw #define outsw outsw +#ifdef CONFIG_HAS_IOPORT static inline void outsw(unsigned long addr, const void *buffer, unsigned int count) { writesw(PCI_IOBASE + addr, buffer, count); } +#else +void outsw(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsw() requires HAS_IOPORT"); +#endif #endif #ifndef outsl #define outsl outsl +#ifdef CONFIG_HAS_IOPORT static inline void outsl(unsigned long addr, const void *buffer, unsigned int count) { writesl(PCI_IOBASE + addr, buffer, count); } +#else +void outsl(unsigned long addr, const void *buffer, unsigned int count) + __compiletime_error("outsl() requires HAS_IOPORT"); +#endif #endif #ifndef insb_p @@ -1151,7 +1211,6 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) #endif #ifndef memset_io -#define memset_io memset_io /** * memset_io Set a range of I/O memory to a constant value * @addr: The beginning of the I/O-memory range to set @@ -1160,15 +1219,10 @@ static inline void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) * * Set a range of I/O memory to a given value. */ -static inline void memset_io(volatile void __iomem *addr, int value, - size_t size) -{ - memset(__io_virt(addr), value, size); -} +void memset_io(volatile void __iomem *addr, int val, size_t count); #endif #ifndef memcpy_fromio -#define memcpy_fromio memcpy_fromio /** * memcpy_fromio Copy a block of data from I/O memory * @dst: The (RAM) destination for the copy @@ -1177,16 +1231,10 @@ static inline void memset_io(volatile void __iomem *addr, int value, * * Copy a block of data from I/O memory. */ -static inline void memcpy_fromio(void *buffer, - const volatile void __iomem *addr, - size_t size) -{ - memcpy(buffer, __io_virt(addr), size); -} +void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count); #endif #ifndef memcpy_toio -#define memcpy_toio memcpy_toio /** * memcpy_toio Copy a block of data into I/O memory * @dst: The (I/O memory) destination for the copy @@ -1195,11 +1243,7 @@ static inline void memcpy_fromio(void *buffer, * * Copy a block of data to I/O memory. */ -static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, - size_t size) -{ - memcpy(__io_virt(addr), buffer, size); -} +void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count); #endif extern int devmem_is_allowed(unsigned long pfn); diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6796abe1900e..6d1fb6162ac1 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,6 +64,19 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#ifdef CONFIG_DEBUG_VIRTUAL +#define page_to_phys(page) \ +({ \ + unsigned long __pfn = page_to_pfn(page); \ + \ + WARN_ON_ONCE(!pfn_valid(__pfn)); \ + PFN_PHYS(__pfn); \ +}) +#else +#define page_to_phys(page) PFN_PHYS(page_to_pfn(page)) +#endif /* CONFIG_DEBUG_VIRTUAL */ +#define phys_to_page(phys) pfn_to_page(PHYS_PFN(phys)) + #endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 0655aa5b57b2..bf47cca2c375 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) } #endif +#ifndef __no_arch_spinlock_redefine /* * Remapping spinlock architecture specific functions to the corresponding * queued spinlock functions. @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) #define arch_spin_lock(l) queued_spin_lock(l) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) +#endif #endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h index 90803a826ba0..970590baf61b 100644 --- a/include/asm-generic/spinlock.h +++ b/include/asm-generic/spinlock.h @@ -1,94 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * 'Generic' ticket-lock implementation. - * - * It relies on atomic_fetch_add() having well defined forward progress - * guarantees under contention. If your architecture cannot provide this, stick - * to a test-and-set lock. - * - * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a - * sub-word of the value. This is generally true for anything LL/SC although - * you'd be hard pressed to find anything useful in architecture specifications - * about this. If your architecture cannot do this you might be better off with - * a test-and-set. - * - * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence - * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with - * a full fence after the spin to upgrade the otherwise-RCpc - * atomic_cond_read_acquire(). - * - * The implementation uses smp_cond_load_acquire() to spin, so if the - * architecture has WFE like instructions to sleep instead of poll for word - * modifications be sure to implement that (see ARM64 for example). - * - */ - #ifndef __ASM_GENERIC_SPINLOCK_H #define __ASM_GENERIC_SPINLOCK_H -#include <linux/atomic.h> -#include <asm-generic/spinlock_types.h> - -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - u32 val = atomic_fetch_add(1<<16, lock); - u16 ticket = val >> 16; - - if (ticket == (u16)val) - return; - - /* - * atomic_cond_read_acquire() is RCpc, but rather than defining a - * custom cond_read_rcsc() here we just emit a full fence. We only - * need the prior reads before subsequent writes ordering from - * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we - * have no outstanding writes due to the atomic_fetch_add() the extra - * orderings are free. - */ - atomic_cond_read_acquire(lock, ticket == (u16)VAL); - smp_mb(); -} - -static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) -{ - u32 old = atomic_read(lock); - - if ((old >> 16) != (old & 0xffff)) - return false; - - return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - u32 val = atomic_read(lock); - - smp_store_release(ptr, (u16)val + 1); -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - u32 val = lock.counter; - - return ((val >> 16) == (val & 0xffff)); -} - -static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - arch_spinlock_t val = READ_ONCE(*lock); - - return !arch_spin_value_unlocked(val); -} - -static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - u32 val = atomic_read(lock); - - return (s16)((val >> 16) - (val & 0xffff)) > 1; -} - +#include <asm-generic/ticket_spinlock.h> #include <asm/qrwlock.h> #endif /* __ASM_GENERIC_SPINLOCK_H */ diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h index 8962bb730945..f534aa5de394 100644 --- a/include/asm-generic/spinlock_types.h +++ b/include/asm-generic/spinlock_types.h @@ -3,15 +3,7 @@ #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H #define __ASM_GENERIC_SPINLOCK_TYPES_H -#include <linux/types.h> -typedef atomic_t arch_spinlock_t; - -/* - * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the - * include. - */ -#include <asm/qrwlock_types.h> - -#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */ diff --git a/include/asm-generic/text-patching.h b/include/asm-generic/text-patching.h new file mode 100644 index 000000000000..2245c641b741 --- /dev/null +++ b/include/asm-generic/text-patching.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_TEXT_PATCHING_H +#define _ASM_GENERIC_TEXT_PATCHING_H + +#endif /* _ASM_GENERIC_TEXT_PATCHING_H */ diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h new file mode 100644 index 000000000000..325779970d8a --- /dev/null +++ b/include/asm-generic/ticket_spinlock.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * 'Generic' ticket-lock implementation. + * + * It relies on atomic_fetch_add() having well defined forward progress + * guarantees under contention. If your architecture cannot provide this, stick + * to a test-and-set lock. + * + * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a + * sub-word of the value. This is generally true for anything LL/SC although + * you'd be hard pressed to find anything useful in architecture specifications + * about this. If your architecture cannot do this you might be better off with + * a test-and-set. + * + * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence + * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with + * a full fence after the spin to upgrade the otherwise-RCpc + * atomic_cond_read_acquire(). + * + * The implementation uses smp_cond_load_acquire() to spin, so if the + * architecture has WFE like instructions to sleep instead of poll for word + * modifications be sure to implement that (see ARM64 for example). + * + */ + +#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H +#define __ASM_GENERIC_TICKET_SPINLOCK_H + +#include <linux/atomic.h> +#include <asm-generic/spinlock_types.h> + +static __always_inline void ticket_spin_lock(arch_spinlock_t *lock) +{ + u32 val = atomic_fetch_add(1<<16, &lock->val); + u16 ticket = val >> 16; + + if (ticket == (u16)val) + return; + + /* + * atomic_cond_read_acquire() is RCpc, but rather than defining a + * custom cond_read_rcsc() here we just emit a full fence. We only + * need the prior reads before subsequent writes ordering from + * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we + * have no outstanding writes due to the atomic_fetch_add() the extra + * orderings are free. + */ + atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL); + smp_mb(); +} + +static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock) +{ + u32 old = atomic_read(&lock->val); + + if ((old >> 16) != (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock) +{ + u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + u32 val = atomic_read(&lock->val); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock) +{ + u32 val = lock.val.counter; + + return ((val >> 16) == (val & 0xffff)); +} + +static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t val = READ_ONCE(*lock); + + return !ticket_spin_value_unlocked(val); +} + +static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock) +{ + u32 val = atomic_read(&lock->val); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +#ifndef __no_arch_spinlock_redefine +/* + * Remapping spinlock architecture specific functions to the corresponding + * ticket spinlock functions. + */ +#define arch_spin_is_locked(l) ticket_spin_is_locked(l) +#define arch_spin_is_contended(l) ticket_spin_is_contended(l) +#define arch_spin_value_unlocked(l) ticket_spin_value_unlocked(l) +#define arch_spin_lock(l) ticket_spin_lock(l) +#define arch_spin_trylock(l) ticket_spin_trylock(l) +#define arch_spin_unlock(l) ticket_spin_unlock(l) +#endif + +#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */ diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index c835607f78ae..01dafd604188 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -12,8 +12,7 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #endif /* __arch_get_k_vdso_data */ #ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata, - struct timekeeper *tk) +static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata) { } #endif /* __arch_update_vsyscall */ diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h index adf91a783b5c..5dcaf4ae904a 100644 --- a/include/asm-generic/vga.h +++ b/include/asm-generic/vga.h @@ -1,25 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares <mj@ucw.cz> - */ #ifndef __ASM_GENERIC_VGA_H #define __ASM_GENERIC_VGA_H - -/* - * On most architectures that support VGA, we can just - * recalculate addresses and then access the videoram - * directly without any black magic. - * - * Everyone else needs to ioremap the address and use - * proper I/O accesses. - */ -#ifndef VGA_MAP_MEM -#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x) -#endif - -#define vga_readb(x) (*(x)) -#define vga_writeb(x, y) (*(y) = (x)) - -#endif /* _ASM_GENERIC_VGA_H */ +#endif /* __ASM_GENERIC_VGA_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eeadbaeccf88..54504013c749 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -95,18 +95,25 @@ * With LTO_CLANG, the linker also splits sections by default, so we need * these macros to combine the sections during the final link. * + * With AUTOFDO_CLANG and PROPELLER_CLANG, by default, the linker splits + * text sections and regroups functions into subsections. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ +defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#endif +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define TEXT_MAIN .text #define DATA_MAIN .data #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata @@ -350,9 +357,9 @@ *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - *(.data.unlikely) \ + *(.data..unlikely) \ __start_once = .; \ - *(.data.once) \ + *(.data..once) \ __end_once = .; \ STRUCT_ALIGN(); \ *(__tracepoints) \ @@ -549,24 +556,44 @@ __cpuidle_text_end = .; \ __noinstr_text_end = .; +#define TEXT_SPLIT \ + __split_text_start = .; \ + *(.text.split .text.split.[0-9a-zA-Z_]*) \ + __split_text_end = .; + +#define TEXT_UNLIKELY \ + __unlikely_text_start = .; \ + *(.text.unlikely .text.unlikely.*) \ + __unlikely_text_end = .; + +#define TEXT_HOT \ + __hot_text_start = .; \ + *(.text.hot .text.hot.*) \ + __hot_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map * - * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead - * code elimination is enabled, so these sections should be converted - * to use ".." first. + * TEXT_MAIN here will match symbols with a fixed pattern (for example, + * .text.hot or .text.unlikely) if dead code elimination or + * function-section is enabled. Match these symbols first before + * TEXT_MAIN to ensure they are grouped together. + * + * Also placing .text.hot section at the beginning of a page, this + * would help the TLB performance. */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text.hot.*) \ - *(TEXT_MAIN .text.fixup) \ - *(.text.unlikely .text.unlikely.*) \ + *(.text.asan.* .text.tsan.*) \ *(.text.unknown .text.unknown.*) \ + TEXT_SPLIT \ + TEXT_UNLIKELY \ + . = ALIGN(PAGE_SIZE); \ + TEXT_HOT \ + *(TEXT_MAIN .text.fixup) \ NOINSTR_TEXT \ - *(.ref.text) \ - *(.text.asan.* .text.tsan.*) - + *(.ref.text) /* sched.text is aling to function alignment to secure we have same * address even at second ld pass when generating System.map */ |
