Diffstat (limited to 'include/linux')
167 files changed, 3548 insertions, 935 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fbf0c3a65f59..3a412dcebc29 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -107,6 +107,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, + ACPI_IRQ_MODEL_GIC_V5, ACPI_IRQ_MODEL_LPIC, ACPI_IRQ_MODEL_RINTC, ACPI_IRQ_MODEL_COUNT diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index d4ed5622cf2b..17bb3374f4ca 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -27,12 +27,15 @@ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); -int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); +struct fwnode_handle *iort_iwb_handle(u32 iwb_id); #ifdef CONFIG_ACPI_IORT u32 iort_msi_map_id(struct device *dev, u32 id); +u32 iort_msi_xlate(struct device *dev, u32 id, struct fwnode_handle **node); +int iort_its_translate_pa(struct fwnode_handle *node, phys_addr_t *base); struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, enum irq_domain_bus_token bus_token); +int iort_pmsi_get_msi_info(struct device *dev, u32 *dev_id, phys_addr_t *pa); void acpi_configure_pmsi_domain(struct device *dev); void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head); @@ -46,9 +49,15 @@ phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline u32 iort_msi_map_id(struct device *dev, u32 id) { return id; } +static inline u32 iort_msi_xlate(struct device *dev, u32 id, struct fwnode_handle **node) +{ return id; } +static inline int iort_its_translate_pa(struct fwnode_handle *node, phys_addr_t *base) +{ return -ENODEV; } static inline struct irq_domain *iort_get_device_domain( struct device *dev, u32 id, enum irq_domain_bus_token bus_token) { return NULL; } +static inline int iort_pmsi_get_msi_info(struct device *dev, u32 *dev_id, phys_addr_t *pa) +{ return -ENODEV; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } static inline void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { } diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 2f9d36b72bd8..cdc25f8979f7 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -2121,7 +2121,7 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) @@ -2155,7 +2155,7 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) @@ -2189,7 +2189,7 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. 
*/ static __always_inline bool raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) @@ -2222,7 +2222,7 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) @@ -4247,7 +4247,7 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) @@ -4281,7 +4281,7 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) @@ -4315,7 +4315,7 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) @@ -4348,7 +4348,7 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) @@ -4690,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// b565db590afeeff0d7c9485ccbca5bb6e155749f +// 206314f82b8b73a5c3aa69cf7f35ac9e7b5d6b58 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 37ab6314a9f7..feb3b5dc3e96 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1269,7 +1269,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) @@ -1292,7 +1292,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) @@ -1314,7 +1314,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. 
*/ static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) @@ -1337,7 +1337,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) * * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) @@ -2847,7 +2847,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) @@ -2870,7 +2870,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) @@ -2892,7 +2892,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) @@ -2915,7 +2915,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) * * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) @@ -4425,7 +4425,7 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) @@ -4448,7 +4448,7 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) @@ -4470,7 +4470,7 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) @@ -4493,7 +4493,7 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. 
*/ static __always_inline bool atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) @@ -5050,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// f618ac667f868941a84ce0ab2242f1786e049ed4 +// 9dd948d3012b22c4e75933a5172983f912e46439 diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h index f86b29d90877..6a4e47d2db35 100644 --- a/include/linux/atomic/atomic-long.h +++ b/include/linux/atomic/atomic-long.h @@ -1449,7 +1449,7 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) @@ -1473,7 +1473,7 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) @@ -1497,7 +1497,7 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) @@ -1521,7 +1521,7 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere. * - * Return: @true if the exchange occured, @false otherwise. + * Return: @true if the exchange occurred, @false otherwise. 
*/ static __always_inline bool raw_atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) @@ -1809,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v) } #endif /* _LINUX_ATOMIC_LONG_H */ -// eadf183c3600b8b92b91839dd3be6bcc560c752d +// 4b882bf19018602c10816c52f8b4ae280adc887b diff --git a/include/linux/audit.h b/include/linux/audit.h index 536f8ee8da81..a5be09c8497a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -128,12 +128,6 @@ enum audit_nfcfgop { extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); -/* only for compat system calls */ -extern unsigned compat_write_class[]; -extern unsigned compat_read_class[]; -extern unsigned compat_dir_class[]; -extern unsigned compat_chattr_class[]; -extern unsigned compat_signal_class[]; /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ @@ -195,6 +189,8 @@ extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); extern int audit_log_obj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); +extern int audit_log_nf_skb(struct audit_buffer *ab, + const struct sk_buff *skb, u8 nfproto); extern int audit_update_lsm_rules(void); @@ -272,6 +268,12 @@ static inline int audit_log_task_context(struct audit_buffer *ab) static inline void audit_log_task_info(struct audit_buffer *ab) { } +static inline int audit_log_nf_skb(struct audit_buffer *ab, + const struct sk_buff *skb, u8 nfproto) +{ + return 0; +} + static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return INVALID_UID; @@ -316,7 +318,6 @@ extern void __audit_uring_exit(int success, long code); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); -extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); @@ -380,12 +381,6 @@ static inline void audit_syscall_exit(void *pt_regs) __audit_syscall_exit(success, return_code); } } -static inline struct filename *audit_reusename(const __user char *name) -{ - if (unlikely(!audit_dummy_context())) - return __audit_reusename(name); - return NULL; -} static inline void audit_getname(struct filename *name) { if (unlikely(!audit_dummy_context())) @@ -624,10 +619,6 @@ static inline struct audit_context *audit_context(void) { return NULL; } -static inline struct filename *audit_reusename(const __user char *name) -{ - return NULL; -} static inline void audit_getname(struct filename *name) { } static inline void audit_inode(struct filename *name, diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h index 0e34d673ef17..2b8153791e6a 100644 --- a/include/linux/audit_arch.h +++ b/include/linux/audit_arch.h @@ -23,4 +23,11 @@ enum auditsc_class_t { extern int audit_classify_compat_syscall(int abi, unsigned syscall); +/* only for compat system calls */ +extern unsigned compat_write_class[]; +extern unsigned compat_read_class[]; +extern unsigned compat_dir_class[]; +extern unsigned compat_chattr_class[]; +extern unsigned compat_signal_class[]; + #endif diff --git a/include/linux/bio.h b/include/linux/bio.h index 
c75a9b3672aa..36a3f2275ecd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -256,12 +256,6 @@ static inline struct folio *bio_first_folio_all(struct bio *bio) return page_folio(bio_first_page_all(bio)); } -static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) -{ - WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); - return &bio->bi_io_vec[bio->bi_vcnt - 1]; -} - /** * struct folio_iter - State for iterating all folios in a bio. * @folio: The current folio we're iterating. NULL after the last folio. @@ -403,6 +397,29 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) return iov_iter_npages(iter, max_segs); } +/** + * bio_iov_bounce_nr_vecs - calculate number of bvecs for a bounce bio + * @iter: iter to bounce from + * @op: REQ_OP_* for the bio + * + * Calculates how many bvecs are needed for the next bio to bounce from/to + * @iter. + */ +static inline unsigned short +bio_iov_bounce_nr_vecs(struct iov_iter *iter, blk_opf_t op) +{ + /* + * We still need to bounce bvec iters, so don't special case them + * here unlike in bio_iov_vecs_to_alloc. + * + * For reads we need to use a vector for the bounce buffer, account + * for that here. + */ + if (op_is_write(op)) + return iov_iter_npages(iter, BIO_MAX_VECS); + return iov_iter_npages(iter, BIO_MAX_VECS - 1) + 1; +} + struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, @@ -414,6 +431,7 @@ static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); +void bio_reuse(struct bio *bio, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, @@ -456,6 +474,9 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter); +void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty); + extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index c0989b5b0407..7869a6e59b6a 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -7,6 +7,18 @@ #include <linux/atomic.h> #include <linux/bug.h> +#include <asm/processor.h> /* for cpu_relax() */ + +/* + * For static context analysis, we need a unique token for each possible bit + * that can be used as a bit_spinlock. The easiest way to do that is to create a + * fake context that we can cast to with the __bitlock(bitnum, addr) macro + * below, which will give us unique instances for each (bit, addr) pair that the + * static analysis can use. + */ +context_lock_struct(__context_bitlock) { }; +#define __bitlock(bitnum, addr) (struct __context_bitlock *)(bitnum + (addr)) + /* * bit-based spin_lock() * @@ -14,6 +26,7 @@ * are significantly faster. 
*/ static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr) + __acquires(__bitlock(bitnum, addr)) { /* * Assuming the lock is uncontended, this never enters @@ -32,13 +45,14 @@ static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr) preempt_disable(); } #endif - __acquire(bitlock); + __acquire(__bitlock(bitnum, addr)); } /* * Return true if it was acquired */ static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) + __cond_acquires(true, __bitlock(bitnum, addr)) { preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -47,7 +61,7 @@ static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) return 0; } #endif - __acquire(bitlock); + __acquire(__bitlock(bitnum, addr)); return 1; } @@ -55,6 +69,7 @@ static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) * bit-based spin_unlock() */ static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) + __releases(__bitlock(bitnum, addr)) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); @@ -63,7 +78,7 @@ static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) clear_bit_unlock(bitnum, addr); #endif preempt_enable(); - __release(bitlock); + __release(__bitlock(bitnum, addr)); } /* @@ -72,6 +87,7 @@ static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) * protecting the rest of the flags in the word. */ static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr) + __releases(__bitlock(bitnum, addr)) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); @@ -80,7 +96,7 @@ static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr) __clear_bit_unlock(bitnum, addr); #endif preempt_enable(); - __release(bitlock); + __release(__bitlock(bitnum, addr)); } /* diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 126dc5b380af..54aeeef1f0ec 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -8,6 +8,7 @@ #define _LINUX_BITFIELD_H #include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/typecheck.h> #include <asm/byteorder.h> @@ -243,7 +244,7 @@ __MAKE_OP(64) #define __field_prep(mask, val) \ ({ \ - __auto_type __mask = (mask); \ + auto __mask = (mask); \ typeof(__mask) __val = (val); \ unsigned int __shift = BITS_PER_TYPE(__mask) <= 32 ? \ __ffs(__mask) : __ffs64(__mask); \ @@ -252,7 +253,7 @@ __MAKE_OP(64) #define __field_get(mask, reg) \ ({ \ - __auto_type __mask = (mask); \ + auto __mask = (mask); \ typeof(__mask) __reg = (reg); \ unsigned int __shift = BITS_PER_TYPE(__mask) <= 32 ? 
\ __ffs(__mask) : __ffs64(__mask); \ diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 58b0c5254a67..f7c3cb4a342f 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -132,6 +132,11 @@ static inline bool bio_has_crypt_ctx(struct bio *bio) return bio->bi_crypt_context; } +static inline struct bio_crypt_ctx *bio_crypt_ctx(struct bio *bio) +{ + return bio->bi_crypt_context; +} + void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key, const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], gfp_t gfp_mask); @@ -169,8 +174,35 @@ static inline bool bio_has_crypt_ctx(struct bio *bio) return false; } +static inline struct bio_crypt_ctx *bio_crypt_ctx(struct bio *bio) +{ + return NULL; +} + #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +bool __blk_crypto_submit_bio(struct bio *bio); + +/** + * blk_crypto_submit_bio - Submit a bio that may have a crypto context + * @bio: bio to submit + * + * If @bio has no crypto context, or the crypt context attached to @bio is + * supported by the underlying device's inline encryption hardware, just submit + * @bio. + * + * Otherwise, try to perform en/decryption for this bio by falling back to the + * kernel crypto API. For encryption this means submitting newly allocated + * bios for the encrypted payload while keeping back the source bio until they + * complete, while for reads the decryption happens in-place by a hooked in + * completion handler. + */ +static inline void blk_crypto_submit_bio(struct bio *bio) +{ + if (!bio_has_crypt_ctx(bio) || __blk_crypto_submit_bio(bio)) + submit_bio(bio); +} + int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); /** * bio_crypt_clone - clone bio encryption context diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index a6b84206eb94..c15b1ac62765 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -91,7 +91,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, return bio_integrity_intervals(bi, sectors) * bi->metadata_size; } -static inline bool blk_integrity_rq(struct request *rq) +static inline bool blk_integrity_rq(const struct request *rq) { return rq->cmd_flags & REQ_INTEGRITY; } @@ -168,9 +168,9 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, { return 0; } -static inline int blk_integrity_rq(struct request *rq) +static inline bool blk_integrity_rq(const struct request *rq) { - return 0; + return false; } static inline struct bio_vec rq_integrity_vec(struct request *rq) diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index cb88fc791fbd..214c181ff2c9 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -28,7 +28,7 @@ struct blk_dma_iter { bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev, struct dma_iova_state *state, struct blk_dma_iter *iter); bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev, - struct dma_iova_state *state, struct blk_dma_iter *iter); + struct blk_dma_iter *iter); /** * blk_rq_dma_map_coalesce - were all segments coalesced? 
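The blk_crypto_submit_bio() helper introduced in the blk-crypto.h hunk above is meant to replace direct submit_bio() calls for bios that may carry an inline-encryption context. The sketch below shows one possible caller; it is not part of this diff, the function name and the key/DUN arguments are placeholders, and CONFIG_BLK_INLINE_ENCRYPTION is assumed so that bio_crypt_set_ctx() is available.

#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/gfp.h>

/* Illustrative only: attach an inline-encryption context, if any, then submit. */
static void example_submit_maybe_encrypted(struct bio *bio,
					   const struct blk_crypto_key *key,
					   const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
{
	if (key)
		bio_crypt_set_ctx(bio, key, dun, GFP_NOIO);

	/*
	 * Submits @bio directly when it has no crypt context or the device
	 * supports the key; otherwise the blk-crypto fallback handles the
	 * en/decryption as described in the kernel-doc above.
	 */
	blk_crypto_submit_bio(bio);
}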
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cae9e857aea4..18a2388ba581 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -13,6 +13,7 @@ struct blk_mq_tags; struct blk_flush_queue; +struct io_comp_batch; #define BLKDEV_MIN_RQ 4 #define BLKDEV_DEFAULT_RQ 128 @@ -22,7 +23,8 @@ enum rq_end_io_ret { RQ_END_IO_FREE, }; -typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); +typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t, + const struct io_comp_batch *); /* * request flags */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5dc061d318a4..d59553324a84 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -232,6 +232,8 @@ struct bio { atomic_t __bi_remaining; + /* The actual vec list, preserved by bio_reset() */ + struct bio_vec *bi_io_vec; struct bvec_iter bi_iter; union { @@ -275,13 +277,12 @@ struct bio { atomic_t __bi_cnt; /* pin count */ - struct bio_vec *bi_io_vec; /* the actual vec list */ - struct bio_set *bi_pool; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) -#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) +#define BIO_MAX_SIZE UINT_MAX /* max value of bi_iter.bi_size */ +#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> SECTOR_SHIFT) static inline struct bio_vec *bio_inline_vecs(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 72e34acd439c..99ef8cd7673c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -340,14 +340,13 @@ typedef unsigned int __bitwise blk_features_t; /* skip this queue in blk_mq_(un)quiesce_tagset */ #define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) +/* atomic writes enabled */ +#define BLK_FEAT_ATOMIC_WRITES ((__force blk_features_t)(1u << 14)) + /* undocumented magic for bcache */ #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) -/* atomic writes enabled */ -#define BLK_FEAT_ATOMIC_WRITES \ - ((__force blk_features_t)(1u << 16)) - /* * Flags automatically inherited when stacking limits. 
*/ @@ -551,7 +550,8 @@ struct request_queue { /* * queue settings */ - unsigned long nr_requests; /* Max # of requests */ + unsigned int nr_requests; /* Max # of requests */ + unsigned int async_depth; /* Max # of async requests */ #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -681,7 +681,7 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) +#define blk_queue_rot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_passthrough_stat(q) \ ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) @@ -1026,7 +1026,7 @@ extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -/* Helper to convert REQ_OP_XXX to its string format XXX */ +/* Convert a request operation REQ_OP_name into the string "name" */ extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); @@ -1044,7 +1044,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_queue; /* this is never NULL */ } -/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ +/* Convert a zone condition BLK_ZONE_COND_name into the string "name" */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); static inline unsigned int bio_zone_no(struct bio *bio) @@ -1462,9 +1462,14 @@ bdev_write_zeroes_unmap_sectors(struct block_device *bdev) return bdev_limits(bdev)->max_wzeroes_unmap_sectors; } +static inline bool bdev_rot(struct block_device *bdev) +{ + return blk_queue_rot(bdev_get_queue(bdev)); +} + static inline bool bdev_nonrot(struct block_device *bdev) { - return blk_queue_nonrot(bdev_get_queue(bdev)); + return !bdev_rot(bdev); } static inline bool bdev_synchronous(struct block_device *bdev) @@ -1822,6 +1827,7 @@ struct io_comp_batch { struct rq_list req_list; bool need_ts; void (*complete)(struct io_comp_batch *); + void *poll_ctx; }; static inline bool blk_atomic_write_start_sect_aligned(sector_t sector, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d1eb5c7729cb..2f535331f926 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -172,7 +172,7 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); -int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, void *value, u64 flags); @@ -470,7 +470,7 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, - void *value) { + void *value, u64 flags) { return 0; } static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e5be698256d1..cd9b96434904 100644 --- 
a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -287,6 +287,7 @@ struct bpf_map_owner { enum bpf_prog_type type; bool jited; bool xdp_has_frags; + bool sleepable; u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; enum bpf_attach_type expected_attach_type; @@ -673,6 +674,22 @@ void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit); +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, int node_id, + u64 flags); +void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt); +#else +static inline void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt, + int node_id, u64 flags) +{ + return NULL; +} + +static inline void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt) +{ +} +#endif + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of @@ -737,7 +754,7 @@ enum bpf_type_flag { MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), /* PTR was passed from the kernel in a trusted context, and may be - * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. + * passed to kfuncs or BPF helper functions. * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. * PTR_UNTRUSTED refers to a kptr that was read directly from a map * without invoking bpf_kptr_xchg(). What we really need to know is @@ -1213,6 +1230,9 @@ enum { #endif }; +#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8 +#define BPF_TRAMP_IS_RETURN_SHIFT 63 + struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; int nr_links; @@ -1293,6 +1313,7 @@ enum bpf_tramp_prog_type { BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ + BPF_TRAMP_FSESSION, }; struct bpf_tramp_image { @@ -1309,14 +1330,17 @@ struct bpf_tramp_image { }; struct bpf_trampoline { - /* hlist for trampoline_table */ - struct hlist_node hlist; + /* hlist for trampoline_key_table */ + struct hlist_node hlist_key; + /* hlist for trampoline_ip_table */ + struct hlist_node hlist_ip; struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; u32 flags; u64 key; + unsigned long ip; struct { struct btf_func_model model; void *addr; @@ -1418,7 +1442,7 @@ bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src, u64 len, u64 flags); void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, - void *buffer__opt, u64 buffer__szk); + void *buffer__nullable, u64 buffer__szk); static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { @@ -1742,8 +1766,12 @@ struct bpf_prog_aux { struct rcu_head rcu; }; struct bpf_stream stream[2]; + struct mutex st_ops_assoc_mutex; + struct bpf_map __rcu *st_ops_assoc; }; +#define BPF_NR_CONTEXTS 4 /* normal, softirq, hardirq, NMI */ + struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? 
*/ @@ -1759,6 +1787,7 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ + call_session_cookie:1, /* Do we call bpf_session_cookie() */ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ @@ -1770,7 +1799,7 @@ struct bpf_prog { u8 tag[BPF_TAG_SIZE]; }; struct bpf_prog_stats __percpu *stats; - int __percpu *active; + u8 __percpu *active; /* u8[BPF_NR_CONTEXTS] for recursion protection */ unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ @@ -1855,6 +1884,11 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_fsession_link { + struct bpf_tracing_link link; + struct bpf_tramp_link fexit; +}; + struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; @@ -2002,6 +2036,40 @@ struct bpf_struct_ops_common_value { enum bpf_struct_ops_state state; }; +static inline bool bpf_prog_get_recursion_context(struct bpf_prog *prog) +{ +#ifdef CONFIG_ARM64 + u8 rctx = interrupt_context_level(); + u8 *active = this_cpu_ptr(prog->active); + u32 val; + + preempt_disable(); + active[rctx]++; + val = le32_to_cpu(*(__le32 *)active); + preempt_enable(); + if (val != BIT(rctx * 8)) + return false; + + return true; +#else + return this_cpu_inc_return(*(int __percpu *)(prog->active)) == 1; +#endif +} + +static inline void bpf_prog_put_recursion_context(struct bpf_prog *prog) +{ +#ifdef CONFIG_ARM64 + u8 rctx = interrupt_context_level(); + u8 *active = this_cpu_ptr(prog->active); + + preempt_disable(); + active[rctx]--; + preempt_enable(); +#else + this_cpu_dec(*(int __percpu *)(prog->active)); +#endif +} + #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) /* This macro helps developer to register a struct_ops type and generate * type information correctly. 
Developers should use this macro to register @@ -2044,6 +2112,9 @@ static inline void bpf_module_put(const void *data, struct module *owner) module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); +int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map); +void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog); +void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux); u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET @@ -2091,6 +2162,17 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map) +{ + return -EOPNOTSUPP; +} +static inline void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog) +{ +} +static inline void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux) +{ + return NULL; +} static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) { } @@ -2101,6 +2183,37 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION) + cnt++; + } + + return cnt; +} + +static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link) +{ + return link->link.prog->call_session_cookie; +} + +static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (bpf_prog_calls_session_cookie(fentries.links[i])) + cnt++; + } + + return cnt; +} + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt); @@ -2540,6 +2653,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG +void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, + struct mem_cgroup **new_memcg); +void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, + struct mem_cgroup *memcg); void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, @@ -2564,6 +2681,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, kvcalloc(_n, _size, _flags) #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ __alloc_percpu_gfp(_size, _align, _flags) +static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, + struct mem_cgroup **new_memcg) +{ + *new_memcg = NULL; + *old_memcg = NULL; +} + +static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, + struct mem_cgroup *memcg) +{ +} #endif static inline int @@ -2764,8 +2892,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags); 
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, @@ -3243,6 +3371,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add } #endif /* CONFIG_BPF_SYSCALL */ +static inline bool bpf_net_capable(void) +{ + return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); +} + static __always_inline int bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) { @@ -3832,14 +3965,43 @@ bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image) } #endif +static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) +{ + switch (map_type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + return true; + default: + return false; + } +} + static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags) { - if (flags & ~allowed_flags) + u32 cpu; + + if ((u32)flags & ~allowed_flags) return -EINVAL; if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) return -EINVAL; + if (!(flags & BPF_F_CPU) && flags >> 32) + return -EINVAL; + + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if (!bpf_map_supports_cpu_flags(map->map_type)) + return -EINVAL; + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) + return -EINVAL; + + cpu = flags >> 32; + if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus()) + return -ERANGE; + } + return 0; } diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 66432248cd81..85efa9772530 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -15,12 +15,13 @@ #include <linux/types.h> #include <linux/bpf_mem_alloc.h> #include <uapi/linux/btf.h> +#include <asm/rqspinlock.h> #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 struct bpf_local_storage_map_bucket { struct hlist_head list; - raw_spinlock_t lock; + rqspinlock_t lock; }; /* Thp map is not the primary owner of a bpf_local_storage_elem. @@ -67,6 +68,11 @@ struct bpf_local_storage_data { u8 data[] __aligned(8); }; +#define SELEM_MAP_UNLINKED (1 << 0) +#define SELEM_STORAGE_UNLINKED (1 << 1) +#define SELEM_UNLINKED (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED) +#define SELEM_TOFREE (1 << 2) + /* Linked to bpf_local_storage and bpf_local_storage_map */ struct bpf_local_storage_elem { struct hlist_node map_node; /* Linked to bpf_local_storage_map */ @@ -79,7 +85,9 @@ struct bpf_local_storage_elem { * after raw_spin_unlock */ }; - /* 8 bytes hole */ + atomic_t state; + bool use_kmalloc_nolock; + /* 3 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. */ @@ -88,13 +96,14 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; - struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem. */ struct rcu_head rcu; - raw_spinlock_t lock; /* Protect adding/removing from the "list" */ + rqspinlock_t lock; /* Protect adding/removing from the "list" */ + u64 mem_charge; /* Copy of mem charged to owner. 
Protected by "lock" */ + refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */ bool use_kmalloc_nolock; }; @@ -162,11 +171,10 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, return SDATA(selem); } -void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); +u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, - struct bpf_local_storage_cache *cache, - int __percpu *busy_counter); + struct bpf_local_storage_cache *cache); int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, @@ -176,10 +184,11 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); +int bpf_selem_unlink(struct bpf_local_storage_elem *selem); -void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem); +int bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem); struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h index 929225f7b095..0b9f4caeeb0a 100644 --- a/include/linux/bpf_mprog.h +++ b/include/linux/bpf_mprog.h @@ -340,4 +340,14 @@ static inline bool bpf_mprog_supported(enum bpf_prog_type type) return false; } } + +static inline bool bpf_mprog_detach_empty(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SCHED_CLS: + return bpf_net_capable(); + default: + return false; + } +} #endif /* __BPF_MPROG_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 130bcbd66f60..ef8e45a362d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -147,8 +147,12 @@ struct bpf_reg_state { * registers. 
Example: * r1 = r2; both will have r1->id == r2->id == N * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10 + * r3 = r2; both will have r3->id == r2->id == N + * w3 += 10; r3->id == N | BPF_ADD_CONST32 and r3->off == 10 */ -#define BPF_ADD_CONST (1U << 31) +#define BPF_ADD_CONST64 (1U << 31) +#define BPF_ADD_CONST32 (1U << 30) +#define BPF_ADD_CONST (BPF_ADD_CONST64 | BPF_ADD_CONST32) u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned * from a pointer-cast helper, bpf_sk_fullsock() and @@ -692,12 +696,16 @@ struct bpf_id_pair { struct bpf_idmap { u32 tmp_id_gen; + u32 cnt; struct bpf_id_pair map[BPF_ID_MAP_SIZE]; }; struct bpf_idset { - u32 count; - u32 ids[BPF_ID_MAP_SIZE]; + u32 num_ids; + struct { + u32 id; + u32 cnt; + } entries[BPF_ID_MAP_SIZE]; }; /* see verifier.c:compute_scc_callchain() */ diff --git a/include/linux/btf.h b/include/linux/btf.h index f06976ffb63f..48108471c5b1 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -34,7 +34,7 @@ * * And the following kfunc: * - * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) + * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE) * * All invocations to the kfunc must pass the unmodified, unwalked task: * @@ -66,7 +66,6 @@ * return 0; * } */ -#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ #define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */ @@ -79,6 +78,7 @@ #define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */ #define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */ #define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */ +#define KF_IMPLICIT_ARGS (1 << 16) /* kfunc has implicit arguments supplied by the verifier */ /* * Tag marking a kernel function as a kfunc. 
This is meant to minimize the @@ -220,6 +220,7 @@ bool btf_is_module(const struct btf *btf); bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); +u32 btf_named_start_id(const struct btf *btf, bool own); struct btf *btf_base_btf(const struct btf *btf); bool btf_type_is_i32(const struct btf_type *t); bool btf_type_is_i64(const struct btf_type *t); @@ -575,8 +576,8 @@ const char *btf_name_by_offset(const struct btf *btf, u32 offset); const char *btf_str_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, - const struct bpf_prog *prog); +u32 *btf_kfunc_flags(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog); +bool btf_kfunc_is_allowed(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog); u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, const struct bpf_prog *prog); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 831c1b4b626c..7acc06b22fb7 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -7,7 +7,10 @@ #define BUILD_ID_SIZE_MAX 20 struct vm_area_struct; +struct file; + int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); +int build_id_parse_file(struct file *file, unsigned char *build_id, __u32 *size); int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c7f2c63b3bc3..08e5dbe15ca4 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -31,6 +31,12 @@ #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ #define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ +/* + * name for "old" CephFS file systems, + * see ceph.git e2b151d009640114b2565c901d6f41f6cd5ec652 + */ +#define CEPH_OLD_FS_NAME "cephfs" + /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 8d41b917c77d..dbc4162921e9 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -278,16 +278,21 @@ const volatile void * __must_check_fn(const volatile void *val) #define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \ typedef _type class_##_name##_t; \ +typedef _type lock_##_name##_t; \ static __always_inline void class_##_name##_destructor(_type *p) \ + __no_context_analysis \ { _type _T = *p; _exit; } \ static __always_inline _type class_##_name##_constructor(_init_args) \ + __no_context_analysis \ { _type t = _init; return t; } #define EXTEND_CLASS(_name, ext, _init, _init_args...) \ +typedef lock_##_name##_t lock_##_name##ext##_t; \ typedef class_##_name##_t class_##_name##ext##_t; \ static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *p) \ { class_##_name##_destructor(p); } \ static __always_inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ + __no_context_analysis \ { class_##_name##_t t = _init; return t; } #define CLASS(_name, var) \ @@ -474,35 +479,80 @@ _label: \ */ #define __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, ...) 
\ +typedef _type lock_##_name##_t; \ typedef struct { \ _type *lock; \ __VA_ARGS__; \ } class_##_name##_t; \ \ static __always_inline void class_##_name##_destructor(class_##_name##_t *_T) \ + __no_context_analysis \ { \ if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) -#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ +#define __DEFINE_LOCK_GUARD_1(_name, _type, ...) \ static __always_inline class_##_name##_t class_##_name##_constructor(_type *l) \ + __no_context_analysis \ { \ class_##_name##_t _t = { .lock = l }, *_T = &_t; \ - _lock; \ + __VA_ARGS__; \ return _t; \ } -#define __DEFINE_LOCK_GUARD_0(_name, _lock) \ +#define __DEFINE_LOCK_GUARD_0(_name, ...) \ static __always_inline class_##_name##_t class_##_name##_constructor(void) \ + __no_context_analysis \ { \ class_##_name##_t _t = { .lock = (void*)1 }, \ *_T __maybe_unused = &_t; \ - _lock; \ + __VA_ARGS__; \ return _t; \ } +#define DECLARE_LOCK_GUARD_0_ATTRS(_name, _lock, _unlock) \ +static inline class_##_name##_t class_##_name##_constructor(void) _lock;\ +static inline void class_##_name##_destructor(class_##_name##_t *_T) _unlock; + +/* + * To support Context Analysis, we need to allow the compiler to see the + * acquisition and release of the context lock. However, the "cleanup" helpers + * wrap the lock in a struct passed through separate helper functions, which + * hides the lock alias from the compiler (no inter-procedural analysis). + * + * To make it work, we introduce an explicit alias to the context lock instance + * that is "cleaned" up with a separate cleanup helper. This helper is a dummy + * function that does nothing at runtime, but has the "_unlock" attribute to + * tell the compiler what happens at the end of the scope. + * + * To generalize the pattern, the WITH_LOCK_GUARD_1_ATTRS() macro should be used + * to redefine the constructor, which then also creates the alias variable with + * the right "cleanup" attribute, *after* DECLARE_LOCK_GUARD_1_ATTRS() has been + * used. + * + * Example usage: + * + * DECLARE_LOCK_GUARD_1_ATTRS(mutex, __acquires(_T), __releases(*(struct mutex **)_T)) + * #define class_mutex_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex, _T) + * + * Note: To support the for-loop based scoped helpers, the auxiliary variable + * must be a pointer to the "class" type because it is defined in the same + * statement as the guard variable. However, we initialize it with the lock + * pointer (despite the type mismatch, the compiler's alias analysis still works + * as expected). The "_unlock" attribute receives a pointer to the auxiliary + * variable (a double pointer to the class type), and must be cast and + * dereferenced appropriately. + */ +#define DECLARE_LOCK_GUARD_1_ATTRS(_name, _lock, _unlock) \ +static inline class_##_name##_t class_##_name##_constructor(lock_##_name##_t *_T) _lock;\ +static __always_inline void __class_##_name##_cleanup_ctx(class_##_name##_t **_T) \ + __no_context_analysis _unlock { } +#define WITH_LOCK_GUARD_1_ATTRS(_name, _T) \ + class_##_name##_constructor(_T), \ + *__UNIQUE_ID(unlock) __cleanup(__class_##_name##_cleanup_ctx) = (void *)(unsigned long)(_T) + #define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) 
\ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ diff --git a/include/linux/cma.h b/include/linux/cma.h index 62d9c1cf6326..2e6931735880 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -57,6 +57,15 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e extern void cma_reserve_pages_on_error(struct cma *cma); +#ifdef CONFIG_DMA_CMA +extern bool cma_skip_dt_default_reserved_mem(void); +#else +static inline bool cma_skip_dt_default_reserved_mem(void) +{ + return false; +} +#endif + #ifdef CONFIG_CMA struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); bool cma_free_folio(struct cma *cma, const struct folio *folio); diff --git a/include/linux/compiler-context-analysis.h b/include/linux/compiler-context-analysis.h new file mode 100644 index 000000000000..00c074a2ccb0 --- /dev/null +++ b/include/linux/compiler-context-analysis.h @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macros and attributes for compiler-based static context analysis. + */ + +#ifndef _LINUX_COMPILER_CONTEXT_ANALYSIS_H +#define _LINUX_COMPILER_CONTEXT_ANALYSIS_H + +#if defined(WARN_CONTEXT_ANALYSIS) && !defined(__CHECKER__) && !defined(__GENKSYMS__) + +/* + * These attributes define new context lock (Clang: capability) types. + * Internal only. + */ +# define __ctx_lock_type(name) __attribute__((capability(#name))) +# define __reentrant_ctx_lock __attribute__((reentrant_capability)) +# define __acquires_ctx_lock(...) __attribute__((acquire_capability(__VA_ARGS__))) +# define __acquires_shared_ctx_lock(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) +# define __try_acquires_ctx_lock(ret, var) __attribute__((try_acquire_capability(ret, var))) +# define __try_acquires_shared_ctx_lock(ret, var) __attribute__((try_acquire_shared_capability(ret, var))) +# define __releases_ctx_lock(...) __attribute__((release_capability(__VA_ARGS__))) +# define __releases_shared_ctx_lock(...) __attribute__((release_shared_capability(__VA_ARGS__))) +# define __returns_ctx_lock(var) __attribute__((lock_returned(var))) + +/* + * The below are used to annotate code being checked. Internal only. + */ +# define __excludes_ctx_lock(...) __attribute__((locks_excluded(__VA_ARGS__))) +# define __requires_ctx_lock(...) __attribute__((requires_capability(__VA_ARGS__))) +# define __requires_shared_ctx_lock(...) __attribute__((requires_shared_capability(__VA_ARGS__))) + +/* + * The "assert_capability" attribute is a bit confusingly named. It does not + * generate a check. Instead, it tells the analysis to *assume* the capability + * is held. This is used for augmenting runtime assertions, that can then help + * with patterns beyond the compiler's static reasoning abilities. + */ +# define __assumes_ctx_lock(...) __attribute__((assert_capability(__VA_ARGS__))) +# define __assumes_shared_ctx_lock(...) __attribute__((assert_shared_capability(__VA_ARGS__))) + +/** + * __guarded_by - struct member and globals attribute, declares variable + * only accessible within active context + * + * Declares that the struct member or global variable is only accessible within + * the context entered by the given context lock. Read operations on the data + * require shared access, while write operations require exclusive access. + * + * .. code-block:: c + * + * struct some_state { + * spinlock_t lock; + * long counter __guarded_by(&lock); + * }; + */ +# define __guarded_by(...) 
__attribute__((guarded_by(__VA_ARGS__))) + +/** + * __pt_guarded_by - struct member and globals attribute, declares pointed-to + * data only accessible within active context + * + * Declares that the data pointed to by the struct member pointer or global + * pointer is only accessible within the context entered by the given context + * lock. Read operations on the data require shared access, while write + * operations require exclusive access. + * + * .. code-block:: c + * + * struct some_state { + * spinlock_t lock; + * long *counter __pt_guarded_by(&lock); + * }; + */ +# define __pt_guarded_by(...) __attribute__((pt_guarded_by(__VA_ARGS__))) + +/** + * context_lock_struct() - declare or define a context lock struct + * @name: struct name + * + * Helper to declare or define a struct type that is also a context lock. + * + * .. code-block:: c + * + * context_lock_struct(my_handle) { + * int foo; + * long bar; + * }; + * + * struct some_state { + * ... + * }; + * // ... declared elsewhere ... + * context_lock_struct(some_state); + * + * Note: The implementation defines several helper functions that can acquire + * and release the context lock. + */ +# define context_lock_struct(name, ...) \ + struct __ctx_lock_type(name) __VA_ARGS__ name; \ + static __always_inline void __acquire_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __no_context_analysis __acquires_ctx_lock(var) { } \ + static __always_inline void __acquire_shared_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __no_context_analysis __acquires_shared_ctx_lock(var) { } \ + static __always_inline bool __try_acquire_ctx_lock(const struct name *var, bool ret) \ + __attribute__((overloadable)) __no_context_analysis __try_acquires_ctx_lock(1, var) \ + { return ret; } \ + static __always_inline bool __try_acquire_shared_ctx_lock(const struct name *var, bool ret) \ + __attribute__((overloadable)) __no_context_analysis __try_acquires_shared_ctx_lock(1, var) \ + { return ret; } \ + static __always_inline void __release_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __no_context_analysis __releases_ctx_lock(var) { } \ + static __always_inline void __release_shared_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __no_context_analysis __releases_shared_ctx_lock(var) { } \ + static __always_inline void __assume_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __assumes_ctx_lock(var) { } \ + static __always_inline void __assume_shared_ctx_lock(const struct name *var) \ + __attribute__((overloadable)) __assumes_shared_ctx_lock(var) { } \ + struct name + +/** + * disable_context_analysis() - disables context analysis + * + * Disables context analysis. Must be paired with a later + * enable_context_analysis(). + */ +# define disable_context_analysis() \ + __diag_push(); \ + __diag_ignore_all("-Wunknown-warning-option", "") \ + __diag_ignore_all("-Wthread-safety", "") \ + __diag_ignore_all("-Wthread-safety-pointer", "") + +/** + * enable_context_analysis() - re-enables context analysis + * + * Re-enables context analysis. Must be paired with a prior + * disable_context_analysis(). + */ +# define enable_context_analysis() __diag_pop() + +/** + * __no_context_analysis - function attribute, disables context analysis + * + * Function attribute denoting that context analysis is disabled for the + * whole function. Prefer use of `context_unsafe()` where possible. 
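As a quick sketch of what the annotations above provide in practice, consider a
__guarded_by() member (this assumes spinlock_t and its lock/unlock functions
carry matching acquire/release context-lock annotations elsewhere in this
series; the struct and function names are illustrative only):

  #include <linux/spinlock.h>

  struct counter_state {
          spinlock_t lock;
          long counter __guarded_by(&lock);
  };

  static void counter_inc(struct counter_state *s)
  {
          spin_lock(&s->lock);
          s->counter++;           /* ok: lock held exclusively */
          spin_unlock(&s->lock);
  }

  static long counter_peek(struct counter_state *s)
  {
          /* With WARN_CONTEXT_ANALYSIS enabled, clang flags this read:
           * 'counter' requires holding 'lock' (at least shared). */
          return s->counter;
  }

Where such an access is known to be safe, context_unsafe() (defined further
below) or __no_context_analysis can be used to opt out.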
+ */ +# define __no_context_analysis __attribute__((no_thread_safety_analysis)) + +#else /* !WARN_CONTEXT_ANALYSIS */ + +# define __ctx_lock_type(name) +# define __reentrant_ctx_lock +# define __acquires_ctx_lock(...) +# define __acquires_shared_ctx_lock(...) +# define __try_acquires_ctx_lock(ret, var) +# define __try_acquires_shared_ctx_lock(ret, var) +# define __releases_ctx_lock(...) +# define __releases_shared_ctx_lock(...) +# define __assumes_ctx_lock(...) +# define __assumes_shared_ctx_lock(...) +# define __returns_ctx_lock(var) +# define __guarded_by(...) +# define __pt_guarded_by(...) +# define __excludes_ctx_lock(...) +# define __requires_ctx_lock(...) +# define __requires_shared_ctx_lock(...) +# define __acquire_ctx_lock(var) do { } while (0) +# define __acquire_shared_ctx_lock(var) do { } while (0) +# define __try_acquire_ctx_lock(var, ret) (ret) +# define __try_acquire_shared_ctx_lock(var, ret) (ret) +# define __release_ctx_lock(var) do { } while (0) +# define __release_shared_ctx_lock(var) do { } while (0) +# define __assume_ctx_lock(var) do { (void)(var); } while (0) +# define __assume_shared_ctx_lock(var) do { (void)(var); } while (0) +# define context_lock_struct(name, ...) struct __VA_ARGS__ name +# define disable_context_analysis() +# define enable_context_analysis() +# define __no_context_analysis + +#endif /* WARN_CONTEXT_ANALYSIS */ + +/** + * context_unsafe() - disable context checking for contained code + * + * Disables context checking for contained statements or expression. + * + * .. code-block:: c + * + * struct some_data { + * spinlock_t lock; + * int counter __guarded_by(&lock); + * }; + * + * int foo(struct some_data *d) + * { + * // ... + * // other code that is still checked ... + * // ... + * return context_unsafe(d->counter); + * } + */ +#define context_unsafe(...) \ +({ \ + disable_context_analysis(); \ + __VA_ARGS__; \ + enable_context_analysis() \ +}) + +/** + * __context_unsafe() - function attribute, disable context checking + * @comment: comment explaining why opt-out is safe + * + * Function attribute denoting that context analysis is disabled for the + * whole function. Forces adding an inline comment as argument. + */ +#define __context_unsafe(comment) __no_context_analysis + +/** + * context_unsafe_alias() - helper to insert a context lock "alias barrier" + * @p: pointer aliasing a context lock or object containing context locks + * + * No-op function that acts as a "context lock alias barrier", where the + * analysis rightfully detects that we're switching aliases, but the switch is + * considered safe but beyond the analysis reasoning abilities. + * + * This should be inserted before the first use of such an alias. + * + * Implementation Note: The compiler ignores aliases that may be reassigned but + * their value cannot be determined (e.g. when passing a non-const pointer to an + * alias as a function argument). + */ +#define context_unsafe_alias(p) _context_unsafe_alias((void **)&(p)) +static inline void _context_unsafe_alias(void **p) { } + +/** + * token_context_lock() - declare an abstract global context lock instance + * @name: token context lock name + * + * Helper that declares an abstract global context lock instance @name, but not + * backed by a real data structure (linker error if accidentally referenced). + * The type name is `__ctx_lock_@name`. + */ +#define token_context_lock(name, ...) 
\ + context_lock_struct(__ctx_lock_##name, ##__VA_ARGS__) {}; \ + extern const struct __ctx_lock_##name *name + +/** + * token_context_lock_instance() - declare another instance of a global context lock + * @ctx: token context lock previously declared with token_context_lock() + * @name: name of additional global context lock instance + * + * Helper that declares an additional instance @name of the same token context + * lock class @ctx. This is helpful where multiple related token contexts are + * declared, to allow using the same underlying type (`__ctx_lock_@ctx`) as + * function arguments. + */ +#define token_context_lock_instance(ctx, name) \ + extern const struct __ctx_lock_##ctx *name + +/* + * Common keywords for static context analysis. + */ + +/** + * __must_hold() - function attribute, caller must hold exclusive context lock + * + * Function attribute declaring that the caller must hold the given context + * lock instance(s) exclusively. + */ +#define __must_hold(...) __requires_ctx_lock(__VA_ARGS__) + +/** + * __must_not_hold() - function attribute, caller must not hold context lock + * + * Function attribute declaring that the caller must not hold the given context + * lock instance(s). + */ +#define __must_not_hold(...) __excludes_ctx_lock(__VA_ARGS__) + +/** + * __acquires() - function attribute, function acquires context lock exclusively + * + * Function attribute declaring that the function acquires the given context + * lock instance(s) exclusively, but does not release them. + */ +#define __acquires(...) __acquires_ctx_lock(__VA_ARGS__) + +/* + * Clang's analysis does not care precisely about the value, only that it is + * either zero or non-zero. So the __cond_acquires() interface might be + * misleading if we say that @ret is the value returned if acquired. Instead, + * provide symbolic variants which we translate. + */ +#define __cond_acquires_impl_true(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_false(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_nonzero(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_0(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_nonnull(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_NULL(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) + +/** + * __cond_acquires() - function attribute, function conditionally + * acquires a context lock exclusively + * @ret: abstract value returned by function if context lock acquired + * @x: context lock instance pointer + * + * Function attribute declaring that the function conditionally acquires the + * given context lock instance @x exclusively, but does not release it. The + * function return value @ret denotes when the context lock is acquired. + * + * @ret may be one of: true, false, nonzero, 0, nonnull, NULL. + */ +#define __cond_acquires(ret, x) __cond_acquires_impl_##ret(x) + +/** + * __releases() - function attribute, function releases a context lock exclusively + * + * Function attribute declaring that the function releases the given context + * lock instance(s) exclusively. The associated context(s) must be active on + * entry. + */ +#define __releases(...) __releases_ctx_lock(__VA_ARGS__) + +/** + * __acquire() - function to acquire context lock exclusively + * @x: context lock instance pointer + * + * No-op function that acquires the given context lock instance @x exclusively. 
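A brief sketch of how a token context lock composes with the keywords defined
above (the token name and the functions below are made up for illustration):

  token_context_lock(probe_ctx);

  static void enter_probe(void) __acquires(probe_ctx)
  {
          __acquire(probe_ctx);
          /* ... enter the abstract context ... */
  }

  static void leave_probe(void) __releases(probe_ctx)
  {
          /* ... leave the abstract context ... */
          __release(probe_ctx);
  }

  /* Callers must already be inside the probe context. */
  static void probe_step(void) __must_hold(probe_ctx);

Since probe_ctx is never defined, any accidental runtime reference to it
results in a linker error, as noted above.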
+ */ +#define __acquire(x) __acquire_ctx_lock(x) + +/** + * __release() - function to release context lock exclusively + * @x: context lock instance pointer + * + * No-op function that releases the given context lock instance @x. + */ +#define __release(x) __release_ctx_lock(x) + +/** + * __must_hold_shared() - function attribute, caller must hold shared context lock + * + * Function attribute declaring that the caller must hold the given context + * lock instance(s) with shared access. + */ +#define __must_hold_shared(...) __requires_shared_ctx_lock(__VA_ARGS__) + +/** + * __acquires_shared() - function attribute, function acquires context lock shared + * + * Function attribute declaring that the function acquires the given + * context lock instance(s) with shared access, but does not release them. + */ +#define __acquires_shared(...) __acquires_shared_ctx_lock(__VA_ARGS__) + +/** + * __cond_acquires_shared() - function attribute, function conditionally + * acquires a context lock shared + * @ret: abstract value returned by function if context lock acquired + * @x: context lock instance pointer + * + * Function attribute declaring that the function conditionally acquires the + * given context lock instance @x with shared access, but does not release it. + * The function return value @ret denotes when the context lock is acquired. + * + * @ret may be one of: true, false, nonzero, 0, nonnull, NULL. + */ +#define __cond_acquires_shared(ret, x) __cond_acquires_impl_##ret(x, _shared) + +/** + * __releases_shared() - function attribute, function releases a + * context lock shared + * + * Function attribute declaring that the function releases the given context + * lock instance(s) with shared access. The associated context(s) must be + * active on entry. + */ +#define __releases_shared(...) __releases_shared_ctx_lock(__VA_ARGS__) + +/** + * __acquire_shared() - function to acquire context lock shared + * @x: context lock instance pointer + * + * No-op function that acquires the given context lock instance @x with shared + * access. + */ +#define __acquire_shared(x) __acquire_shared_ctx_lock(x) + +/** + * __release_shared() - function to release context lock shared + * @x: context lock instance pointer + * + * No-op function that releases the given context lock instance @x with shared + * access. + */ +#define __release_shared(x) __release_shared_ctx_lock(x) + +/** + * __acquire_ret() - helper to acquire context lock of return value + * @call: call expression + * @ret_expr: acquire expression that uses __ret + */ +#define __acquire_ret(call, ret_expr) \ + ({ \ + __auto_type __ret = call; \ + __acquire(ret_expr); \ + __ret; \ + }) + +/** + * __acquire_shared_ret() - helper to acquire context lock shared of return value + * @call: call expression + * @ret_expr: acquire shared expression that uses __ret + */ +#define __acquire_shared_ret(call, ret_expr) \ + ({ \ + __auto_type __ret = call; \ + __acquire_shared(ret_expr); \ + __ret; \ + }) + +/* + * Attributes to mark functions returning acquired context locks. + * + * This is purely cosmetic to help readability, and should be used with the + * above macros as follows: + * + * struct foo { spinlock_t lock; ... }; + * ... + * #define myfunc(...) __acquire_ret(_myfunc(__VA_ARGS__), &__ret->lock) + * struct foo *_myfunc(int bar) __acquires_ret; + * ... 
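Building on the pattern in the comment above, the caller side might look
roughly as follows (this sketch assumes spinlock_t is a recognized context lock
and spin_unlock() carries a release annotation; the 'data' member and use_foo()
are illustrative):

  struct foo {
          spinlock_t lock;
          int data __guarded_by(&lock);
  };

  #define myfunc(...) __acquire_ret(_myfunc(__VA_ARGS__), &__ret->lock)
  struct foo *_myfunc(int bar) __acquires_ret;

  static void use_foo(void)
  {
          struct foo *f = myfunc(42);     /* &f->lock is now treated as held */

          f->data++;                      /* ok: guarded member, lock recorded as held */
          spin_unlock(&f->lock);
  }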
+ */ +#define __acquires_ret __no_context_analysis +#define __acquires_shared_ret __no_context_analysis + +#endif /* _LINUX_COMPILER_CONTEXT_ANALYSIS_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04487c9bd751..af16624b29fd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -190,7 +190,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define data_race(expr) \ ({ \ __kcsan_disable_current(); \ + disable_context_analysis(); \ auto __v = (expr); \ + enable_context_analysis(); \ __kcsan_enable_current(); \ __v; \ }) @@ -231,16 +233,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, "must be non-C-string (not NUL-terminated)") /* - * Use __typeof_unqual__() when available. - * - * XXX: Remove test for __CHECKER__ once - * sparse learns about __typeof_unqual__(). - */ -#if CC_HAS_TYPEOF_UNQUAL && !defined(__CHECKER__) -# define USE_TYPEOF_UNQUAL 1 -#endif - -/* * Define TYPEOF_UNQUAL() to use __typeof_unqual__() as typeof * operator when available, to return an unqualified type of the exp. */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d3318a3c2577..3c936b129860 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -41,6 +41,8 @@ # define BTF_TYPE_TAG(value) /* nothing */ #endif +#include <linux/compiler-context-analysis.h> + /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* address spaces */ @@ -51,14 +53,6 @@ # define __rcu __attribute__((noderef, address_space(__rcu))) static inline void __chk_user_ptr(const volatile void __user *ptr) { } static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } -/* context/locking */ -# define __must_hold(x) __attribute__((context(x,1,1))) -# define __acquires(x) __attribute__((context(x,0,1))) -# define __cond_acquires(x) __attribute__((context(x,0,-1))) -# define __releases(x) __attribute__((context(x,1,0))) -# define __acquire(x) __context__(x,1) -# define __release(x) __context__(x,-1) -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) /* other */ # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) @@ -79,14 +73,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 -/* context/locking */ -# define __must_hold(x) -# define __acquires(x) -# define __cond_acquires(x) -# define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 -# define __cond_lock(x,c) (c) /* other */ # define __force # define __nocast @@ -369,7 +355,7 @@ struct ftrace_likely_data { * Optional: only supported since clang >= 18 * * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://github.com/llvm/llvm-project/pull/76348 + * clang: https://clang.llvm.org/docs/AttributeReference.html#counted-by-counted-by-or-null-sized-by-sized-by-or-null * * __bdos on clang < 19.1.2 can erroneously return 0: * https://github.com/llvm/llvm-project/pull/110497 @@ -384,6 +370,22 @@ struct ftrace_likely_data { #endif /* + * Runtime track number of objects pointed to by a pointer member for use by + * CONFIG_FORTIFY_SOURCE and CONFIG_UBSAN_BOUNDS. 
+ * + * Optional: only supported since gcc >= 16 + * Optional: only supported since clang >= 22 + * + * gcc: https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681727.html + * clang: https://clang.llvm.org/docs/AttributeReference.html#counted-by-counted-by-or-null-sized-by-sized-by-or-null + */ +#ifdef CONFIG_CC_HAS_COUNTED_BY_PTR +#define __counted_by_ptr(member) __attribute__((__counted_by__(member))) +#else +#define __counted_by_ptr(member) +#endif + +/* * Optional: only supported since gcc >= 15 * Optional: not supported by Clang * @@ -536,6 +538,37 @@ struct ftrace_likely_data { #endif /* + * Optional: only supported since gcc >= 15, clang >= 19 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#index-_005f_005fbuiltin_005fcounted_005fby_005fref + * clang: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-counted-by-ref + */ +#if __has_builtin(__builtin_counted_by_ref) +/** + * __flex_counter() - Get pointer to counter member for the given + * flexible array, if it was annotated with __counted_by() + * @FAM: Pointer to flexible array member of an addressable struct instance + * + * For example, with: + * + * struct foo { + * int counter; + * short array[] __counted_by(counter); + * } *p; + * + * __flex_counter(p->array) will resolve to &p->counter. + * + * Note that Clang may not allow this to be assigned to a separate + * variable; it must be used directly. + * + * If p->array is unannotated, this returns (void *)NULL. + */ +#define __flex_counter(FAM) __builtin_counted_by_ref(FAM) +#else +#define __flex_counter(FAM) ((void *)NULL) +#endif + +/* * Some versions of gcc do not mark 'asm goto' volatile: * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 @@ -562,6 +595,14 @@ struct ftrace_likely_data { #define asm_inline asm #endif +#ifndef __ASSEMBLY__ +/* + * Use __typeof_unqual__() when available. + */ +#if CC_HAS_TYPEOF_UNQUAL || defined(__CHECKER__) +# define USE_TYPEOF_UNQUAL 1 +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) @@ -569,6 +610,7 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ +#ifndef USE_TYPEOF_UNQUAL /* * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. @@ -586,6 +628,29 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) +#else +#define __unqual_scalar_typeof(x) __typeof_unqual__(x) +#endif +#endif /* !__ASSEMBLY__ */ + +/* + * __signed_scalar_typeof(x) - Declare a signed scalar type, leaving + * non-scalar types unchanged. 
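To make __counted_by_ptr() and __flex_counter() (both introduced above) more
concrete, a small sketch; the struct and field names are hypothetical, and the
__flex_counter() assignment compiles as shown only when the builtin is
available (the (void *)NULL fallback would otherwise need to be guarded):

  struct msg {
          int len;
          unsigned char *payload __counted_by_ptr(len);   /* 'payload' has 'len' elements */
  };

  struct log {
          int nr;
          short entry[] __counted_by(nr);
  };

  static void log_set_nr(struct log *l, int nr)
  {
          /* 'entry' is annotated, so this resolves to &l->nr. */
          *__flex_counter(l->entry) = nr;
  }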
+ */ + +#define __scalar_type_to_signed_cases(type) \ + unsigned type: (signed type)0, \ + signed type: (signed type)0 + +#define __signed_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (signed char)0, \ + __scalar_type_to_signed_cases(char), \ + __scalar_type_to_signed_cases(short), \ + __scalar_type_to_signed_cases(int), \ + __scalar_type_to_signed_cases(long), \ + __scalar_type_to_signed_cases(long long), \ + default: (x))) /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ diff --git a/include/linux/console.h b/include/linux/console.h index fc9f5c5c1b04..f882833bedf0 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -492,8 +492,8 @@ static inline bool console_srcu_read_lock_is_held(void) extern int console_srcu_read_lock(void); extern void console_srcu_read_unlock(int cookie); -extern void console_list_lock(void) __acquires(console_mutex); -extern void console_list_unlock(void) __releases(console_mutex); +extern void console_list_lock(void); +extern void console_list_unlock(void); extern struct hlist_head console_list; diff --git a/include/linux/cper.h b/include/linux/cper.h index 5b1236d8c65b..440b35e459e5 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -595,7 +595,8 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *, const char *cper_mem_err_unpack(struct trace_seq *, struct cper_mem_err_compact *); void cper_print_proc_arm(const char *pfx, - const struct cper_sec_proc_arm *proc); + const struct cper_sec_proc_arm *proc, + u32 length); void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc); int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0465d1e6f72a..cc894fc38971 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -203,6 +203,7 @@ struct cpufreq_freqs { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); +struct cpufreq_policy *cpufreq_cpu_policy(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); void cpufreq_cpu_put(struct cpufreq_policy *policy); #else @@ -210,6 +211,10 @@ static inline struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) { return NULL; } +static inline struct cpufreq_policy *cpufreq_cpu_policy(unsigned int cpu) +{ + return NULL; +} static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { return NULL; diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index f7aa20f62b87..286b3ab92e15 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -13,6 +13,7 @@ struct device; extern int lockdep_is_cpus_held(void); +extern int lockdep_is_cpus_write_held(void); #ifdef CONFIG_HOTPLUG_CPU void cpus_write_lock(void); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index a98d3330385c..a4aa2f1767d0 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,6 +18,8 @@ #include <linux/mmu_context.h> #include <linux/jump_label.h> +extern bool lockdep_is_cpuset_held(void); + #ifdef CONFIG_CPUSETS /* @@ -77,7 +79,6 @@ extern void cpuset_unlock(void); extern void cpuset_cpus_allowed_locked(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); -extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed 
(current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -213,11 +214,6 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) return false; } -static inline bool cpuset_cpu_is_isolated(int cpu) -{ - return false; -} - static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; diff --git a/include/linux/cred.h b/include/linux/cred.h index 343a140a6ba2..ed1609d78cd7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -164,7 +164,6 @@ static inline const struct cred *kernel_cred(void) return rcu_dereference_raw(init_task.cred); } extern int set_security_override(struct cred *, u32); -extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 7cecda29447e..4177c4738282 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -239,18 +239,16 @@ ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, * @cancel: callback to call * @cancel_data: extra data for the callback to call */ -struct debugfs_cancellation { +context_lock_struct(debugfs_cancellation) { struct list_head list; void (*cancel)(struct dentry *, void *); void *cancel_data; }; -void __acquires(cancellation) -debugfs_enter_cancellation(struct file *file, - struct debugfs_cancellation *cancellation); -void __releases(cancellation) -debugfs_leave_cancellation(struct file *file, - struct debugfs_cancellation *cancellation); +void debugfs_enter_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) __acquires(cancellation); +void debugfs_leave_cancellation(struct file *file, + struct debugfs_cancellation *cancellation) __releases(cancellation); #else diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 0864773a57e8..822085bc2d20 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -21,7 +21,7 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))) return 0; - if (likely(!inode->i_rdev)) + if (!inode->i_rdev) return 0; if (S_ISBLK(inode->i_mode)) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4809204c674c..8eff2f53fd86 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -395,11 +395,15 @@ bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents); bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents); +bool arch_dma_alloc_direct(struct device *dev); +bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle); #else #define arch_dma_map_phys_direct(d, a) (false) #define arch_dma_unmap_phys_direct(d, a) (false) #define arch_dma_map_sg_direct(d, s, n) (false) #define arch_dma_unmap_sg_direct(d, s, n) (false) +#define arch_dma_alloc_direct(d) (false) +#define arch_dma_free_direct(d, a) (false) #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS diff --git a/include/linux/efi.h b/include/linux/efi.h index 2a43094e23f7..664898d09ff5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -406,11 +406,12 @@ void efi_native_runtime_setup(void); #define EFI_CC_FINAL_EVENTS_TABLE_GUID EFI_GUID(0xdd4a4648, 0x2de7, 0x4665, 0x96, 0x4d, 0x21, 0xd9, 0xef, 0x5f, 0xb4, 0x46) /* - * This GUID is used to pass to the kernel 
proper the struct screen_info - * structure that was populated by the stub based on the GOP protocol instance - * associated with ConOut + * This GUIDs are used to pass to the kernel proper the primary + * display that has been populated by the stub based on the GOP + * instance associated with ConOut. */ -#define LINUX_EFI_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_PRIMARY_DISPLAY_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) + #define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 87efb38b7081..f83ca0abf2cd 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -2,6 +2,7 @@ #ifndef __LINUX_ENTRYCOMMON_H #define __LINUX_ENTRYCOMMON_H +#include <linux/audit.h> #include <linux/irq-entry-common.h> #include <linux/livepatch.h> #include <linux/ptrace.h> @@ -36,8 +37,8 @@ SYSCALL_WORK_SYSCALL_EMU | \ SYSCALL_WORK_SYSCALL_AUDIT | \ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + SYSCALL_WORK_SYSCALL_RSEQ_SLICE | \ ARCH_SYSCALL_WORK_ENTER) - #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_AUDIT | \ @@ -45,7 +46,84 @@ SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ ARCH_SYSCALL_WORK_EXIT) -long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work); +/** + * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper + * + * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry(). + * + * This allows architecture specific ptrace_report_syscall_entry() + * implementations. If not defined by the architecture this falls back to + * to ptrace_report_syscall_entry(). + */ +static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs); + +#ifndef arch_ptrace_report_syscall_entry +static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs) +{ + return ptrace_report_syscall_entry(regs); +} +#endif + +bool syscall_user_dispatch(struct pt_regs *regs); +long trace_syscall_enter(struct pt_regs *regs, long syscall); +void trace_syscall_exit(struct pt_regs *regs, long ret); + +static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) +{ + if (unlikely(audit_context())) { + unsigned long args[6]; + + syscall_get_arguments(current, regs, args); + audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); + } +} + +static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work) +{ + long syscall, ret = 0; + + /* + * Handle Syscall User Dispatch. This must comes first, since + * the ABI here can be something that doesn't make sense for + * other syscall_work features. + */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (syscall_user_dispatch(regs)) + return -1L; + } + + /* + * User space got a time slice extension granted and relinquishes + * the CPU. The work stops the slice timer to avoid an extra round + * through hrtimer_interrupt(). 
+ */ + if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE) + rseq_syscall_enter_work(syscall_get_nr(current, regs)); + + /* Handle ptrace */ + if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { + ret = arch_ptrace_report_syscall_entry(regs); + if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) + return -1L; + } + + /* Do seccomp after ptrace, to catch any tracer changes. */ + if (work & SYSCALL_WORK_SECCOMP) { + ret = __secure_computing(); + if (ret == -1L) + return ret; + } + + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) + syscall = trace_syscall_enter(regs, syscall); + + syscall_enter_audit(regs, syscall); + + return ret ? : syscall; +} /** * syscall_enter_from_user_mode_work - Check and handle work before invoking @@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re unsigned long work = READ_ONCE(current_thread_info()->syscall_work); if (work & SYSCALL_WORK_ENTER) - syscall = syscall_trace_enter(regs, syscall, work); + syscall = syscall_trace_enter(regs, work); return syscall; } @@ -112,6 +190,37 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l return ret; } +/* + * If SYSCALL_EMU is set, then the only reason to report is when + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * instruction has been already reported in syscall_enter_from_user_mode(). + */ +static __always_inline bool report_single_step(unsigned long work) +{ + if (work & SYSCALL_WORK_SYSCALL_EMU) + return false; + + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; +} + +/** + * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit() + * + * This allows architecture specific ptrace_report_syscall_exit() + * implementations. If not defined by the architecture this falls back to + * to ptrace_report_syscall_exit(). + */ +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs, + int step); + +#ifndef arch_ptrace_report_syscall_exit +static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs, + int step) +{ + ptrace_report_syscall_exit(regs, step); +} +#endif + /** * syscall_exit_work - Handle work before returning to user mode * @regs: Pointer to current pt_regs @@ -119,20 +228,40 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l * * Do one-time syscall specific work. */ -void syscall_exit_work(struct pt_regs *regs, unsigned long work); +static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work) +{ + bool step; + + /* + * If the syscall was rolled back due to syscall user dispatching, + * then the tracers below are not invoked for the same reason as + * the entry side was not invoked in syscall_trace_enter(): The ABI + * of these syscalls is unknown. 
+ */ + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { + if (unlikely(current->syscall_dispatch.on_dispatch)) { + current->syscall_dispatch.on_dispatch = false; + return; + } + } + + audit_syscall_exit(regs); + + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) + trace_syscall_exit(regs, syscall_get_return_value(current, regs)); + + step = report_single_step(work); + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) + arch_ptrace_report_syscall_exit(regs, step); +} /** - * syscall_exit_to_user_mode_work - Handle work before returning to user mode + * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode * @regs: Pointer to currents pt_regs * - * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling - * exit_to_user_mode() to perform the final transition to user mode. + * Step 1 of syscall_exit_to_user_mode() with the same calling convention. * - * Calling convention is the same as for syscall_exit_to_user_mode() and it - * returns with all work handled and interrupts disabled. The caller must - * invoke exit_to_user_mode() before actually switching to user mode to - * make the final state transitions. Interrupts must stay disabled between - * return from this function and the invocation of exit_to_user_mode(). + * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards. */ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) { @@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) */ if (unlikely(work & SYSCALL_WORK_EXIT)) syscall_exit_work(regs, work); - local_irq_disable_exit_to_user(); - syscall_exit_to_user_mode_prepare(regs); } /** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs * - * Invoked with interrupts enabled and fully valid regs. Returns with all + * Invoked with interrupts enabled and fully valid @regs. Returns with all * work handled, interrupts disabled such that the caller can immediately * switch to user mode. Called from architecture specific syscall and ret * from fork code. @@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) * - ptrace (single stepping) * * 2) Preparatory work + * - Disable interrupts * - Exit to user mode loop (common TIF handling). Invokes * arch_exit_to_user_mode_work() for architecture specific TIF work * - Architecture specific one time work arch_exit_to_user_mode_prepare() @@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the * functionality in exit_to_user_mode(). * - * This is a combination of syscall_exit_to_user_mode_work() (1,2) and - * exit_to_user_mode(). This function is preferred unless there is a - * compelling architectural reason to use the separate functions. + * This is a combination of syscall_exit_to_user_mode_work() (1), disabling + * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and + * exit_to_user_mode() (3). This function is preferred unless there is a + * compelling architectural reason to invoke the functions separately. 
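For orientation, an architecture's syscall path built on the helpers documented
above is shaped roughly like this (arch_invoke_syscall() stands in for the
arch-specific dispatch, and the NR_syscalls range check is omitted):

  void arch_syscall_handler(struct pt_regs *regs)
  {
          long nr = syscall_get_nr(current, regs);

          nr = syscall_enter_from_user_mode(regs, nr);
          if (nr >= 0)
                  arch_invoke_syscall(regs, nr);
          syscall_exit_to_user_mode(regs);
  }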
*/ static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) { instrumentation_begin(); syscall_exit_to_user_mode_work(regs); + local_irq_disable_exit_to_user(); + syscall_exit_to_user_mode_prepare(regs); instrumentation_end(); exit_to_user_mode(); } diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index f0cf2714ec52..262e24d83313 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -201,9 +201,9 @@ struct handle_to_path_ctx { * @commit_metadata: commit metadata changes to stable storage * * See Documentation/filesystems/nfs/exporting.rst for details on how to use - * this interface correctly. + * this interface correctly and the definition of the flags. * - * encode_fh: + * @encode_fh: * @encode_fh should store in the file handle fragment @fh (using at most * @max_len bytes) information that can be used by @decode_fh to recover the * file referred to by the &struct dentry @de. If @flag has CONNECTABLE bit @@ -215,7 +215,7 @@ struct handle_to_path_ctx { * greater than @max_len*4 bytes). On error @max_len contains the minimum * size(in 4 byte unit) needed to encode the file handle. * - * fh_to_dentry: + * @fh_to_dentry: * @fh_to_dentry is given a &struct super_block (@sb) and a file handle * fragment (@fh, @fh_len). It should return a &struct dentry which refers * to the same file that the file handle fragment refers to. If it cannot, @@ -227,31 +227,44 @@ struct handle_to_path_ctx { * created with d_alloc_root. The caller can then find any other extant * dentries by following the d_alias links. * - * fh_to_parent: + * @fh_to_parent: * Same as @fh_to_dentry, except that it returns a pointer to the parent * dentry if it was encoded into the filehandle fragment by @encode_fh. * - * get_name: + * @get_name: * @get_name should find a name for the given @child in the given @parent * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a %NAME_MAX+1 sized + * understanding that it is already pointing to a %NAME_MAX + 1 sized * buffer. get_name() should return %0 on success, a negative error code * or error. @get_name will be called without @parent->i_rwsem held. * - * get_parent: + * @get_parent: * @get_parent should find the parent directory for the given @child which * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * - * permission: + * @permission: * Allow filesystems to specify a custom permission function. * - * open: + * @open: * Allow filesystems to specify a custom open function. * - * commit_metadata: + * @commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * + * @get_uuid: + * Get a filesystem unique signature exposed to clients. + * + * @map_blocks: + * Map and, if necessary, allocate blocks for a layout. + * + * @commit_blocks: + * Commit blocks in a layout once the client is done with them. + * + * @flags: + * Allows the filesystem to communicate to nfsd that it may want to do things + * differently when dealing with it. 
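As a reminder of how this interface is typically wired up, a filesystem fills
in only the hooks it supports and points sb->s_export_op at the table (the
function names here are placeholders):

  static const struct export_operations myfs_export_ops = {
          .encode_fh       = myfs_encode_fh,
          .fh_to_dentry    = myfs_fh_to_dentry,
          .fh_to_parent    = myfs_fh_to_parent,
          .get_parent      = myfs_get_parent,
          .commit_metadata = myfs_commit_metadata,
  };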
+ * * Locking rules: * get_parent is called with child->d_inode->i_rwsem down * get_name is not (which is possibly inconsistent) diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 2f5e5588ee07..d2c9740e26a8 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -242,7 +242,14 @@ bool locks_owner_has_blockers(struct file_lock_context *flctx, static inline struct file_lock_context * locks_inode_context(const struct inode *inode) { - return smp_load_acquire(&inode->i_flctx); + /* + * Paired with smp_store_release in locks_get_lock_context(). + * + * Ensures ->i_flctx will be visible if we spotted the flag. + */ + if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX))) + return NULL; + return READ_ONCE(inode->i_flctx); } #else /* !CONFIG_FILE_LOCKING */ @@ -469,7 +476,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) * could end up racing with tasks trying to set a new lease on this * file. */ - flctx = READ_ONCE(inode->i_flctx); + flctx = locks_inode_context(inode); if (!flctx) return 0; smp_mb(); @@ -488,7 +495,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) * could end up racing with tasks trying to set a new lease on this * file. */ - flctx = READ_ONCE(inode->i_flctx); + flctx = locks_inode_context(inode); if (!flctx) return 0; smp_mb(); @@ -533,8 +540,11 @@ static inline int break_deleg_wait(struct delegated_inode *di) static inline int break_layout(struct inode *inode, bool wait) { + struct file_lock_context *flctx; + smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) { + flctx = locks_inode_context(inode); + if (flctx && !list_empty_careful(&flctx->flc_lease)) { unsigned int flags = LEASE_BREAK_LAYOUT; if (!wait) diff --git a/include/linux/filter.h b/include/linux/filter.h index fd54fed8f95f..4e1cb4f91f49 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1167,6 +1167,7 @@ bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); bool bpf_jit_supports_private_stack(void); bool bpf_jit_supports_timed_may_goto(void); +bool bpf_jit_supports_fsession(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); u64 arch_bpf_timed_may_goto(void); diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index a55ca771286b..5747bd191bf1 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -66,19 +66,33 @@ int qcom_scm_set_warm_boot_addr(void *entry); void qcom_scm_cpu_power_down(u32 flags); int qcom_scm_set_remote_state(u32 state, u32 id); -struct qcom_scm_pas_metadata { +struct qcom_scm_pas_context { + struct device *dev; + u32 pas_id; + phys_addr_t mem_phys; + size_t mem_size; void *ptr; dma_addr_t phys; ssize_t size; + bool use_tzmem; }; -int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, - struct qcom_scm_pas_metadata *ctx); -void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); -int qcom_scm_pas_auth_and_reset(u32 peripheral); -int qcom_scm_pas_shutdown(u32 peripheral); -bool qcom_scm_pas_supported(u32 peripheral); +struct qcom_scm_pas_context *devm_qcom_scm_pas_context_alloc(struct device *dev, + u32 pas_id, + phys_addr_t mem_phys, + size_t mem_size); +int qcom_scm_pas_init_image(u32 pas_id, const void *metadata, 
size_t size, + struct qcom_scm_pas_context *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_context *ctx); +int qcom_scm_pas_mem_setup(u32 pas_id, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 pas_id); +int qcom_scm_pas_shutdown(u32 pas_id); +bool qcom_scm_pas_supported(u32 pas_id); +struct resource_table *qcom_scm_pas_get_rsc_table(struct qcom_scm_pas_context *ctx, + void *input_rt, size_t input_rt_size, + size_t *output_rt_size); + +int qcom_scm_pas_prepare_and_auth_reset(struct qcom_scm_pas_context *ctx); int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); diff --git a/include/linux/firmware/xlnx-zynqmp-crypto.h b/include/linux/firmware/xlnx-zynqmp-crypto.h new file mode 100644 index 000000000000..56595ab37c43 --- /dev/null +++ b/include/linux/firmware/xlnx-zynqmp-crypto.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Firmware layer for XilSECURE APIs. + * + * Copyright (C) 2014-2022 Xilinx, Inc. + * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. + */ + +#ifndef __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ +#define __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ + +/** + * struct xlnx_feature - Feature data + * @family: Family code of platform + * @subfamily: Subfamily code of platform + * @feature_id: Feature id of module + * @data: Collection of all supported platform data + */ +struct xlnx_feature { + u32 family; + u32 feature_id; + void *data; +}; + +/* xilSecure API commands module id + api id */ +#define XSECURE_API_AES_INIT 0x509 +#define XSECURE_API_AES_OP_INIT 0x50a +#define XSECURE_API_AES_UPDATE_AAD 0x50b +#define XSECURE_API_AES_ENCRYPT_UPDATE 0x50c +#define XSECURE_API_AES_ENCRYPT_FINAL 0x50d +#define XSECURE_API_AES_DECRYPT_UPDATE 0x50e +#define XSECURE_API_AES_DECRYPT_FINAL 0x50f +#define XSECURE_API_AES_KEY_ZERO 0x510 +#define XSECURE_API_AES_WRITE_KEY 0x511 + +#if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) +int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); +void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map); +int versal_pm_aes_key_write(const u32 keylen, + const u32 keysrc, const u64 keyaddr); +int versal_pm_aes_key_zero(const u32 keysrc); +int versal_pm_aes_op_init(const u64 hw_req); +int versal_pm_aes_update_aad(const u64 aad_addr, const u32 aad_len); +int versal_pm_aes_enc_update(const u64 in_params, const u64 in_addr); +int versal_pm_aes_dec_update(const u64 in_params, const u64 in_addr); +int versal_pm_aes_dec_final(const u64 gcm_addr); +int versal_pm_aes_enc_final(const u64 gcm_addr); +int versal_pm_aes_init(void); + +#else +static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, + const u32 flags) +{ + return -ENODEV; +} + +static inline void *xlnx_get_crypto_dev_data(struct xlnx_feature *feature_map) +{ + return ERR_PTR(-ENODEV); +} + +static inline int versal_pm_aes_key_write(const u32 keylen, + const u32 keysrc, const u64 keyaddr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_key_zero(const u32 keysrc) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_op_init(const u64 hw_req) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_update_aad(const u64 aad_addr, + const u32 aad_len) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_enc_update(const u64 in_params, + const u64 in_addr) +{ + return -ENODEV; +} + +static 
inline int versal_pm_aes_dec_update(const u64 in_params, + const u64 in_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_enc_final(const u64 gcm_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_dec_final(const u64 gcm_addr) +{ + return -ENODEV; +} + +static inline int versal_pm_aes_init(void) +{ + return -ENODEV; +} + +#endif + +#endif /* __FIRMWARE_XLNX_ZYNQMP_CRYPTO_H__ */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 15fdbd089bbf..d70dcd462b44 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/firmware/xlnx-zynqmp-ufs.h> +#include <linux/firmware/xlnx-zynqmp-crypto.h> #define ZYNQMP_PM_VERSION_MAJOR 1 #define ZYNQMP_PM_VERSION_MINOR 0 @@ -589,9 +590,7 @@ int zynqmp_pm_release_node(const u32 node); int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); -int zynqmp_pm_aes_engine(const u64 address, u32 *out); int zynqmp_pm_efuse_access(const u64 address, u32 *out); -int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); int zynqmp_pm_fpga_get_config_status(u32 *value); @@ -772,22 +771,11 @@ static inline int zynqmp_pm_set_requirement(const u32 node, return -ENODEV; } -static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) -{ - return -ENODEV; -} - static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out) { return -ENODEV; } -static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, - const u32 flags) -{ - return -ENODEV; -} - static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index b3b53f8c1b28..171982e53c9a 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,7 +2,6 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ -#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/const.h> #include <linux/limits.h> @@ -10,10 +9,9 @@ #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) -#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) -#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) -#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ - FIELD_PREP(GENMASK(7, 1), func)) +#define FORTIFY_REASON_DIR(r) ((r) & 1) +#define FORTIFY_REASON_FUNC(r) ((r) >> 1) +#define FORTIFY_REASON(func, write) ((func) << 1 | (write)) /* Overridden by KUnit tests. */ #ifndef fortify_panic diff --git a/include/linux/fs.h b/include/linux/fs.h index f5c9cf28c4dc..2e4d1e8b0e71 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 #define IOP_FASTPERM_MAY_EXEC 0x0080 +#define IOP_FLCTX 0x0100 /* * Inode state bits. Protected by inode->i_lock @@ -1717,6 +1718,13 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, struct timespec64 simple_inode_init_ts(struct inode *inode); +static inline int inode_time_dirty_flag(struct inode *inode) +{ + if (inode->i_sb->s_flags & SB_LAZYTIME) + return I_DIRTY_TIME; + return I_DIRTY_SYNC; +} + /* * Snapshotting support. 
*/ @@ -1855,6 +1863,8 @@ struct dir_context { * INT_MAX unlimited */ int count; + /* @actor supports these flags in d_type high bits */ + unsigned int dt_flags_mask; }; /* If OR-ed with d_type, pending signals are not checked */ @@ -1983,6 +1993,11 @@ int wrap_directory_iterator(struct file *, struct dir_context *, static int shared_##x(struct file *file , struct dir_context *ctx) \ { return wrap_directory_iterator(file, ctx, x); } +enum fs_update_time { + FS_UPD_ATIME, + FS_UPD_CMTIME, +}; + struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); @@ -2010,7 +2025,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - int (*update_time)(struct inode *, int); + int (*update_time)(struct inode *inode, enum fs_update_time type, + unsigned int flags); + void (*sync_lazytime)(struct inode *inode); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); @@ -2237,16 +2254,8 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } -enum file_time_flags { - S_ATIME = 1, - S_MTIME = 2, - S_CTIME = 4, - S_VERSION = 8, -}; - extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); -int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { @@ -2274,8 +2283,6 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; - struct dentry *(*mount) (struct file_system_type *, int, - const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; @@ -2399,8 +2406,10 @@ static inline void super_set_sysfs_name_generic(struct super_block *sb, const ch extern void ihold(struct inode * inode); extern void iput(struct inode *); void iput_not_last(struct inode *); -int inode_update_timestamps(struct inode *inode, int flags); -int generic_update_time(struct inode *, int); +int inode_update_time(struct inode *inode, enum fs_update_time type, + unsigned int flags); +int generic_update_time(struct inode *inode, enum fs_update_time type, + unsigned int flags); /* /sys/fs */ extern struct kobject *fs_kobj; @@ -2409,14 +2418,19 @@ extern struct kobject *fs_kobj; /* fs/open.c */ struct audit_names; -struct filename { + +struct __filename_head { const char *name; /* pointer to actual string */ - const __user char *uptr; /* original userland pointer */ - atomic_t refcnt; + int refcnt; struct audit_names *aname; - const char iname[]; +}; +#define EMBEDDED_NAME_MAX (192 - sizeof(struct __filename_head)) +struct filename { + struct __filename_head; + const char iname[EMBEDDED_NAME_MAX]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); +static_assert(sizeof(struct filename) % 64 == 0); static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { @@ -2457,7 +2471,7 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); -struct file *dentry_create(const struct path *path, int flags, umode_t mode, +struct file *dentry_create(struct path 
*path, int flags, umode_t mode, const struct cred *cred); const struct path *backing_file_user_path(const struct file *f); @@ -2511,11 +2525,23 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f extern void putname(struct filename *name); DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T)) -static inline struct filename *refname(struct filename *name) -{ - atomic_inc(&name->refcnt); - return name; -} +struct delayed_filename { + struct filename *__incomplete_filename; // don't touch +}; +#define INIT_DELAYED_FILENAME(ptr) \ + ((void)(*(ptr) = (struct delayed_filename){})) +int delayed_getname(struct delayed_filename *, const char __user *); +int delayed_getname_uflags(struct delayed_filename *v, const char __user *, int); +void dismiss_delayed_filename(struct delayed_filename *); +int putname_to_delayed(struct delayed_filename *, struct filename *); +struct filename *complete_getname(struct delayed_filename *); + +DEFINE_CLASS(filename, struct filename *, putname(_T), getname(p), const char __user *p) +EXTEND_CLASS(filename, _kernel, getname_kernel(p), const char *p) +EXTEND_CLASS(filename, _flags, getname_flags(p, f), const char __user *p, unsigned int f) +EXTEND_CLASS(filename, _uflags, getname_uflags(p, f), const char __user *p, unsigned int f) +EXTEND_CLASS(filename, _maybe_null, getname_maybe_null(p, f), const char __user *p, unsigned int f) +EXTEND_CLASS(filename, _complete_delayed, complete_getname(p), struct delayed_filename *p) extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); @@ -2534,10 +2560,8 @@ static inline int finish_open_simple(struct file *file, int error) extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); -extern struct kmem_cache *names_cachep; - -#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) -#define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) +#define __getname() kmalloc(PATH_MAX, GFP_KERNEL) +#define __putname(name) kfree(name) void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); @@ -2657,6 +2681,11 @@ static inline int path_permission(const struct path *path, int mask) int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode); +int may_delete_dentry(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *victim, bool isdir); +int may_create_dentry(struct mnt_idmap *idmap, + struct inode *dir, struct dentry *child); + static inline bool execute_ok(struct inode *inode) { return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); @@ -3217,7 +3246,6 @@ extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, const struct inode *context_inode); -extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); @@ -3524,7 +3552,9 @@ static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { - return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); + unsigned int dt_mask = S_DT_MASK | ctx->dt_flags_mask; + + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type & dt_mask); } static inline bool dir_emit_dot(struct file *file, struct 
dir_context *ctx) { diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index 6bd3009e09b3..fa7638b81246 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -35,6 +35,7 @@ struct user_namespace; struct workqueue_struct; struct writeback_control; struct xattr_handler; +struct fserror_event; extern struct super_block *blockdev_superblock; @@ -96,7 +97,6 @@ struct super_operations { const void *owner); int (*unfreeze_fs)(struct super_block *sb); int (*statfs)(struct dentry *dentry, struct kstatfs *kstatfs); - int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin)(struct super_block *sb); int (*show_options)(struct seq_file *seq, struct dentry *dentry); @@ -124,6 +124,9 @@ struct super_operations { */ int (*remove_bdev)(struct super_block *sb, struct block_device *bdev); void (*shutdown)(struct super_block *sb); + + /* Report a filesystem error */ + void (*report_error)(const struct fserror_event *event); }; struct super_block { @@ -268,6 +271,9 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ long s_min_writeback_pages; + + /* number of fserrors that are being sent to fsnotify/filesystems */ + refcount_t s_pending_errors; } __randomize_layout; /* diff --git a/include/linux/fserror.h b/include/linux/fserror.h new file mode 100644 index 000000000000..5e1ad78c346e --- /dev/null +++ b/include/linux/fserror.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef _LINUX_FSERROR_H__ +#define _LINUX_FSERROR_H__ + +void fserror_mount(struct super_block *sb); +void fserror_unmount(struct super_block *sb); + +enum fserror_type { + /* pagecache I/O failed */ + FSERR_BUFFERED_READ, + FSERR_BUFFERED_WRITE, + + /* direct I/O failed */ + FSERR_DIRECTIO_READ, + FSERR_DIRECTIO_WRITE, + + /* out of band media error reported */ + FSERR_DATA_LOST, + + /* filesystem metadata */ + FSERR_METADATA, +}; + +struct fserror_event { + struct work_struct work; + struct super_block *sb; + struct inode *inode; + loff_t pos; + u64 len; + enum fserror_type type; + + /* negative error number */ + int error; +}; + +void fserror_report(struct super_block *sb, struct inode *inode, + enum fserror_type type, loff_t pos, u64 len, int error, + gfp_t gfp); + +static inline void fserror_report_io(struct inode *inode, + enum fserror_type type, loff_t pos, + u64 len, int error, gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, type, pos, len, error, gfp); +} + +static inline void fserror_report_data_lost(struct inode *inode, loff_t pos, + u64 len, gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, FSERR_DATA_LOST, pos, len, -EIO, + gfp); +} + +static inline void fserror_report_file_metadata(struct inode *inode, int error, + gfp_t gfp) +{ + fserror_report(inode->i_sb, inode, FSERR_METADATA, 0, 0, error, gfp); +} + +static inline void fserror_report_metadata(struct super_block *sb, int error, + gfp_t gfp) +{ + fserror_report(sb, NULL, FSERR_METADATA, 0, 0, error, gfp); +} + +static inline void fserror_report_shutdown(struct super_block *sb, gfp_t gfp) +{ + fserror_report(sb, NULL, FSERR_METADATA, 0, 0, -ESHUTDOWN, gfp); +} + +#endif /* _LINUX_FSERROR_H__ */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3a8989e3268..705db0a6d995 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -82,6 +82,7 @@ static inline void early_trace_init(void) { 
} struct module; struct ftrace_hash; +struct ftrace_func_entry; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) @@ -359,7 +360,6 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), FTRACE_OPS_FL_SUBOP = BIT(18), FTRACE_OPS_FL_GRAPH = BIT(19), - FTRACE_OPS_FL_JMP = BIT(20), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -403,9 +403,17 @@ enum ftrace_ops_cmd { * Negative on failure. The return value is dependent on the * callback. */ -typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, unsigned long ip, enum ftrace_ops_cmd cmd); #ifdef CONFIG_DYNAMIC_FTRACE + +#define FTRACE_HASH_DEFAULT_BITS 10 + +struct ftrace_hash *alloc_ftrace_hash(int size_bits); +void free_ftrace_hash(struct ftrace_hash *hash); +struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash, + unsigned long ip, unsigned long direct); + /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { struct ftrace_hash __rcu *notrace_hash; @@ -535,6 +543,10 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr); +int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash); +int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash); +int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock); + void ftrace_stub_direct_tramp(void); #else @@ -561,6 +573,21 @@ static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned l return -ENODEV; } +static inline int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash) +{ + return -ENODEV; +} + +static inline int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash) +{ + return -ENODEV; +} + +static inline int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock) +{ + return -ENODEV; +} + /* * This must be implemented by the architecture. * It is the way the ftrace direct_ops helper, when called diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index 15627ceea9bc..386fa48c4a95 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -33,6 +33,31 @@ struct ftrace_regs; #define ftrace_regs_get_frame_pointer(fregs) \ frame_pointer(&arch_ftrace_regs(fregs)->regs) +static __always_inline void +ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs) { } + +#else + +/* + * ftrace_partial_regs_update - update the original ftrace_regs from regs + * @fregs: The ftrace_regs to update from @regs + * @regs: The partial regs from ftrace_partial_regs() that was updated + * + * Some architectures have the partial regs living in the ftrace_regs + * structure, whereas other architectures need to make a different copy + * of the @regs. If a partial @regs is retrieved by ftrace_partial_regs() and + * if the code using @regs updates a field (like the instruction pointer or + * stack pointer) it may need to propagate that change to the original @fregs + * it retrieved the partial @regs from. Use this function to guarantee that + * update happens. 
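/*
 * A minimal sketch of the calling pattern described above: retrieve the
 * partial regs, let a consumer edit them, then propagate the edits back
 * with ftrace_partial_regs_update(). The callback and
 * consume_and_edit_regs() are illustrative assumptions, not part of this
 * header.
 */
extern void consume_and_edit_regs(struct pt_regs *regs);	/* hypothetical */

static void sketch_ftrace_callback(unsigned long ip, unsigned long parent_ip,
				   struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	struct pt_regs regs_storage;
	struct pt_regs *regs = ftrace_partial_regs(fregs, &regs_storage);

	/* Consumer may rewrite the instruction pointer or return value */
	consume_and_edit_regs(regs);

	/* Push any changes back into the ftrace_regs the partial regs came from */
	ftrace_partial_regs_update(fregs, regs);
}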
+ */ +static __always_inline void +ftrace_partial_regs_update(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + ftrace_regs_set_instruction_pointer(fregs, instruction_pointer(regs)); + ftrace_regs_set_return_value(fregs, regs_return_value(regs)); +} + #endif /* HAVE_ARCH_FTRACE_REGS */ /* This can be overridden by the architectures */ diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h deleted file mode 100644 index c304dcdb4eac..000000000000 --- a/include/linux/getcpu.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GETCPU_H -#define _LINUX_GETCPU_H 1 - -/* Cache for getcpu() to speed it up. Results might be a short time - out of date, but will be faster. - - User programs should not refer to the contents of this structure. - I repeat they should not refer to it. If they do they will break - in future kernels. - - It is only a private cache for vgetcpu(). It will change in future kernels. - The user program must store this information per thread (__thread) - If you want 100% accurate information pass NULL instead. */ -struct getcpu_cache { - unsigned long blob[128 / sizeof(long)]; -}; - -#endif diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index ca1ec437a3ca..51a6dc2b97e9 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -447,12 +447,16 @@ struct hisi_qp_ops { int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm); }; +struct instance_backlog { + struct list_head list; + spinlock_t lock; +}; + struct hisi_qp { u32 qp_id; u16 sq_depth; u16 cq_depth; u8 alg_type; - u8 req_type; struct qm_dma qdma; void *sqe; @@ -462,7 +466,6 @@ struct hisi_qp { struct hisi_qp_status qp_status; struct hisi_qp_ops *hw_ops; - void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); void (*event_cb)(struct hisi_qp *qp); @@ -471,6 +474,11 @@ struct hisi_qp { bool is_in_kernel; u16 pasid; struct uacce_queue *uacce_q; + + u32 ref_count; + spinlock_t qp_lock; + struct instance_backlog backlog; + const void **msg; }; static inline int vfs_num_set(const char *val, const struct kernel_param *kp) @@ -563,6 +571,7 @@ void hisi_qm_reset_done(struct pci_dev *pdev); int hisi_qm_wait_mb_ready(struct hisi_qm *qm); int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, bool op); +int hisi_qm_mb_read(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, @@ -575,7 +584,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, - u8 alg_type, int node, struct hisi_qp **qps); + u8 *alg_type, int node, struct hisi_qp **qps); void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0de12f14d6a4..74adbd4e7003 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -112,12 +112,6 @@ static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t t timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); } -static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) -{ - timer->node.expires = tv64; - timer->_softexpires = tv64; -} - static inline void hrtimer_add_expires(struct hrtimer *timer, 
ktime_t time) { timer->node.expires = ktime_add_safe(timer->node.expires, time); @@ -140,15 +134,6 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) return timer->_softexpires; } -static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) -{ - return timer->node.expires; -} -static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) -{ - return timer->_softexpires; -} - static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { return ktime_to_ns(timer->node.expires); diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index aa49ffa130e5..02b010df6570 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -6,26 +6,6 @@ #include <linux/timerqueue.h> #include <linux/seqlock.h> -#ifdef CONFIG_HIGH_RES_TIMERS - -/* - * The resolution of the clocks. The resolution value is returned in - * the clock_getres() system call to give application programmers an - * idea of the (in)accuracy of timers. Timer values are rounded up to - * this resolution values. - */ -# define HIGH_RES_NSEC 1 -# define KTIME_HIGH_RES (HIGH_RES_NSEC) -# define MONOTONIC_RES_NSEC HIGH_RES_NSEC -# define KTIME_MONOTONIC_RES KTIME_HIGH_RES - -#else - -# define MONOTONIC_RES_NSEC LOW_RES_NSEC -# define KTIME_MONOTONIC_RES KTIME_LOW_RES - -#endif - #ifdef CONFIG_64BIT # define __hrtimer_clock_base_align ____cacheline_aligned #else diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index b424555753b1..b77bc55a4cf3 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -15,6 +15,7 @@ #include <linux/completion.h> #include <linux/kref.h> #include <linux/types.h> +#include <linux/workqueue_types.h> /** * struct hwrng - Hardware Random Number Generator driver @@ -48,6 +49,7 @@ struct hwrng { /* internal. */ struct list_head list; struct kref ref; + struct work_struct cleanup_work; struct completion cleanup_done; struct completion dying; }; diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 9fcb6410a584..971d53349b6f 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -25,7 +25,7 @@ * @I3C_ERROR_M2: M2 error * * These are the standard error codes as defined by the I3C specification. - * When -EIO is returned by the i3c_device_do_priv_xfers() or + * When -EIO is returned by the i3c_device_do_i3c_xfers() or * i3c_device_send_hdr_cmds() one can check the error code in * &struct_i3c_xfer.err or &struct i3c_hdr_cmd.err to get a better idea of * what went wrong. 
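/*
 * A minimal usage sketch of the error-code inspection described above, built
 * on the i3c_device_do_xfers() prototype later in this header. The transfer
 * fields follow the former i3c_priv_xfer layout and, together with the read
 * helper itself, are assumptions for illustration only.
 */
static int sketch_i3c_read(struct i3c_device *dev, void *buf, u16 len)
{
	struct i3c_xfer xfer = {
		.rnw = true,
		.len = len,
		.data.in = buf,
	};
	int ret;

	ret = i3c_device_do_xfers(dev, &xfer, 1, I3C_SDR);
	if (ret == -EIO && xfer.err == I3C_ERROR_M2)
		pr_debug("M2 error reported for the transfer\n");

	return ret;
}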
@@ -79,9 +79,6 @@ struct i3c_xfer { enum i3c_error_code err; }; -/* keep back compatible */ -#define i3c_priv_xfer i3c_xfer - /** * enum i3c_dcr - I3C DCR values * @I3C_DCR_GENERIC_DEVICE: generic I3C device @@ -308,15 +305,23 @@ static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, i3c_i2c_driver_unregister, \ __i2cdrv) +#if IS_ENABLED(CONFIG_I3C) int i3c_device_do_xfers(struct i3c_device *dev, struct i3c_xfer *xfers, int nxfers, enum i3c_xfer_mode mode); +u32 i3c_device_get_supported_xfer_mode(struct i3c_device *dev); +#else +static inline int +i3c_device_do_xfers(struct i3c_device *dev, struct i3c_xfer *xfers, + int nxfers, enum i3c_xfer_mode mode) +{ + return -EOPNOTSUPP; +} -static inline int i3c_device_do_priv_xfers(struct i3c_device *dev, - struct i3c_xfer *xfers, - int nxfers) +static inline u32 i3c_device_get_supported_xfer_mode(struct i3c_device *dev) { - return i3c_device_do_xfers(dev, xfers, nxfers, I3C_SDR); + return 0; } +#endif int i3c_device_do_setdasa(struct i3c_device *dev); @@ -358,6 +363,5 @@ int i3c_device_request_ibi(struct i3c_device *dev, void i3c_device_free_ibi(struct i3c_device *dev); int i3c_device_enable_ibi(struct i3c_device *dev); int i3c_device_disable_ibi(struct i3c_device *dev); -u32 i3c_device_get_supported_xfer_mode(struct i3c_device *dev); #endif /* I3C_DEV_H */ diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 58d01ed4cce7..592b646f6134 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -462,6 +462,8 @@ struct i3c_bus { * @enable_hotjoin: enable hot join event detect. * @disable_hotjoin: disable hot join event detect. * @set_speed: adjust I3C open drain mode timing. + * @set_dev_nack_retry: configure device NACK retry count for the master + * controller. */ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -491,6 +493,8 @@ struct i3c_master_controller_ops { int (*enable_hotjoin)(struct i3c_master_controller *master); int (*disable_hotjoin)(struct i3c_master_controller *master); int (*set_speed)(struct i3c_master_controller *master, enum i3c_open_drain_speed speed); + int (*set_dev_nack_retry)(struct i3c_master_controller *master, + unsigned long dev_nack_retry_cnt); }; /** @@ -505,6 +509,8 @@ struct i3c_master_controller_ops { * @secondary: true if the master is a secondary master * @init_done: true when the bus initialization is done * @hotjoin: true if the master support hotjoin + * @rpm_allowed: true if Runtime PM allowed + * @rpm_ibi_allowed: true if IBI and Hot-Join allowed while runtime suspended * @boardinfo.i3c: list of I3C boardinfo objects * @boardinfo.i2c: list of I2C boardinfo objects * @boardinfo: board-level information attached to devices connected on the bus @@ -514,6 +520,7 @@ struct i3c_master_controller_ops { * in a thread context. Typical examples are Hot Join processing which * requires taking the bus lock in maintenance, which in turn, can only * be done from a sleep-able context + * @dev_nack_retry_count: retry count when slave device nack * * A &struct i3c_master_controller has to be registered to the I3C subsystem * through i3c_master_register(). 
None of &struct i3c_master_controller fields @@ -528,12 +535,15 @@ struct i3c_master_controller { unsigned int secondary : 1; unsigned int init_done : 1; unsigned int hotjoin: 1; + unsigned int rpm_allowed: 1; + unsigned int rpm_ibi_allowed: 1; struct { struct list_head i3c; struct list_head i2c; } boardinfo; struct i3c_bus bus; struct workqueue_struct *wq; + unsigned int dev_nack_retry_count; }; /** @@ -595,6 +605,7 @@ int i3c_master_get_free_addr(struct i3c_master_controller *master, int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, u8 addr); int i3c_master_do_daa(struct i3c_master_controller *master); +int i3c_master_do_daa_ext(struct i3c_master_controller *master, bool rstdaa); struct i3c_dma *i3c_master_dma_map_single(struct device *dev, void *ptr, size_t len, bool force_bounce, enum dma_data_direction dir); diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h index 92045d18cbfc..28776ee28d8e 100644 --- a/include/linux/init_syscalls.h +++ b/include/linux/init_syscalls.h @@ -17,3 +17,4 @@ int __init init_mkdir(const char *pathname, umode_t mode); int __init init_rmdir(const char *pathname); int __init init_utimes(char *filename, struct timespec64 *ts); int __init init_dup(struct file *file); +int __init init_pivot_root(const char *new_root, const char *put_old); diff --git a/include/linux/initrd.h b/include/linux/initrd.h index f1a1f4c92ded..7e5d26c8136f 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -3,8 +3,6 @@ #ifndef __LINUX_INITRD_H #define __LINUX_INITRD_H -#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ - /* starting block # of image */ extern int rd_image_start; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 266f2b39213a..6cd26ffb0505 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -181,9 +181,8 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check -__request_percpu_irq(unsigned int irq, irq_handler_t handler, - unsigned long flags, const char *devname, - const cpumask_t *affinity, void __percpu *percpu_dev_id); +request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, const char *devname, + const cpumask_t *affinity, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, @@ -193,17 +192,8 @@ static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { - return __request_percpu_irq(irq, handler, 0, - devname, NULL, percpu_dev_id); -} - -static inline int __must_check -request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, - const char *devname, const cpumask_t *affinity, - void __percpu *percpu_dev_id) -{ - return __request_percpu_irq(irq, handler, 0, - devname, affinity, percpu_dev_id); + return request_percpu_irq_affinity(irq, handler, devname, + NULL, percpu_dev_id); } extern int __must_check @@ -228,7 +218,7 @@ static inline int __must_check devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, + return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags | IRQF_COND_ONESHOT, devname, dev_id); } @@ -871,12 +861,6 @@ static inline void init_irq_proc(void) } #endif -#ifdef CONFIG_IRQ_TIMINGS -void 
irq_timings_enable(void); -void irq_timings_disable(void); -u64 irq_timings_next_event(u64 now); -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 85fe4e6b275c..d1aa4edfc2a5 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -12,6 +12,7 @@ void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); bool io_is_uring_fops(struct file *file); +int __io_uring_fork(struct task_struct *tsk); static inline void io_uring_files_cancel(void) { @@ -25,9 +26,16 @@ static inline void io_uring_task_cancel(void) } static inline void io_uring_free(struct task_struct *tsk) { - if (tsk->io_uring) + if (tsk->io_uring || tsk->io_uring_restrict) __io_uring_free(tsk); } +static inline int io_uring_fork(struct task_struct *tsk) +{ + if (tsk->io_uring_restrict) + return __io_uring_fork(tsk); + + return 0; +} #else static inline void io_uring_task_cancel(void) { @@ -46,6 +54,10 @@ static inline bool io_is_uring_fops(struct file *file) { return false; } +static inline int io_uring_fork(struct task_struct *tsk) +{ + return 0; +} #endif #endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index a3e8ddc9b380..3e4a82a6f817 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -219,12 +219,26 @@ struct io_rings { struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; }; +struct io_bpf_filter; +struct io_bpf_filters { + refcount_t refs; /* ref for ->bpf_filters */ + spinlock_t lock; /* protects ->bpf_filters modifications */ + struct io_bpf_filter __rcu **filters; + struct rcu_head rcu_head; +}; + struct io_restriction { DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); DECLARE_BITMAP(sqe_op, IORING_OP_LAST); + struct io_bpf_filters *bpf_filters; + /* ->bpf_filters needs COW on modification */ + bool bpf_filters_cow; u8 sqe_flags_allowed; u8 sqe_flags_required; - bool registered; + /* IORING_OP_* restrictions exist */ + bool op_registered; + /* IORING_REGISTER_* restrictions exist */ + bool reg_registered; }; struct io_submit_link { @@ -259,7 +273,8 @@ struct io_ring_ctx { struct { unsigned int flags; unsigned int drain_next: 1; - unsigned int restricted: 1; + unsigned int op_restricted: 1; + unsigned int reg_restricted: 1; unsigned int off_timeout_used: 1; unsigned int drain_active: 1; unsigned int has_evfd: 1; @@ -274,6 +289,8 @@ struct io_ring_ctx { struct task_struct *submitter_task; struct io_rings *rings; + /* cache of ->restrictions.bpf_filters->filters */ + struct io_bpf_filter __rcu **bpf_filters; struct percpu_ref refs; clockid_t clockid; @@ -316,7 +333,7 @@ struct io_ring_ctx { * manipulate the list, hence no extra locking is needed there. 
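/*
 * Sketch of the consumer implied by the io_uring_fork() inline above: a fork
 * path calls it on the child task, and it is a no-op unless
 * ->io_uring_restrict is set. The call-site name and error handling are
 * assumptions.
 */
static int sketch_copy_io_uring(struct task_struct *child)
{
	return io_uring_fork(child);
}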
*/ bool poll_multi_queue; - struct io_wq_work_list iopoll_list; + struct list_head iopoll_list; struct io_file_table file_table; struct io_rsrc_data buf_table; @@ -444,6 +461,9 @@ struct io_ring_ctx { struct list_head defer_list; unsigned nr_drained; + /* protected by ->completion_lock */ + unsigned nr_req_allocated; + #ifdef CONFIG_NET_RX_BUSY_POLL struct list_head napi_list; /* track busy poll napi_id */ spinlock_t napi_lock; /* napi_list lock */ @@ -456,10 +476,6 @@ struct io_ring_ctx { DECLARE_HASHTABLE(napi_ht, 4); #endif - /* protected by ->completion_lock */ - unsigned evfd_last_cq_tail; - unsigned nr_req_allocated; - /* * Protection for resize vs mmap races - both the mmap and resize * side will need to grab this lock, to prevent either side from @@ -714,15 +730,21 @@ struct io_kiocb { atomic_t refs; bool cancel_seq_set; - struct io_task_work io_task_work; + + union { + struct io_task_work io_task_work; + /* For IOPOLL setup queues, with hybrid polling */ + u64 iopoll_start; + }; + union { /* * for polled requests, i.e. IORING_OP_POLL_ADD and async armed * poll */ struct hlist_node hash_node; - /* For IOPOLL setup queues, with hybrid polling */ - u64 iopoll_start; + /* IOPOLL completion handling */ + struct list_head iopoll_node; /* for private io_kiocb freeing */ struct rcu_head rcu_head; }; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6bb941707d12..99b7209dabd7 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -345,9 +345,9 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); void iomap_read_folio(const struct iomap_ops *ops, - struct iomap_read_folio_ctx *ctx); + struct iomap_read_folio_ctx *ctx, void *private); void iomap_readahead(const struct iomap_ops *ops, - struct iomap_read_folio_ctx *ctx); + struct iomap_read_folio_ctx *ctx, void *private); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); @@ -566,6 +566,15 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_FSBLOCK_ALIGNED (1 << 3) +/* + * Bounce buffer instead of using zero copy access. + * + * This is needed if the device needs stable data to checksum or generate + * parity. The file system must hook into the I/O submission and offload + * completions to user context for reads when this is set. + */ +#define IOMAP_DIO_BOUNCE (1 << 4) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); @@ -599,7 +608,7 @@ static inline void iomap_bio_read_folio(struct folio *folio, .cur_folio = folio, }; - iomap_read_folio(ops, &ctx); + iomap_read_folio(ops, &ctx, NULL); } static inline void iomap_bio_readahead(struct readahead_control *rac, @@ -610,7 +619,7 @@ static inline void iomap_bio_readahead(struct readahead_control *rac, .rac = rac, }; - iomap_readahead(ops, &ctx); + iomap_readahead(ops, &ctx, NULL); } #endif /* CONFIG_BLOCK */ diff --git a/include/linux/irq.h b/include/linux/irq.h index 4a9f1d7b08c3..951acbdb9f84 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -459,6 +459,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * checks against the supplied affinity mask are not * required. 
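/*
 * A minimal sketch of a direct-I/O write path opting into the new
 * IOMAP_DIO_BOUNCE flag documented above; the ops pointers and the
 * stable-data condition are placeholders supplied by the caller.
 */
static ssize_t sketch_dio_write(struct kiocb *iocb, struct iov_iter *from,
				const struct iomap_ops *ops,
				const struct iomap_dio_ops *dops,
				bool needs_stable_data)
{
	unsigned int dio_flags = 0;

	/* Device computes checksums/parity: bounce instead of zero-copy access */
	if (needs_stable_data)
		dio_flags |= IOMAP_DIO_BOUNCE;

	return iomap_dio_rw(iocb, from, ops, dops, dio_flags, NULL, 0);
}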
This is used for CPU hotplug where the * target CPU is not yet set in the cpu_online_mask. + * @irq_pre_redirect: Optional function to be invoked before redirecting + * an interrupt via irq_work. Called only on CONFIG_SMP. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ @@ -503,6 +505,7 @@ struct irq_chip { void (*irq_eoi)(struct irq_data *data); int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); + void (*irq_pre_redirect)(struct irq_data *data); int (*irq_retrigger)(struct irq_data *data); int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); int (*irq_set_wake)(struct irq_data *data, unsigned int on); @@ -595,9 +598,6 @@ enum { #define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS -struct irqaction; -extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); - #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); extern void irq_cpu_offline(void); @@ -658,7 +658,7 @@ extern void handle_fasteoi_nmi(struct irq_desc *desc); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); -extern int irq_chip_pm_put(struct irq_data *data); +extern void irq_chip_pm_put(struct irq_data *data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern void handle_fasteoi_ack_irq(struct irq_desc *desc); extern void handle_fasteoi_mask_irq(struct irq_desc *desc); @@ -687,6 +687,13 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); +#ifdef CONFIG_SMP +void irq_chip_pre_redirect_parent(struct irq_data *data); +#endif +#endif + +#ifdef CONFIG_SMP +int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); #endif /* Disable or mask interrupts during a kernel kexec */ diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 68ddcdb1cec5..3da1ad80fc9d 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -265,6 +265,12 @@ #define GICV5_IWB_WENABLE_STATUSR_IDLE BIT(0) +#define GICV5_GSI_IC_TYPE GENMASK(31, 29) +#define GICV5_GSI_IWB_TYPE 0x7 + +#define GICV5_GSI_IWB_FRAME_ID GENMASK(28, 16) +#define GICV5_GSI_IWB_WIRE GENMASK(15, 0) + /* * Global Data structures and functions */ @@ -344,6 +350,7 @@ void __init gicv5_init_lpi_domain(void); void __init gicv5_free_lpi_domain(void); int gicv5_irs_of_probe(struct device_node *parent); +int gicv5_irs_acpi_probe(void); void gicv5_irs_remove(void); int gicv5_irs_enable(void); void gicv5_irs_its_probe(void); @@ -391,4 +398,5 @@ int gicv5_alloc_lpi(void); void gicv5_free_lpi(u32 lpi); void __init gicv5_its_of_probe(struct device_node *parent); +void __init gicv5_its_acpi_probe(void); #endif diff --git a/include/linux/irqchip/irq-renesas-rzt2h.h b/include/linux/irqchip/irq-renesas-rzt2h.h new file mode 100644 index 000000000000..853fd5ee0b22 --- /dev/null +++ b/include/linux/irqchip/irq-renesas-rzt2h.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Renesas RZ/T2H Interrupt Control Unit (ICU) + * + * Copyright (C) 2025 Renesas Electronics Corporation. 
+ */ + +#ifndef __LINUX_IRQ_RENESAS_RZT2H +#define __LINUX_IRQ_RENESAS_RZT2H + +#include <linux/platform_device.h> + +#define RZT2H_ICU_DMAC_REQ_NO_DEFAULT 0x3ff + +#ifdef CONFIG_RENESAS_RZT2H_ICU +void rzt2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel, + u16 req_no); +#else +static inline void rzt2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, + u8 dmac_channel, u16 req_no) { } +#endif + +#endif /* __LINUX_IRQ_RENESAS_RZT2H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 17902861de76..dae9a9b93665 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -2,9 +2,10 @@ #ifndef _LINUX_IRQDESC_H #define _LINUX_IRQDESC_H -#include <linux/rcupdate.h> +#include <linux/irq_work.h> #include <linux/kobject.h> #include <linux/mutex.h> +#include <linux/rcupdate.h> /* * Core internal functions to deal with irq descriptors @@ -30,6 +31,17 @@ struct irqstat { }; /** + * struct irq_redirect - interrupt redirection metadata + * @work: Harg irq_work item for handler execution on a different CPU + * @target_cpu: CPU to run irq handler on in case the current CPU is not part + * of the irq affinity mask + */ +struct irq_redirect { + struct irq_work work; + unsigned int target_cpu; +}; + +/** * struct irq_desc - interrupt descriptor * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu @@ -46,6 +58,7 @@ struct irqstat { * @threads_handled: stats field for deferred spurious detection of threaded handlers * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP + * @redirect: Facility for redirecting interrupts via irq_work * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts @@ -83,6 +96,7 @@ struct irq_desc { raw_spinlock_t lock; struct cpumask *percpu_enabled; #ifdef CONFIG_SMP + struct irq_redirect redirect; const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -185,6 +199,7 @@ int generic_handle_irq_safe(unsigned int irq); int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq); int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq); +bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq); #endif /* Test to see if a driver has successfully requested an irq */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 62f81bbeb490..73c25d40846c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -257,7 +257,8 @@ static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, - const char *name, phys_addr_t *pa); + const char *name, phys_addr_t *pa, + struct fwnode_handle *parent); enum { IRQCHIP_FWNODE_REAL, @@ -267,18 +268,39 @@ enum { static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { - return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL, NULL); +} + +static inline +struct fwnode_handle *irq_domain_alloc_named_parented_fwnode(const char *name, + struct fwnode_handle *parent) 
+{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL, parent); } static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, - NULL); + NULL, NULL); +} + +static inline +struct fwnode_handle *irq_domain_alloc_named_id_parented_fwnode(const char *name, int id, + struct fwnode_handle *parent) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, + NULL, parent); } static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) { - return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa); + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa, NULL); +} + +static inline struct fwnode_handle *irq_domain_alloc_parented_fwnode(phys_addr_t *pa, + struct fwnode_handle *parent) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa, parent); } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f5eaf76198f3..a53a00d36228 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1815,7 +1815,4 @@ static inline int jbd2_handle_buffer_credits(handle_t *handle) #endif /* __KERNEL__ */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _LINUX_JBD2_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9c6ac4b62eb9..338a1921a50a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -641,6 +641,17 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, __kasan_unpoison_vmap_areas(vms, nr_vms, flags); } +void __kasan_vrealloc(const void *start, unsigned long old_size, + unsigned long new_size); + +static __always_inline void kasan_vrealloc(const void *start, + unsigned long old_size, + unsigned long new_size) +{ + if (kasan_enabled()) + __kasan_vrealloc(start, old_size, new_size); +} + #else /* CONFIG_KASAN_VMALLOC */ static inline void kasan_populate_early_vm_area_shadow(void *start, @@ -670,6 +681,9 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, kasan_vmalloc_flags_t flags) { } +static inline void kasan_vrealloc(const void *start, unsigned long old_size, + unsigned long new_size) { } + #endif /* CONFIG_KASAN_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ diff --git a/include/linux/kref.h b/include/linux/kref.h index 88e82ab1367c..9bc6abe57572 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -81,6 +81,7 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) + __cond_acquires(true, mutex) { if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) { release(kref); @@ -102,6 +103,7 @@ static inline int kref_put_mutex(struct kref *kref, static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) + __cond_acquires(true, lock) { if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8d27403888ce..c92c1149ee6e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -100,6 +100,7 @@ void kthread_unpark(struct task_struct *k); void kthread_parkme(void); void kthread_exit(long result) __noreturn; void kthread_complete_and_exit(struct completion *, long) __noreturn; +int 
kthreads_update_housekeeping(void); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index ae1b541446c9..df9eebe6afca 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -144,11 +144,13 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) } static inline void hlist_bl_lock(struct hlist_bl_head *b) + __acquires(__bitlock(0, b)) { bit_spin_lock(0, (unsigned long *)b); } static inline void hlist_bl_unlock(struct hlist_bl_head *b) + __releases(__bitlock(0, b)) { __bit_spin_unlock(0, (unsigned long *)b); } diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 772919e8096a..ba9e3988c07c 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -175,6 +175,9 @@ int klp_enable_patch(struct klp_patch *); int klp_module_coming(struct module *mod); void klp_module_going(struct module *mod); +void *klp_find_section_by_name(const struct module *mod, const char *name, + size_t *sec_size); + void klp_copy_process(struct task_struct *child); void klp_update_patch_state(struct task_struct *task); diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index b0e6ab329b00..b8830148a859 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -14,13 +14,13 @@ * local_lock - Acquire a per CPU local lock * @lock: The lock variable */ -#define local_lock(lock) __local_lock(this_cpu_ptr(lock)) +#define local_lock(lock) __local_lock(__this_cpu_local_lock(lock)) /** * local_lock_irq - Acquire a per CPU local lock and disable interrupts * @lock: The lock variable */ -#define local_lock_irq(lock) __local_lock_irq(this_cpu_ptr(lock)) +#define local_lock_irq(lock) __local_lock_irq(__this_cpu_local_lock(lock)) /** * local_lock_irqsave - Acquire a per CPU local lock, save and disable @@ -29,19 +29,19 @@ * @flags: Storage for interrupt flags */ #define local_lock_irqsave(lock, flags) \ - __local_lock_irqsave(this_cpu_ptr(lock), flags) + __local_lock_irqsave(__this_cpu_local_lock(lock), flags) /** * local_unlock - Release a per CPU local lock * @lock: The lock variable */ -#define local_unlock(lock) __local_unlock(this_cpu_ptr(lock)) +#define local_unlock(lock) __local_unlock(__this_cpu_local_lock(lock)) /** * local_unlock_irq - Release a per CPU local lock and enable interrupts * @lock: The lock variable */ -#define local_unlock_irq(lock) __local_unlock_irq(this_cpu_ptr(lock)) +#define local_unlock_irq(lock) __local_unlock_irq(__this_cpu_local_lock(lock)) /** * local_unlock_irqrestore - Release a per CPU local lock and restore @@ -50,7 +50,7 @@ * @flags: Interrupt flags to restore */ #define local_unlock_irqrestore(lock, flags) \ - __local_unlock_irqrestore(this_cpu_ptr(lock), flags) + __local_unlock_irqrestore(__this_cpu_local_lock(lock), flags) /** * local_trylock_init - Runtime initialize a lock instance @@ -66,7 +66,7 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define local_trylock(lock) __local_trylock(this_cpu_ptr(lock)) +#define local_trylock(lock) __local_trylock(__this_cpu_local_lock(lock)) #define local_lock_is_locked(lock) __local_lock_is_locked(lock) @@ -81,27 +81,44 @@ * HARDIRQ context on PREEMPT_RT. 
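/*
 * Per-CPU usage sketch for the trylock behaviour documented above; the
 * statistics structure and event counter are illustrative.
 */
struct sketch_pcpu_stats {
	local_trylock_t lock;
	u64 events;
};

static DEFINE_PER_CPU(struct sketch_pcpu_stats, sketch_stats) = {
	.lock = INIT_LOCAL_TRYLOCK(lock),
};

static bool sketch_count_event(void)
{
	if (!local_trylock(&sketch_stats.lock))
		return false;	/* e.g. contended from NMI/HARDIRQ on PREEMPT_RT */

	this_cpu_inc(sketch_stats.events);
	local_unlock(&sketch_stats.lock);
	return true;
}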
*/ #define local_trylock_irqsave(lock, flags) \ - __local_trylock_irqsave(this_cpu_ptr(lock), flags) - -DEFINE_GUARD(local_lock, local_lock_t __percpu*, - local_lock(_T), - local_unlock(_T)) -DEFINE_GUARD(local_lock_irq, local_lock_t __percpu*, - local_lock_irq(_T), - local_unlock_irq(_T)) + __local_trylock_irqsave(__this_cpu_local_lock(lock), flags) + +DEFINE_LOCK_GUARD_1(local_lock, local_lock_t __percpu, + local_lock(_T->lock), + local_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1(local_lock_irq, local_lock_t __percpu, + local_lock_irq(_T->lock), + local_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, local_lock_irqsave(_T->lock, _T->flags), local_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) #define local_lock_nested_bh(_lock) \ - __local_lock_nested_bh(this_cpu_ptr(_lock)) + __local_lock_nested_bh(__this_cpu_local_lock(_lock)) #define local_unlock_nested_bh(_lock) \ - __local_unlock_nested_bh(this_cpu_ptr(_lock)) - -DEFINE_GUARD(local_lock_nested_bh, local_lock_t __percpu*, - local_lock_nested_bh(_T), - local_unlock_nested_bh(_T)) + __local_unlock_nested_bh(__this_cpu_local_lock(_lock)) + +DEFINE_LOCK_GUARD_1(local_lock_nested_bh, local_lock_t __percpu, + local_lock_nested_bh(_T->lock), + local_unlock_nested_bh(_T->lock)) + +DEFINE_LOCK_GUARD_1(local_lock_init, local_lock_t, local_lock_init(_T->lock), /* */) + +DECLARE_LOCK_GUARD_1_ATTRS(local_lock, __acquires(_T), __releases(*(local_lock_t __percpu **)_T)) +#define class_local_lock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_lock, _T) +DECLARE_LOCK_GUARD_1_ATTRS(local_lock_irq, __acquires(_T), __releases(*(local_lock_t __percpu **)_T)) +#define class_local_lock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_lock_irq, _T) +DECLARE_LOCK_GUARD_1_ATTRS(local_lock_irqsave, __acquires(_T), __releases(*(local_lock_t __percpu **)_T)) +#define class_local_lock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_lock_irqsave, _T) +DECLARE_LOCK_GUARD_1_ATTRS(local_lock_nested_bh, __acquires(_T), __releases(*(local_lock_t __percpu **)_T)) +#define class_local_lock_nested_bh_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_lock_nested_bh, _T) +DECLARE_LOCK_GUARD_1_ATTRS(local_lock_init, __acquires(_T), __releases(*(local_lock_t **)_T)) +#define class_local_lock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_lock_init, _T) + +DEFINE_LOCK_GUARD_1(local_trylock_init, local_trylock_t, local_trylock_init(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(local_trylock_init, __acquires(_T), __releases(*(local_trylock_t **)_T)) +#define class_local_trylock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(local_trylock_init, _T) #endif diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8f82b4eb542f..eff711bf973f 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -4,25 +4,30 @@ #endif #include <linux/percpu-defs.h> +#include <linux/irqflags.h> #include <linux/lockdep.h> +#include <linux/debug_locks.h> +#include <asm/current.h> #ifndef CONFIG_PREEMPT_RT -typedef struct { +context_lock_struct(local_lock) { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; struct task_struct *owner; #endif -} local_lock_t; +}; +typedef struct local_lock local_lock_t; /* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ -typedef struct { +context_lock_struct(local_trylock) { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; struct task_struct *owner; #endif u8 acquired; -} local_trylock_t; +}; +typedef 
struct local_trylock local_trylock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -84,7 +89,10 @@ do { \ local_lock_debug_init(lock); \ } while (0) -#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock) +#define __local_trylock_init(lock) \ +do { \ + __local_lock_init((local_lock_t *)lock); \ +} while (0) #define __spinlock_nested_bh_init(lock) \ do { \ @@ -117,22 +125,25 @@ do { \ do { \ preempt_disable(); \ __local_lock_acquire(lock); \ + __acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ __local_lock_acquire(lock); \ + __acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ __local_lock_acquire(lock); \ + __acquire(lock); \ } while (0) #define __local_trylock(lock) \ - ({ \ + __try_acquire_ctx_lock(lock, ({ \ local_trylock_t *__tl; \ \ preempt_disable(); \ @@ -146,10 +157,10 @@ do { \ (local_lock_t *)__tl); \ } \ !!__tl; \ - }) + })) #define __local_trylock_irqsave(lock, flags) \ - ({ \ + __try_acquire_ctx_lock(lock, ({ \ local_trylock_t *__tl; \ \ local_irq_save(flags); \ @@ -163,7 +174,7 @@ do { \ (local_lock_t *)__tl); \ } \ !!__tl; \ - }) + })) /* preemption or migration must be disabled before calling __local_lock_is_locked */ #define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired) @@ -186,18 +197,21 @@ do { \ #define __local_unlock(lock) \ do { \ + __release(lock); \ __local_lock_release(lock); \ preempt_enable(); \ } while (0) #define __local_unlock_irq(lock) \ do { \ + __release(lock); \ __local_lock_release(lock); \ local_irq_enable(); \ } while (0) #define __local_unlock_irqrestore(lock, flags) \ do { \ + __release(lock); \ __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) @@ -206,13 +220,20 @@ do { \ do { \ lockdep_assert_in_softirq(); \ local_lock_acquire((lock)); \ + __acquire(lock); \ } while (0) #define __local_unlock_nested_bh(lock) \ - local_lock_release((lock)) + do { \ + __release(lock); \ + local_lock_release((lock)); \ + } while (0) #else /* !CONFIG_PREEMPT_RT */ +#include <linux/sched.h> +#include <linux/spinlock.h> + /* * On PREEMPT_RT local_lock maps to a per CPU spinlock, which protects the * critical section while staying preemptible. @@ -267,7 +288,7 @@ do { \ } while (0) #define __local_trylock(lock) \ - ({ \ + __try_acquire_ctx_lock(lock, context_unsafe(({ \ int __locked; \ \ if (in_nmi() | in_hardirq()) { \ @@ -279,17 +300,40 @@ do { \ migrate_enable(); \ } \ __locked; \ - }) + }))) #define __local_trylock_irqsave(lock, flags) \ - ({ \ + __try_acquire_ctx_lock(lock, ({ \ typecheck(unsigned long, flags); \ flags = 0; \ __local_trylock(lock); \ - }) + })) /* migration must be disabled before calling __local_lock_is_locked */ #define __local_lock_is_locked(__lock) \ (rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current) #endif /* CONFIG_PREEMPT_RT */ + +#if defined(WARN_CONTEXT_ANALYSIS) +/* + * Because the compiler only knows about the base per-CPU variable, use this + * helper function to make the compiler think we lock/unlock the @base variable, + * and hide the fact we actually pass the per-CPU instance to lock/unlock + * functions. 
+ */ +static __always_inline local_lock_t *__this_cpu_local_lock(local_lock_t __percpu *base) + __returns_ctx_lock(base) __attribute__((overloadable)) +{ + return this_cpu_ptr(base); +} +#ifndef CONFIG_PREEMPT_RT +static __always_inline local_trylock_t *__this_cpu_local_lock(local_trylock_t __percpu *base) + __returns_ctx_lock(base) __attribute__((overloadable)) +{ + return this_cpu_ptr(base); +} +#endif /* CONFIG_PREEMPT_RT */ +#else /* WARN_CONTEXT_ANALYSIS */ +#define __this_cpu_local_lock(base) this_cpu_ptr(base) +#endif /* WARN_CONTEXT_ANALYSIS */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index dd634103b014..621566345406 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -282,16 +282,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); do { WARN_ON_ONCE(debug_locks && !(cond)); } while (0) #define lockdep_assert_held(l) \ - lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) + do { lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD); __assume_ctx_lock(l); } while (0) #define lockdep_assert_not_held(l) \ lockdep_assert(lockdep_is_held(l) != LOCK_STATE_HELD) #define lockdep_assert_held_write(l) \ - lockdep_assert(lockdep_is_held_type(l, 0)) + do { lockdep_assert(lockdep_is_held_type(l, 0)); __assume_ctx_lock(l); } while (0) #define lockdep_assert_held_read(l) \ - lockdep_assert(lockdep_is_held_type(l, 1)) + do { lockdep_assert(lockdep_is_held_type(l, 1)); __assume_shared_ctx_lock(l); } while (0) #define lockdep_assert_held_once(l) \ lockdep_assert_once(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) @@ -389,10 +389,10 @@ extern int lockdep_is_held(const void *); #define lockdep_assert(c) do { } while (0) #define lockdep_assert_once(c) do { } while (0) -#define lockdep_assert_held(l) do { (void)(l); } while (0) +#define lockdep_assert_held(l) __assume_ctx_lock(l) #define lockdep_assert_not_held(l) do { (void)(l); } while (0) -#define lockdep_assert_held_write(l) do { (void)(l); } while (0) -#define lockdep_assert_held_read(l) do { (void)(l); } while (0) +#define lockdep_assert_held_write(l) __assume_ctx_lock(l) +#define lockdep_assert_held_read(l) __assume_shared_ctx_lock(l) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) #define lockdep_assert_none_held_once() do { } while (0) diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 815d871fadfc..6ded24cdb4a8 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -49,9 +49,7 @@ static inline void lockref_init(struct lockref *lockref) void lockref_get(struct lockref *lockref); int lockref_put_return(struct lockref *lockref); bool lockref_get_not_zero(struct lockref *lockref); -bool lockref_put_or_lock(struct lockref *lockref); -#define lockref_put_or_lock(_lockref) \ - (!__cond_lock((_lockref)->lock, !lockref_put_or_lock(_lockref))) +bool lockref_put_or_lock(struct lockref *lockref) __cond_acquires(false, &lockref->lock); void lockref_mark_dead(struct lockref *lockref); bool lockref_get_not_dead(struct lockref *lockref); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index b92008641242..d48bf0ad26f4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -73,7 +73,7 @@ struct lsm_static_calls_table { /** * struct lsm_id - Identify a Linux Security Module. - * @lsm: name of the LSM, must be approved by the LSM maintainers + * @name: name of the LSM, must be approved by the LSM maintainers * @id: LSM ID number from uapi/linux/lsm.h * * Contains the information that identifies the LSM. 
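/*
 * Sketch of the pattern the lockdep_assert_held() changes above target: the
 * assertion keeps its runtime lockdep check and now also informs the context
 * (capability) analysis that the lock is held. The object and field names
 * are illustrative.
 */
struct sketch_obj {
	spinlock_t lock;
	unsigned long state;
};

static void sketch_update_state(struct sketch_obj *obj, unsigned long state)
{
	/* Callers must hold obj->lock */
	lockdep_assert_held(&obj->lock);
	obj->state = state;
}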
@@ -164,7 +164,7 @@ enum lsm_order { * @initcall_core: LSM callback for core_initcall() setup, optional * @initcall_subsys: LSM callback for subsys_initcall() setup, optional * @initcall_fs: LSM callback for fs_initcall setup, optional - * @nitcall_device: LSM callback for device_initcall() setup, optional + * @initcall_device: LSM callback for device_initcall() setup, optional * @initcall_late: LSM callback for late_initcall() setup, optional */ struct lsm_info { diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index e1555e06e7e5..07c1bfbdb8c4 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -70,14 +70,33 @@ struct cmdq_cb_data { struct cmdq_pkt *pkt; }; +struct cmdq_mbox_priv { + u8 shift_pa; + dma_addr_t mminfra_offset; +}; + struct cmdq_pkt { void *va_base; dma_addr_t pa_base; size_t cmd_buf_size; /* command occupied size */ size_t buf_size; /* real buffer size */ + struct cmdq_mbox_priv priv; /* for generating instruction */ }; /** + * cmdq_get_mbox_priv() - get the private data of mailbox channel + * @chan: mailbox channel + * @priv: pointer to store the private data of mailbox channel + * + * While generating the GCE instruction to command buffer, the private data + * of GCE hardware may need to be referenced, such as the shift bits of + * physical address. + * + * This function should be called before generating the GCE instruction. + */ +void cmdq_get_mbox_priv(struct mbox_chan *chan, struct cmdq_mbox_priv *priv); + +/** * cmdq_get_shift_pa() - get the shift bits of physical address * @chan: mailbox channel * diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0651865a4564..67f154de10bc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -949,7 +949,11 @@ static inline void mod_memcg_page_state(struct page *page, rcu_read_unlock(); } +unsigned long memcg_events(struct mem_cgroup *memcg, int event); unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); +unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); +bool memcg_stat_item_valid(int idx); +bool memcg_vm_event_item_valid(enum vm_event_item idx); unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx); unsigned long lruvec_page_state_local(struct lruvec *lruvec, enum node_stat_item idx); @@ -1037,6 +1041,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) return id; } +void mem_cgroup_flush_workqueue(void); + extern int mem_cgroup_init(void); #else /* CONFIG_MEMCG */ @@ -1373,6 +1379,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) return 0; } +static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item) +{ + return 0; +} + +static inline bool memcg_stat_item_valid(int idx) +{ + return false; +} + +static inline bool memcg_vm_event_item_valid(enum vm_event_item idx) +{ + return false; +} + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1436,6 +1457,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) return 0; } +static inline void mem_cgroup_flush_workqueue(void) { } + static inline int mem_cgroup_init(void) { return 0; } #endif /* CONFIG_MEMCG */ diff --git a/include/linux/memfd.h b/include/linux/memfd.h index cc74de3dbcfe..c328a7b356d0 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -17,6 +17,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); * to 
by vm_flags_ptr. */ int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr); +struct file *memfd_alloc_file(const char *name, unsigned int flags); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -31,6 +32,11 @@ static inline int memfd_check_seals_mmap(struct file *file, { return 0; } + +static inline struct file *memfd_alloc_file(const char *name, unsigned int flags) +{ + return ERR_PTR(-EINVAL); +} #endif #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 713ec0435b48..e3c2ccf872a8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -224,7 +224,8 @@ static inline bool is_fsdax_page(const struct page *page) } #ifdef CONFIG_ZONE_DEVICE -void zone_device_page_init(struct page *page, unsigned int order); +void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap, + unsigned int order); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -234,9 +235,11 @@ bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); -static inline void zone_device_folio_init(struct folio *folio, unsigned int order) +static inline void zone_device_folio_init(struct folio *folio, + struct dev_pagemap *pgmap, + unsigned int order) { - zone_device_page_init(&folio->page, order); + zone_device_page_init(&folio->page, pgmap, order); if (order) folio_set_large_rmappable(folio); } diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 5f70d3b5d1b1..097ef4dfcdac 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -667,7 +667,7 @@ static inline int wm8350_register_irq(struct wm8350 *wm8350, int irq, return -ENODEV; return request_threaded_irq(irq + wm8350->irq_base, NULL, - handler, flags, name, data); + handler, flags | IRQF_ONESHOT, name, data); } static inline void wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data) diff --git a/include/linux/mm.h b/include/linux/mm.h index f0d5be9dc736..73d3500d388e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2979,15 +2979,8 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ -extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl); -static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) -{ - pte_t *ptep; - __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); - return ptep; -} +extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl); #ifdef __PAGETABLE_P4D_FOLDED static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, @@ -3341,31 +3334,15 @@ static inline bool pagetable_pte_ctor(struct mm_struct *mm, return true; } -pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); -static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, - pmd_t *pmdvalp) -{ - pte_t *pte; +pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); - __cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp)); - return pte; -} static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr) { return __pte_offset_map(pmd, addr, NULL); } -pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp); -static inline pte_t *pte_offset_map_lock(struct 
mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp) -{ - pte_t *pte; - - __cond_lock(RCU, __cond_lock(*ptlp, - pte = __pte_offset_map_lock(mm, pmd, addr, ptlp))); - return pte; -} +pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, spinlock_t **ptlp); pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index ac01dc4eb2ce..ed3dd0f3fe19 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -24,7 +24,7 @@ static inline void leave_mm(void) { } #ifndef task_cpu_possible_mask # define task_cpu_possible_mask(p) cpu_possible_mask # define task_cpu_possible(cpu, p) true -# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK) +# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_DOMAIN) #else # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) #endif diff --git a/include/linux/module.h b/include/linux/module.h index d80c3ea57472..20ddfd97630d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -151,16 +151,10 @@ extern void cleanup_module(void); #define __init_or_module #define __initdata_or_module #define __initconst_or_module -#define __INIT_OR_MODULE .text -#define __INITDATA_OR_MODULE .data -#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata #define __initconst_or_module __initconst -#define __INIT_OR_MODULE __INIT -#define __INITDATA_OR_MODULE __INITDATA -#define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ struct module_kobject *lookup_or_create_module_kobject(const char *name); @@ -770,8 +764,6 @@ static inline bool is_livepatch_module(struct module *mod) #endif } -void set_module_sig_enforced(void); - void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data); #else /* !CONFIG_MODULES... */ @@ -866,10 +858,6 @@ static inline bool module_requested_async_probing(struct module *module) } -static inline void set_module_sig_enforced(void) -{ -} - /* Dereference module function descriptor */ static inline void *dereference_module_function_descriptor(struct module *mod, void *ptr) @@ -925,6 +913,8 @@ static inline bool retpoline_module_ok(bool has_retpoline) #ifdef CONFIG_MODULE_SIG bool is_module_sig_enforced(void); +void set_module_sig_enforced(void); + static inline bool module_sig_ok(struct module *module) { return module->sig_ok; @@ -935,6 +925,10 @@ static inline bool is_module_sig_enforced(void) return false; } +static inline void set_module_sig_enforced(void) +{ +} + static inline bool module_sig_ok(struct module *module) { return true; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 915f32f7d888..c03db3c2fd40 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -355,8 +355,8 @@ static inline void kernel_param_unlock(struct module *mod) /** * __core_param_cb - similar like core_param, with a set/get ops instead of type. * @name: the name of the cmdline and sysfs parameter (often the same as var) - * @var: the variable * @ops: the set & get operations for this parameter. 
+ * @arg: the variable * @perm: visibility in sysfs * * Ideally this should be called 'core_param_cb', but the name has been @@ -390,7 +390,7 @@ static inline void kernel_param_unlock(struct module *mod) * @name1: parameter name 1 * @name2: parameter name 2 * - * Returns true if the two parameter names are equal. + * Returns: true if the two parameter names are equal. * Dashes (-) are considered equal to underscores (_). */ extern bool parameq(const char *name1, const char *name2); @@ -402,6 +402,10 @@ extern bool parameq(const char *name1, const char *name2); * @n: the length to compare * * Similar to parameq(), except it compares @n characters. + * + * Returns: true if the first @n characters of the two parameter names + * are equal. + * Dashes (-) are considered equal to underscores (_). */ extern bool parameqn(const char *name1, const char *name2, size_t n); diff --git a/include/linux/msi.h b/include/linux/msi.h index 8003e3218c46..fa41eed62868 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -49,12 +49,12 @@ typedef struct arch_msi_msg_data { #endif /** - * msi_msg - Representation of a MSI message + * struct msi_msg - Representation of a MSI message * @address_lo: Low 32 bits of msi message address - * @arch_addrlo: Architecture specific shadow of @address_lo + * @arch_addr_lo: Architecture specific shadow of @address_lo * @address_hi: High 32 bits of msi message address * (only used when device supports it) - * @arch_addrhi: Architecture specific shadow of @address_hi + * @arch_addr_hi: Architecture specific shadow of @address_hi * @data: MSI message data (usually 16 bits) * @arch_data: Architecture specific shadow of @data */ @@ -91,7 +91,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, struct msi_msg *msg); /** - * pci_msi_desc - PCI/MSI specific MSI descriptor data + * struct pci_msi_desc - PCI/MSI specific MSI descriptor data * * @msi_mask: [PCI MSI] MSI cached mask bits * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits @@ -101,6 +101,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, * @can_mask: [PCI MSI/X] Masking supported? * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq + * @msi_attrib: [PCI MSI/X] Compound struct of MSI/X attributes * @mask_pos: [PCI MSI] Mask register position * @mask_base: [PCI MSI-X] Mask register base address */ @@ -169,7 +170,7 @@ struct msi_desc_data { * Only used if iommu_msi_shift != 0 * @iommu_msi_shift: Indicates how many bits of the original address should be * preserved when using iommu_msi_iova. 
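
/*
 * Illustrative sketch of the parameq()/parameqn() semantics documented in
 * the moduleparam.h hunk above: '-' and '_' compare equal, so both spellings
 * of a boot parameter name match. The parameter names used here are only
 * examples, not real kernel parameters.
 */
#include <linux/moduleparam.h>

static bool example_param_name_matches(const char *user_name)
{
	/* "slab-nomerge" and "slab_nomerge" are treated as the same name. */
	return parameq(user_name, "slab_nomerge") ||
	       parameqn(user_name, "slab_nomerge=", 12);
}
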
- * @sysfs_attr: Pointer to sysfs device attribute + * @sysfs_attrs: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message * address or data changes @@ -220,7 +221,7 @@ enum msi_desc_filter { /** * struct msi_dev_domain - The internals of MSI domain info per device * @store: Xarray for storing MSI descriptor pointers - * @irqdomain: Pointer to a per device interrupt domain + * @domain: Pointer to a per device interrupt domain */ struct msi_dev_domain { struct xarray store; @@ -702,7 +703,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); -u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node); +u32 pci_msi_map_rid_ctlr_node(struct irq_domain *domain, struct pci_dev *pdev, + struct fwnode_handle **node); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg, struct msi_desc *desc); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bf535f0118bb..ecaa0440f6ec 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -182,13 +182,13 @@ static inline int __must_check __devm_mutex_init(struct device *dev, struct mute * Also see Documentation/locking/mutex-design.rst. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); +extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass) __acquires(lock); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); + unsigned int subclass) __cond_acquires(0, lock); extern int __must_check _mutex_lock_killable(struct mutex *lock, - unsigned int subclass, struct lockdep_map *nest_lock); -extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); + unsigned int subclass, struct lockdep_map *nest_lock) __cond_acquires(0, lock); +extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) __acquires(lock); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) @@ -211,10 +211,10 @@ do { \ _mutex_lock_killable(lock, subclass, NULL) #else -extern void mutex_lock(struct mutex *lock); -extern int __must_check mutex_lock_interruptible(struct mutex *lock); -extern int __must_check mutex_lock_killable(struct mutex *lock); -extern void mutex_lock_io(struct mutex *lock); +extern void mutex_lock(struct mutex *lock) __acquires(lock); +extern int __must_check mutex_lock_interruptible(struct mutex *lock) __cond_acquires(0, lock); +extern int __must_check mutex_lock_killable(struct mutex *lock) __cond_acquires(0, lock); +extern void mutex_lock_io(struct mutex *lock) __acquires(lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) @@ -232,7 +232,7 @@ extern void mutex_lock_io(struct mutex *lock); */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); +extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) __cond_acquires(true, lock); #define mutex_trylock_nest_lock(lock, nest_lock) \ ( \ @@ 
-242,17 +242,27 @@ extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest #define mutex_trylock(lock) _mutex_trylock_nest_lock(lock, NULL) #else -extern int mutex_trylock(struct mutex *lock); +extern int mutex_trylock(struct mutex *lock) __cond_acquires(true, lock); #define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock) #endif -extern void mutex_unlock(struct mutex *lock); +extern void mutex_unlock(struct mutex *lock) __releases(lock); -extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) __cond_acquires(true, lock); -DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) +DEFINE_LOCK_GUARD_1(mutex, struct mutex, mutex_lock(_T->lock), mutex_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(mutex, _try, mutex_trylock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(mutex, _intr, mutex_lock_interruptible(_T->lock), _RET == 0) +DEFINE_LOCK_GUARD_1(mutex_init, struct mutex, mutex_init(_T->lock), /* */) + +DECLARE_LOCK_GUARD_1_ATTRS(mutex, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex, _T) +DECLARE_LOCK_GUARD_1_ATTRS(mutex_try, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_try, _T) +DECLARE_LOCK_GUARD_1_ATTRS(mutex_intr, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_intr_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_intr, _T) +DECLARE_LOCK_GUARD_1_ATTRS(mutex_init, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_init, _T) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h index fdf7f515fde8..80975935ec48 100644 --- a/include/linux/mutex_types.h +++ b/include/linux/mutex_types.h @@ -38,7 +38,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct mutex { +context_lock_struct(mutex) { atomic_long_t owner; raw_spinlock_t wait_lock; #ifdef CONFIG_MUTEX_SPIN_ON_OWNER @@ -59,7 +59,7 @@ struct mutex { */ #include <linux/rtmutex.h> -struct mutex { +context_lock_struct(mutex) { struct rt_mutex_base rtmutex; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h index b332b019b29c..0014fbc1c626 100644 --- a/include/linux/ns/ns_common_types.h +++ b/include/linux/ns/ns_common_types.h @@ -108,11 +108,13 @@ extern const struct proc_ns_operations utsns_operations; * @ns_tree: namespace tree nodes and active reference count */ struct ns_common { + struct { + refcount_t __ns_ref; /* do not use directly */ + } ____cacheline_aligned_in_smp; u32 ns_type; struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; - refcount_t __ns_ref; /* do not use directly */ union { struct ns_tree; struct rcu_head ns_rcu; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 4d103ac8f5c7..b8710c825d64 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -162,8 +162,7 @@ void nubus_seq_write_rsrc_mem(struct seq_file *m, unsigned char *nubus_dirptr(const struct nubus_dirent *nd); /* Declarations relating to driver model objects */ -int 
nubus_parent_device_register(void); -int nubus_device_register(struct nubus_board *board); +int nubus_device_register(struct device *parent, struct nubus_board *board); int nubus_driver_register(struct nubus_driver *ndrv); void nubus_driver_unregister(struct nubus_driver *ndrv); int nubus_proc_show(struct seq_file *m, void *data); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 1c2bc0281807..2a64d8cecaae 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -11,6 +11,30 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *); +struct of_imap_parser { + struct device_node *node; + const __be32 *imap; + const __be32 *imap_end; + u32 parent_offset; +}; + +struct of_imap_item { + struct of_phandle_args parent_args; + u32 child_imap_count; + u32 child_imap[16]; /* Arbitrary size. + * Should be #address-cells + #interrupt-cells but + * avoid using allocation and so, expect that 16 + * should be enough + */ +}; + +/* + * If the iterator is exited prematurely (break, goto, return) of_node_put() has + * to be called on item.parent_args.np + */ +#define for_each_of_imap_item(parser, item) \ + for (; of_imap_parser_one(parser, item);) + /* * Workarounds only applied to 32bit powermac machines */ @@ -49,6 +73,11 @@ extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); extern struct device_node *of_irq_find_parent(struct device_node *child); +extern int of_imap_parser_init(struct of_imap_parser *parser, + struct device_node *node, + struct of_imap_item *item); +extern struct of_imap_item *of_imap_parser_one(struct of_imap_parser *parser, + struct of_imap_item *item); extern struct irq_domain *of_msi_get_domain(struct device *dev, const struct device_node *np, enum irq_domain_bus_token token); @@ -92,7 +121,17 @@ static inline void *of_irq_find_parent(struct device_node *child) { return NULL; } - +static inline int of_imap_parser_init(struct of_imap_parser *parser, + struct device_node *node, + struct of_imap_item *item) +{ + return -ENOSYS; +} +static inline struct of_imap_item *of_imap_parser_one(struct of_imap_parser *parser, + struct of_imap_item *item) +{ + return NULL; +} static inline struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token) diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6de479ebbe5d..ebce402854de 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -145,6 +145,11 @@ enum OID { OID_id_rsassa_pkcs1_v1_5_with_sha3_384, /* 2.16.840.1.101.3.4.3.15 */ OID_id_rsassa_pkcs1_v1_5_with_sha3_512, /* 2.16.840.1.101.3.4.3.16 */ + /* NIST FIPS-204 ML-DSA */ + OID_id_ml_dsa_44, /* 2.16.840.1.101.3.4.3.17 */ + OID_id_ml_dsa_65, /* 2.16.840.1.101.3.4.3.18 */ + OID_id_ml_dsa_87, /* 2.16.840.1.101.3.4.3.19 */ + OID__NR }; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 736f633b2d5f..6220a2000df8 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -552,4 +552,46 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) (__member_size((name)->array) / sizeof(*(name)->array) + \ __must_be_array((name)->array)) +/** + * typeof_flex_counter() - Return the type of the counter variable of a given + * flexible array member annotated by __counted_by(). + * @FAM: Instance of flexible array member within a given struct. 
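
/*
 * Usage sketch for the interrupt-map iterator declared in the of_irq.h hunk
 * above. This is a guess based only on those declarations; the 'target'
 * argument and the function itself are invented for illustration. Note the
 * of_node_put() on early exit, as required by the comment attached to
 * for_each_of_imap_item().
 */
#include <linux/of.h>
#include <linux/of_irq.h>

static bool example_imap_has_parent(struct device_node *np,
				    struct device_node *target)
{
	struct of_imap_parser parser;
	struct of_imap_item item;
	bool found = false;

	if (of_imap_parser_init(&parser, np, &item))
		return false;

	for_each_of_imap_item(&parser, &item) {
		if (item.parent_args.np == target) {
			/* Leaving the loop early: drop the parent reference. */
			of_node_put(item.parent_args.np);
			found = true;
			break;
		}
	}
	return found;
}
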
+ * + * Returns: "size_t" if no annotation exists. + */ +#define typeof_flex_counter(FAM) \ + typeof(_Generic(__flex_counter(FAM), \ + void *: (size_t)0, \ + default: *__flex_counter(FAM))) + +/** + * overflows_flex_counter_type() - Check if the counter associated with the + * given flexible array member can represent + * a value. + * @TYPE: Type of the struct that contains the @FAM. + * @FAM: Member name of the FAM within @TYPE. + * @COUNT: Value to check against the __counted_by annotated @FAM's counter. + * + * Returns: true if @COUNT can be represented in the @FAM's counter. When + * @FAM is not annotated with __counted_by(), always returns true. + */ +#define overflows_flex_counter_type(TYPE, FAM, COUNT) \ + (!overflows_type(COUNT, typeof_flex_counter(((TYPE *)NULL)->FAM))) + +/** + * __set_flex_counter() - Set the counter associated with the given flexible + * array member that has been annoated by __counted_by(). + * @FAM: Instance of flexible array member within a given struct. + * @COUNT: Value to store to the __counted_by annotated @FAM_PTR's counter. + * + * This is a no-op if no annotation exists. Count needs to be checked with + * overflows_flex_counter_type() before using this function. + */ +#define __set_flex_counter(FAM, COUNT) \ +({ \ + *_Generic(__flex_counter(FAM), \ + void *: &(size_t){ 0 }, \ + default: __flex_counter(FAM)) = (COUNT); \ +}) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 078225b514d4..c0c54baadf04 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -12,7 +12,8 @@ #include <linux/acpi.h> #ifdef CONFIG_ACPI -extern acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev); +extern acpi_status pci_acpi_add_root_pm_notifier(struct acpi_device *dev, + struct acpi_pci_root *pci_root); static inline acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev) { return acpi_remove_pm_notifier(dev); diff --git a/include/linux/pci-ide.h b/include/linux/pci-ide.h index 37a1ad9501b0..ae07d9f699c0 100644 --- a/include/linux/pci-ide.h +++ b/include/linux/pci-ide.h @@ -26,7 +26,7 @@ enum pci_ide_partner_select { /** * struct pci_ide_partner - Per port pair Selective IDE Stream settings * @rid_start: Partner Port Requester ID range start - * @rid_end: Partner Port Requester ID range end + * @rid_end: Partner Port Requester ID range end (inclusive) * @stream_index: Selective IDE Stream Register Block selection * @mem_assoc: PCI bus memory address association for targeting peer partner * @pref_assoc: PCI bus prefetchable memory address association for @@ -82,7 +82,6 @@ struct pci_ide_regs { * @host_bridge_stream: allocated from host bridge @ide_stream_ida pool * @stream_id: unique Stream ID (within Partner Port pairing) * @name: name of the established Selective IDE Stream in sysfs - * @tsm_dev: For TSM established IDE, the TSM device context * * Negative @stream_id values indicate "uninitialized" on the * expectation that with TSM established IDE the TSM owns the stream_id @@ -94,7 +93,6 @@ struct pci_ide { u8 host_bridge_stream; int stream_id; const char *name; - struct tsm_dev *tsm_dev; }; /* diff --git a/include/linux/pci.h b/include/linux/pci.h index b5cc0c2b9906..b30631673b5b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -377,6 +377,13 @@ struct pci_dev { 0xffffffff. You only need to change this if your device has broken DMA or supports 64-bit transfers. */ + u64 msi_addr_mask; /* Mask of the bits of bus address for + MSI that this device implements. 
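
/*
 * Minimal sketch of the flexible-array counter helpers documented in the
 * overflow.h hunk above, using a made-up structure and allocator. Per the
 * documentation, overflows_flex_counter_type() returns true when the count
 * is representable in the __counted_by() counter, so the failure path below
 * triggers on the negated result.
 */
#include <linux/overflow.h>
#include <linux/slab.h>

struct example_buf {
	u16 nr;				/* counter for data[] */
	u32 data[] __counted_by(nr);
};

static struct example_buf *example_buf_alloc(size_t count, gfp_t gfp)
{
	struct example_buf *b;

	if (!overflows_flex_counter_type(struct example_buf, data, count))
		return NULL;		/* 'count' does not fit in 'nr' */

	b = kmalloc(struct_size(b, data, count), gfp);
	if (!b)
		return NULL;

	__set_flex_counter(b->data, count);	/* range checked above */
	return b;
}
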
+ Normally set based on device + capabilities. You only need to + change this if your device claims + to support 64-bit MSI but implements + fewer than 64 address bits. */ struct device_dma_parameters dma_parms; @@ -441,7 +448,6 @@ struct pci_dev { unsigned int is_busmaster:1; /* Is busmaster */ unsigned int no_msi:1; /* May not use MSI */ - unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ unsigned int block_cfg_access:1; /* Config space access blocked */ unsigned int broken_parity_status:1; /* Generates false positive parity */ unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */ @@ -1206,6 +1212,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources); int pci_host_probe(struct pci_host_bridge *bridge); +void pci_probe_flush_workqueue(void); int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax); int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax); void pci_bus_release_busn_res(struct pci_bus *b); @@ -2079,6 +2086,8 @@ static inline int pci_has_flag(int flag) { return 0; } _PCI_NOP_ALL(read, *) _PCI_NOP_ALL(write,) +static inline void pci_probe_flush_workqueue(void) { } + static inline struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 288f5235649a..c8cb010d655e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -161,6 +161,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) +#define percpu_rwsem_is_write_held(sem) lockdep_is_held_type(sem, 0) #define percpu_rwsem_is_held(sem) lockdep_is_held(sem) #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ded2e582c60..48d851fbd8ea 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -305,6 +305,7 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 #define PERF_PMU_CAP_AUX_PAUSE 0x0200 #define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 +#define PERF_PMU_CAP_MEDIATED_VPMU 0x0800 /** * pmu::scope @@ -998,6 +999,11 @@ struct perf_event_groups { u64 index; }; +struct perf_time_ctx { + u64 time; + u64 stamp; + u64 offset; +}; /** * struct perf_event_context - event context structure @@ -1036,9 +1042,12 @@ struct perf_event_context { /* * Context clock, runs when context enabled. */ - u64 time; - u64 timestamp; - u64 timeoffset; + struct perf_time_ctx time; + + /* + * Context clock, runs when in the guest mode. + */ + struct perf_time_ctx timeguest; /* * These fields let us detect when two contexts have both @@ -1171,9 +1180,8 @@ struct bpf_perf_event_data_kern { * This is a per-cpu dynamically allocated data structure. 
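
/*
 * Hypothetical driver sketch for the msi_addr_mask field added above (which
 * replaces the old no_64bit_msi flag): a device that claims 64-bit MSI
 * support but only decodes the low 40 address bits would clamp the mask
 * before allocating vectors. The device and probe function are invented;
 * this only assumes the field is honoured by the MSI core as the comment
 * in the pci.h hunk describes.
 */
#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int example_pci_probe(struct pci_dev *pdev,
			     const struct pci_device_id *id)
{
	int ret;

	ret = pcim_enable_device(pdev);
	if (ret)
		return ret;

	/* Only 40 MSI address bits are wired up on this (made-up) device. */
	pdev->msi_addr_mask = DMA_BIT_MASK(40);

	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
	return ret < 0 ? ret : 0;
}
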
*/ struct perf_cgroup_info { - u64 time; - u64 timestamp; - u64 timeoffset; + struct perf_time_ctx time; + struct perf_time_ctx timeguest; int active; }; @@ -1669,6 +1677,8 @@ struct perf_guest_info_callbacks { unsigned int (*state)(void); unsigned long (*get_ip)(void); unsigned int (*handle_intel_pt_intr)(void); + + void (*handle_mediated_pmi)(void); }; #ifdef CONFIG_GUEST_PERF_EVENTS @@ -1678,6 +1688,7 @@ extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); +DECLARE_STATIC_CALL(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi); static inline unsigned int perf_guest_state(void) { @@ -1694,6 +1705,11 @@ static inline unsigned int perf_guest_handle_intel_pt_intr(void) return static_call(__perf_guest_handle_intel_pt_intr)(); } +static inline void perf_guest_handle_mediated_pmi(void) +{ + static_call(__perf_guest_handle_mediated_pmi)(); +} + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); @@ -1914,6 +1930,13 @@ extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); +#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU +int perf_create_mediated_pmu(void); +void perf_release_mediated_pmu(void); +void perf_load_guest_context(void); +void perf_put_guest_context(void); +#endif + #else /* !CONFIG_PERF_EVENTS: */ static inline void * diff --git a/include/linux/platform_data/hwmon-s3c.h b/include/linux/platform_data/hwmon-s3c.h deleted file mode 100644 index 7d21e0c41037..000000000000 --- a/include/linux/platform_data/hwmon-s3c.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2005 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * http://armlinux.simtec.co.uk/ - * - * S3C - HWMon interface for ADC -*/ - -#ifndef __HWMON_S3C_H__ -#define __HWMON_S3C_H__ - -/** - * s3c_hwmon_chcfg - channel configuration - * @name: The name to give this channel. - * @mult: Multiply the ADC value read by this. - * @div: Divide the value from the ADC by this. - * - * The value read from the ADC is converted to a value that - * hwmon expects (mV) by result = (value_read * @mult) / @div. - */ -struct s3c_hwmon_chcfg { - const char *name; - unsigned int mult; - unsigned int div; -}; - -/** - * s3c_hwmon_pdata - HWMON platform data - * @in: One configuration for each possible channel used. 
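
/*
 * Hypothetical sketch of a hypervisor wiring up the new handle_mediated_pmi
 * callback in the perf_event.h hunk above, alongside the existing state and
 * get_ip hooks. All handler bodies are placeholders, and the registration
 * call is only indicated in a comment.
 */
#include <linux/perf_event.h>

static unsigned int example_guest_state(void)
{
	return 0;	/* placeholder: report "not in guest" */
}

static unsigned long example_guest_get_ip(void)
{
	return 0;	/* placeholder */
}

static void example_handle_mediated_pmi(void)
{
	/* placeholder: inject the PMI into the current vCPU's virtual PMU */
}

static struct perf_guest_info_callbacks example_guest_cbs = {
	.state			= example_guest_state,
	.get_ip			= example_guest_get_ip,
	.handle_mediated_pmi	= example_handle_mediated_pmi,
};

/* e.g. perf_register_guest_info_callbacks(&example_guest_cbs); */
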
- */ -struct s3c_hwmon_pdata { - struct s3c_hwmon_chcfg *in[8]; -}; - -#endif /* __HWMON_S3C_H__ */ diff --git a/include/linux/platform_data/mipi-i3c-hci.h b/include/linux/platform_data/mipi-i3c-hci.h new file mode 100644 index 000000000000..ab7395f455f9 --- /dev/null +++ b/include/linux/platform_data/mipi-i3c-hci.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef INCLUDE_PLATFORM_DATA_MIPI_I3C_HCI_H +#define INCLUDE_PLATFORM_DATA_MIPI_I3C_HCI_H + +#include <linux/compiler_types.h> + +/** + * struct mipi_i3c_hci_platform_data - Platform-dependent data for mipi_i3c_hci + * @base_regs: Register set base address (to support multi-bus instances) + */ +struct mipi_i3c_hci_platform_data { + void __iomem *base_regs; +}; + +#endif diff --git a/include/linux/pm.h b/include/linux/pm.h index 98a899858ece..afcaaa37a812 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -681,10 +681,10 @@ struct dev_pm_info { struct list_head entry; struct completion completion; struct wakeup_source *wakeup; + bool work_in_progress; /* Owned by the PM core */ bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - bool work_in_progress:1; /* Owned by the PM core */ bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index e86f3b731da2..9e1892525eac 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -44,8 +44,9 @@ posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, } #endif -int posix_acl_to_xattr(struct user_namespace *user_ns, - const struct posix_acl *acl, void *buffer, size_t size); +extern void *posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, + size_t *sizep, gfp_t gfp); + static inline const char *posix_acl_xattr_name(int type) { switch (type) { diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c5b30054cd01..7729fef249e1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -31,6 +31,16 @@ #include <asm/processor.h> #include <linux/context_tracking_irq.h> +token_context_lock(RCU, __reentrant_ctx_lock); +token_context_lock_instance(RCU, RCU_SCHED); +token_context_lock_instance(RCU, RCU_BH); + +/* + * A convenience macro that can be used for RCU-protected globals or struct + * members; adds type qualifier __rcu, and also enforces __guarded_by(RCU). + */ +#define __rcu_guarded __rcu __guarded_by(RCU) + #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -175,36 +185,7 @@ void rcu_tasks_torture_stats_print(char *tt, char *tf); # define synchronize_rcu_tasks synchronize_rcu # endif -# ifdef CONFIG_TASKS_TRACE_RCU -// Bits for ->trc_reader_special.b.need_qs field. -#define TRC_NEED_QS 0x1 // Task needs a quiescent state. -#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. 
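
/*
 * Minimal sketch for the __rcu_guarded qualifier introduced in the
 * rcupdate.h hunk above, using a made-up config structure: readers
 * dereference under rcu_read_lock(), updaters publish with
 * rcu_assign_pointer(). Freeing of the old object (kfree_rcu() or
 * synchronize_rcu()) is omitted.
 */
#include <linux/rcupdate.h>

struct example_cfg {
	int threshold;
};

static struct example_cfg __rcu_guarded *example_active_cfg;

static int example_cfg_threshold(void)
{
	struct example_cfg *c;
	int val = 0;

	rcu_read_lock();
	c = rcu_dereference(example_active_cfg);
	if (c)
		val = c->threshold;
	rcu_read_unlock();
	return val;
}

static void example_cfg_publish(struct example_cfg *new_cfg)
{
	rcu_assign_pointer(example_active_cfg, new_cfg);
}
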
- -u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); -void rcu_tasks_trace_qs_blkd(struct task_struct *t); - -# define rcu_tasks_trace_qs(t) \ - do { \ - int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ - \ - if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ - likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ - } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ - !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ - rcu_tasks_trace_qs_blkd(t); \ - } \ - } while (0) -void rcu_tasks_trace_torture_stats_print(char *tt, char *tf); -# else -# define rcu_tasks_trace_qs(t) do { } while (0) -# endif - -#define rcu_tasks_qs(t, preempt) \ -do { \ - rcu_tasks_classic_qs((t), (preempt)); \ - rcu_tasks_trace_qs(t); \ -} while (0) +#define rcu_tasks_qs(t, preempt) rcu_tasks_classic_qs((t), (preempt)) # ifdef CONFIG_TASKS_RUDE_RCU void synchronize_rcu_tasks_rude(void); @@ -425,7 +406,8 @@ static inline void rcu_preempt_sleep_check(void) { } // See RCU_LOCKDEP_WARN() for an explanation of the double call to // debug_lockdep_rcu_enabled(). -static inline bool lockdep_assert_rcu_helper(bool c) +static __always_inline bool lockdep_assert_rcu_helper(bool c, const struct __ctx_lock_RCU *ctx) + __assumes_shared_ctx_lock(RCU) __assumes_shared_ctx_lock(ctx) { return debug_lockdep_rcu_enabled() && (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) && @@ -438,7 +420,7 @@ static inline bool lockdep_assert_rcu_helper(bool c) * Splats if lockdep is enabled and there is no rcu_read_lock() in effect. */ #define lockdep_assert_in_rcu_read_lock() \ - WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map))) + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map), RCU)) /** * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh() @@ -448,7 +430,7 @@ static inline bool lockdep_assert_rcu_helper(bool c) * actual rcu_read_lock_bh() is required. */ #define lockdep_assert_in_rcu_read_lock_bh() \ - WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map))) + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map), RCU_BH)) /** * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched() @@ -458,7 +440,7 @@ static inline bool lockdep_assert_rcu_helper(bool c) * instead an actual rcu_read_lock_sched() is required. 
*/ #define lockdep_assert_in_rcu_read_lock_sched() \ - WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map))) + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map), RCU_SCHED)) /** * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader @@ -476,17 +458,17 @@ static inline bool lockdep_assert_rcu_helper(bool c) WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \ !lock_is_held(&rcu_bh_lock_map) && \ !lock_is_held(&rcu_sched_lock_map) && \ - preemptible())) + preemptible(), RCU)) #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) -#define lockdep_assert_in_rcu_read_lock() do { } while (0) -#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0) -#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0) -#define lockdep_assert_in_rcu_reader() do { } while (0) +#define lockdep_assert_in_rcu_read_lock() __assume_shared_ctx_lock(RCU) +#define lockdep_assert_in_rcu_read_lock_bh() __assume_shared_ctx_lock(RCU_BH) +#define lockdep_assert_in_rcu_read_lock_sched() __assume_shared_ctx_lock(RCU_SCHED) +#define lockdep_assert_in_rcu_reader() __assume_shared_ctx_lock(RCU) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -506,11 +488,11 @@ static inline bool lockdep_assert_rcu_helper(bool c) #endif /* #else #ifdef __CHECKER__ */ #define __unrcu_pointer(p, local) \ -({ \ +context_unsafe( \ typeof(*p) *local = (typeof(*p) *__force)(p); \ rcu_check_sparse(p, __rcu); \ - ((typeof(*p) __force __kernel *)(local)); \ -}) + ((typeof(*p) __force __kernel *)(local)) \ +) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property @@ -586,7 +568,7 @@ static inline bool lockdep_assert_rcu_helper(bool c) * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ -do { \ +context_unsafe( \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ @@ -594,7 +576,7 @@ do { \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ -} while (0) +) /** * rcu_replace_pointer() - replace an RCU pointer, returning its old value @@ -861,9 +843,10 @@ do { \ * only when acquiring spinlocks that are subject to priority inheritance. */ static __always_inline void rcu_read_lock(void) + __acquires_shared(RCU) { __rcu_read_lock(); - __acquire(RCU); + __acquire_shared(RCU); rcu_lock_acquire(&rcu_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock() used illegally while idle"); @@ -891,11 +874,12 @@ static __always_inline void rcu_read_lock(void) * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) + __releases_shared(RCU) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ - __release(RCU); + __release_shared(RCU); __rcu_read_unlock(); } @@ -914,9 +898,11 @@ static inline void rcu_read_unlock(void) * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) + __acquires_shared(RCU) __acquires_shared(RCU_BH) { local_bh_disable(); - __acquire(RCU_BH); + __acquire_shared(RCU); + __acquire_shared(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); @@ -928,11 +914,13 @@ static inline void rcu_read_lock_bh(void) * See rcu_read_lock_bh() for more information. 
*/ static inline void rcu_read_unlock_bh(void) + __releases_shared(RCU) __releases_shared(RCU_BH) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); - __release(RCU_BH); + __release_shared(RCU_BH); + __release_shared(RCU); local_bh_enable(); } @@ -952,9 +940,11 @@ static inline void rcu_read_unlock_bh(void) * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) + __acquires_shared(RCU) __acquires_shared(RCU_SCHED) { preempt_disable(); - __acquire(RCU_SCHED); + __acquire_shared(RCU); + __acquire_shared(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); @@ -962,9 +952,11 @@ static inline void rcu_read_lock_sched(void) /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) + __acquires_shared(RCU) __acquires_shared(RCU_SCHED) { preempt_disable_notrace(); - __acquire(RCU_SCHED); + __acquire_shared(RCU); + __acquire_shared(RCU_SCHED); } /** @@ -973,22 +965,27 @@ static inline notrace void rcu_read_lock_sched_notrace(void) * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) + __releases_shared(RCU) __releases_shared(RCU_SCHED) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); - __release(RCU_SCHED); + __release_shared(RCU_SCHED); + __release_shared(RCU); preempt_enable(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) + __releases_shared(RCU) __releases_shared(RCU_SCHED) { - __release(RCU_SCHED); + __release_shared(RCU_SCHED); + __release_shared(RCU); preempt_enable_notrace(); } static __always_inline void rcu_read_lock_dont_migrate(void) + __acquires_shared(RCU) { if (IS_ENABLED(CONFIG_PREEMPT_RCU)) migrate_disable(); @@ -996,6 +993,7 @@ static __always_inline void rcu_read_lock_dont_migrate(void) } static inline void rcu_read_unlock_migrate(void) + __releases_shared(RCU) { rcu_read_unlock(); if (IS_ENABLED(CONFIG_PREEMPT_RCU)) @@ -1041,10 +1039,10 @@ static inline void rcu_read_unlock_migrate(void) * ordering guarantees for either the CPU or the compiler. */ #define RCU_INIT_POINTER(p, v) \ - do { \ + context_unsafe( \ rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ - } while (0) + ) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer @@ -1192,18 +1190,7 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; -DEFINE_LOCK_GUARD_0(rcu, - do { - rcu_read_lock(); - /* - * sparse doesn't call the cleanup function, - * so just release immediately and don't track - * the context. We don't need to anyway, since - * the whole point of the guard is to not need - * the explicit unlock. 
- */ - __release(RCU); - } while (0), - rcu_read_unlock()) +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) +DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU)) #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index e6c44eb428ab..cee89e51e45c 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -12,27 +12,74 @@ #include <linux/rcupdate.h> #include <linux/cleanup.h> -extern struct lockdep_map rcu_trace_lock_map; +#ifdef CONFIG_TASKS_TRACE_RCU +extern struct srcu_struct rcu_tasks_trace_srcu_struct; +#endif // #ifdef CONFIG_TASKS_TRACE_RCU -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { - return lock_is_held(&rcu_trace_lock_map); + return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct); } -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { return 1; } -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) #ifdef CONFIG_TASKS_TRACE_RCU -void rcu_read_unlock_trace_special(struct task_struct *t); +/** + * rcu_read_lock_tasks_trace - mark beginning of RCU-trace read-side critical section + * + * When synchronize_rcu_tasks_trace() is invoked by one task, then that + * task is guaranteed to block until all other tasks exit their read-side + * critical sections. Similarly, if call_rcu_trace() is invoked on one + * task while other tasks are within RCU read-side critical sections, + * invocation of the corresponding RCU callback is deferred until after + * the all the other tasks exit their critical sections. + * + * For more details, please see the documentation for + * srcu_read_lock_fast(). For a description of how implicit RCU + * readers provide the needed ordering for architectures defining the + * ARCH_WANTS_NO_INSTR Kconfig option (and thus promising never to trace + * code where RCU is not watching), please see the __srcu_read_lock_fast() + * (non-kerneldoc) header comment. Otherwise, the smp_mb() below provided + * the needed ordering. + */ +static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void) +{ + struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. + return ret; +} + +/** + * rcu_read_unlock_tasks_trace - mark end of RCU-trace read-side critical section + * @scp: return value from corresponding rcu_read_lock_tasks_trace(). + * + * Pairs with the preceding call to rcu_read_lock_tasks_trace() that + * returned the value passed in via scp. + * + * For more details, please see the documentation for rcu_read_unlock(). + * For memory-ordering information, please see the header comment for the + * rcu_read_lock_tasks_trace() function. + */ +static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp) +{ + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. 
+ __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); +} /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section @@ -50,12 +97,15 @@ static inline void rcu_read_lock_trace(void) { struct task_struct *t = current; - WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1); - barrier(); - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && - t->trc_reader_special.b.need_mb) - smp_mb(); // Pairs with update-side barriers - rcu_lock_acquire(&rcu_trace_lock_map); + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); + if (t->trc_reader_nesting++) { + // In case we interrupted a Tasks Trace RCU reader. + return; + } + barrier(); // nesting before scp to protect against interrupt handler. + t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering } /** @@ -69,26 +119,88 @@ static inline void rcu_read_lock_trace(void) */ static inline void rcu_read_unlock_trace(void) { - int nesting; + struct srcu_ctr __percpu *scp; struct task_struct *t = current; - rcu_lock_release(&rcu_trace_lock_map); - nesting = READ_ONCE(t->trc_reader_nesting) - 1; - barrier(); // Critical section before disabling. - // Disable IPI-based setting of .need_qs. - WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting); - if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { - WRITE_ONCE(t->trc_reader_nesting, nesting); - return; // We assume shallow reader nesting. + scp = t->trc_reader_scp; + barrier(); // scp before nesting to protect against interrupt handler. + if (!--t->trc_reader_nesting) { + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering + __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); } - WARN_ON_ONCE(nesting != 0); - rcu_read_unlock_trace_special(t); + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } -void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); -void synchronize_rcu_tasks_trace(void); -void rcu_barrier_tasks_trace(void); -struct task_struct *get_rcu_tasks_trace_gp_kthread(void); +/** + * call_rcu_tasks_trace() - Queue a callback trace task-based grace period + * @rhp: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a trace rcu-tasks + * grace period elapses, in other words after all currently executing + * trace rcu-tasks read-side critical sections have completed. These + * read-side critical sections are delimited by calls to rcu_read_lock_trace() + * and rcu_read_unlock_trace(). + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. + */ +static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) +{ + call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func); +} + +/** + * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period + * + * Control will return to the caller some time after a trace rcu-tasks + * grace period has elapsed, in other words after all currently executing + * trace rcu-tasks read-side critical sections have elapsed. These read-side + * critical sections are delimited by calls to rcu_read_lock_trace() + * and rcu_read_unlock_trace(). 
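
/*
 * Classic reader/updater pairing for the Tasks Trace RCU API documented in
 * the rcupdate_trace.h hunk above, using a made-up data structure. The
 * reader uses rcu_read_lock_trace() with rcu_dereference_check() keyed on
 * rcu_read_lock_trace_held(); the updater defers freeing via
 * call_rcu_tasks_trace(), as its kernel-doc describes.
 */
#include <linux/container_of.h>
#include <linux/rcupdate_trace.h>
#include <linux/slab.h>

struct example_node {
	int val;
	struct rcu_head rcu;
};

static struct example_node __rcu *example_ptr;

static int example_read(void)
{
	struct example_node *p;
	int val = 0;

	rcu_read_lock_trace();
	p = rcu_dereference_check(example_ptr, rcu_read_lock_trace_held());
	if (p)
		val = p->val;
	rcu_read_unlock_trace();
	return val;
}

static void example_free_cb(struct rcu_head *head)
{
	kfree(container_of(head, struct example_node, rcu));
}

static void example_replace(struct example_node *new_node)
{
	struct example_node *old;

	/* Third argument would normally assert the update-side lock. */
	old = rcu_replace_pointer(example_ptr, new_node, true);
	if (old)
		call_rcu_tasks_trace(&old->rcu, example_free_cb);
}
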
+ * + * This is a very specialized primitive, intended only for a few uses in + * tracing and other situations requiring manipulation of function preambles + * and profiling hooks. The synchronize_rcu_tasks_trace() function is not + * (yet) intended for heavy use from multiple CPUs. + * + * See the description of synchronize_rcu() for more detailed information + * on memory ordering guarantees. + */ +static inline void synchronize_rcu_tasks_trace(void) +{ + synchronize_srcu(&rcu_tasks_trace_srcu_struct); +} + +/** + * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks. + * + * Note that rcu_barrier_tasks_trace() is not obligated to actually wait, + * for example, if there are no pending callbacks. + */ +static inline void rcu_barrier_tasks_trace(void) +{ + srcu_barrier(&rcu_tasks_trace_srcu_struct); +} + +/** + * rcu_tasks_trace_expedite_current - Expedite the current Tasks Trace RCU grace period + * + * Cause the current Tasks Trace RCU grace period to become expedited. + * The grace period following the current one might also be expedited. + * If there is no current grace period, one might be created. If the + * current grace period is currently sleeping, that sleep will complete + * before expediting will take effect. + */ +static inline void rcu_tasks_trace_expedite_current(void) +{ + srcu_expedite_current(&rcu_tasks_trace_srcu_struct); +} + +// Placeholders to enable stepwise transition. +void __init rcu_tasks_trace_suppress_unused(void); + #else /* * The BPF JIT forms these addresses even when it doesn't call these diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 80dc023ac2bf..3da377ffb0c2 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -478,9 +478,9 @@ static inline void refcount_dec(refcount_t *r) extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); -extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock); -extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock); +extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(true, lock); +extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(true, lock); extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, - unsigned long *flags) __cond_acquires(lock); + unsigned long *flags) __cond_acquires(true, lock); #endif /* _LINUX_REFCOUNT_H */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 54701668b3df..006e57fd7ca5 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -53,6 +53,7 @@ enum resctrl_res_level { RDT_RESOURCE_L2, RDT_RESOURCE_MBA, RDT_RESOURCE_SMBA, + RDT_RESOURCE_PERF_PKG, /* Must be the last */ RDT_NUM_RESOURCES, @@ -131,15 +132,24 @@ enum resctrl_domain_type { * @list: all instances of this resource * @id: unique id for this instance * @type: type of this instance + * @rid: resource id for this instance * @cpu_mask: which CPUs share this resource */ struct rdt_domain_hdr { struct list_head list; int id; enum resctrl_domain_type type; + enum resctrl_res_level rid; struct cpumask cpu_mask; }; +static inline bool domain_header_is_valid(struct rdt_domain_hdr *hdr, + enum resctrl_domain_type type, + enum resctrl_res_level rid) +{ + return !WARN_ON_ONCE(hdr->type != type || hdr->rid != rid); +} + /** * struct rdt_ctrl_domain - group of 
CPUs sharing a resctrl control resource * @hdr: common header for different domain types @@ -169,7 +179,7 @@ struct mbm_cntr_cfg { }; /** - * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource + * struct rdt_l3_mon_domain - group of CPUs sharing RDT_RESOURCE_L3 monitoring * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold @@ -183,7 +193,7 @@ struct mbm_cntr_cfg { * @cntr_cfg: array of assignable counters' configuration (indexed * by counter ID) */ -struct rdt_mon_domain { +struct rdt_l3_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; @@ -261,6 +271,7 @@ enum resctrl_scope { RESCTRL_L2_CACHE = 2, RESCTRL_L3_CACHE = 3, RESCTRL_L3_NODE, + RESCTRL_PACKAGE, }; /** @@ -284,7 +295,7 @@ enum resctrl_schema_fmt { * events of monitor groups created via mkdir. */ struct resctrl_mon { - int num_rmid; + u32 num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; @@ -358,10 +369,10 @@ struct resctrl_cpu_defaults { }; struct resctrl_mon_config_info { - struct rdt_resource *r; - struct rdt_mon_domain *d; - u32 evtid; - u32 mon_config; + struct rdt_resource *r; + struct rdt_l3_mon_domain *d; + u32 evtid; + u32 mon_config; }; /** @@ -403,7 +414,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -void resctrl_enable_mon_event(enum resctrl_event_id eventid); +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, + unsigned int binary_bits, void *arch_priv); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); @@ -498,22 +510,31 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); +/* + * Architecture hook called at beginning of first file system mount attempt. + * No locks are held. + */ +void resctrl_arch_pre_mount(void); + /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. - * @d: domain that the counter should be read from. + * @hdr: Header of domain that the counter should be read from. * @closid: closid that matches the rmid. Depending on the architecture, the * counter may match traffic of both @closid and @rmid, or @rmid * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. + * @arch_priv: Architecture private data for this event. + * The @arch_priv provided by the architecture via + * resctrl_enable_mon_event(). * @val: result of the counter read in bytes. 
* @arch_mon_ctx: An architecture specific value from * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies @@ -529,9 +550,9 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *arch_mon_ctx); + void *arch_priv, u64 *val, void *arch_mon_ctx); /** * resctrl_arch_rmid_read_context_check() - warn about invalid contexts @@ -576,7 +597,7 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -589,7 +610,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d); /** * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its @@ -615,7 +636,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); * * This can be called from any CPU. */ -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); @@ -638,7 +659,7 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid, u64 *val); @@ -653,7 +674,7 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. 
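
/*
 * Sketch of how an architecture's resctrl_arch_rmid_read() might recover
 * its L3 monitor domain from the generic header now passed in, using the
 * domain_header_is_valid() helper added above. The RESCTRL_MON_DOMAIN and
 * RDT_RESOURCE_L3 enumerators are assumed from the existing resctrl enums;
 * the surrounding counter-read logic is omitted.
 */
#include <linux/container_of.h>
#include <linux/resctrl.h>

static struct rdt_l3_mon_domain *example_hdr_to_l3_dom(struct rdt_domain_hdr *hdr)
{
	if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
		return NULL;

	return container_of(hdr, struct rdt_l3_mon_domain, hdr);
}
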
*/ -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid); diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index acfe07860b34..a5f56faa18d2 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -50,6 +50,17 @@ enum resctrl_event_id { QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + /* Intel Telemetry Events */ + PMT_EVENT_ENERGY, + PMT_EVENT_ACTIVITY, + PMT_EVENT_STALLS_LLC_HIT, + PMT_EVENT_C1_RES, + PMT_EVENT_UNHALTED_CORE_CYCLES, + PMT_EVENT_STALLS_LLC_MISS, + PMT_EVENT_AUTO_C6_RES, + PMT_EVENT_UNHALTED_REF_CYCLES, + PMT_EVENT_UOPS_RETIRED, + /* Must be the last */ QOS_NUM_EVENTS, }; diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 67d2bf579942..9b262109726d 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -6,6 +6,7 @@ #define __LINUX_RESTART_BLOCK_H #include <linux/compiler.h> +#include <linux/time64.h> #include <linux/types.h> struct __kernel_timespec; @@ -50,8 +51,7 @@ struct restart_block { struct pollfd __user *ufds; int nfds; int has_timeout; - unsigned long tv_sec; - unsigned long tv_nsec; + struct timespec64 end_time; } poll; }; }; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 08e664b21f5a..133ccb39137a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -245,16 +245,17 @@ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); -int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires_shared(RCU); static inline void rhashtable_walk_start(struct rhashtable_iter *iter) + __acquires_shared(RCU) { (void)rhashtable_walk_start_check(iter); } void *rhashtable_walk_next(struct rhashtable_iter *iter); void *rhashtable_walk_peek(struct rhashtable_iter *iter); -void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases_shared(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), @@ -325,6 +326,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert( static inline unsigned long rht_lock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt) + __acquires(__bitlock(0, bkt)) { unsigned long flags; @@ -337,6 +339,7 @@ static inline unsigned long rht_lock(struct bucket_table *tbl, static inline unsigned long rht_lock_nested(struct bucket_table *tbl, struct rhash_lock_head __rcu **bucket, unsigned int subclass) + __acquires(__bitlock(0, bucket)) { unsigned long flags; @@ -349,6 +352,7 @@ static inline unsigned long rht_lock_nested(struct bucket_table *tbl, static inline void rht_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, unsigned long flags) + __releases(__bitlock(0, bkt)) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); @@ -424,13 +428,14 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, struct rhash_head *obj, unsigned long flags) + __releases(__bitlock(0, bkt)) { if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); 
rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); - __release(bitlock); + __release(__bitlock(0, bkt)); local_irq_restore(flags); } @@ -612,6 +617,7 @@ static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params, const enum rht_lookup_freq freq) + __must_hold_shared(RCU) { struct rhashtable_compare_arg arg = { .ht = ht, @@ -666,6 +672,7 @@ restart: static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) + __must_hold_shared(RCU) { struct rhash_head *he = __rhashtable_lookup(ht, key, params, RHT_LOOKUP_NORMAL); @@ -676,6 +683,7 @@ static __always_inline void *rhashtable_lookup( static __always_inline void *rhashtable_lookup_likely( struct rhashtable *ht, const void *key, const struct rhashtable_params params) + __must_hold_shared(RCU) { struct rhash_head *he = __rhashtable_lookup(ht, key, params, RHT_LOOKUP_LIKELY); @@ -727,6 +735,7 @@ static __always_inline void *rhashtable_lookup_fast( static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) + __must_hold_shared(RCU) { struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params, RHT_LOOKUP_NORMAL); @@ -737,6 +746,7 @@ static __always_inline struct rhlist_head *rhltable_lookup( static __always_inline struct rhlist_head *rhltable_lookup_likely( struct rhltable *hlt, const void *key, const struct rhashtable_params params) + __must_hold_shared(RCU) { struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params, RHT_LOOKUP_LIKELY); diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 2266f4dc77b6..7a01a0760405 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -163,4 +163,15 @@ void rseq_syscall(struct pt_regs *regs); static inline void rseq_syscall(struct pt_regs *regs) { } #endif /* !CONFIG_DEBUG_RSEQ */ +#ifdef CONFIG_RSEQ_SLICE_EXTENSION +void rseq_syscall_enter_work(long syscall); +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3); +#else /* CONFIG_RSEQ_SLICE_EXTENSION */ +static inline void rseq_syscall_enter_work(long syscall) { } +static inline int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) +{ + return -ENOTSUPP; +} +#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ + #endif /* _LINUX_RSEQ_H */ diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index a36b472627de..cbc4a791618b 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -15,6 +15,11 @@ struct rseq_stats { unsigned long cs; unsigned long clear; unsigned long fixup; + unsigned long s_granted; + unsigned long s_expired; + unsigned long s_revoked; + unsigned long s_yielded; + unsigned long s_aborted; }; DECLARE_PER_CPU(struct rseq_stats, rseq_stats); @@ -37,6 +42,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats); #ifdef CONFIG_RSEQ #include <linux/jump_label.h> #include <linux/rseq.h> +#include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/tracepoint-defs.h> @@ -75,6 +81,147 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled); #define rseq_inline __always_inline #endif +#ifdef CONFIG_RSEQ_SLICE_EXTENSION +DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key); + +static __always_inline bool rseq_slice_extension_enabled(void) +{ + return static_branch_likely(&rseq_slice_extension_key); +} + +extern unsigned int rseq_slice_ext_nsecs; +bool __rseq_arm_slice_extension_timer(void); + +static 
__always_inline bool rseq_arm_slice_extension_timer(void) +{ + if (!rseq_slice_extension_enabled()) + return false; + + if (likely(!current->rseq.slice.state.granted)) + return false; + + return __rseq_arm_slice_extension_timer(); +} + +static __always_inline void rseq_slice_clear_grant(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_RSEQ_STATS) && t->rseq.slice.state.granted) + rseq_stat_inc(rseq_stats.s_revoked); + t->rseq.slice.state.granted = false; +} + +static __always_inline bool rseq_grant_slice_extension(bool work_pending) +{ + struct task_struct *curr = current; + struct rseq_slice_ctrl usr_ctrl; + union rseq_slice_state state; + struct rseq __user *rseq; + + if (!rseq_slice_extension_enabled()) + return false; + + /* If not enabled or not a return from interrupt, nothing to do. */ + state = curr->rseq.slice.state; + state.enabled &= curr->rseq.event.user_irq; + if (likely(!state.state)) + return false; + + rseq = curr->rseq.usrptr; + scoped_user_rw_access(rseq, efault) { + + /* + * Quick check for conditions where a grant is not possible or + * needs to be revoked. + * + * 1) Any TIF bit which needs to do extra work aside from + * rescheduling prevents a grant. + * + * 2) A previous rescheduling request resulted in a slice + * extension grant. + */ + if (unlikely(work_pending || state.granted)) { + /* Clear user control unconditionally. No point in checking */ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + rseq_slice_clear_grant(curr); + return false; + } + + unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault); + if (likely(!(usr_ctrl.request))) + return false; + + /* Grant the slice extension */ + usr_ctrl.request = 0; + usr_ctrl.granted = 1; + unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault); + } + + rseq_stat_inc(rseq_stats.s_granted); + + curr->rseq.slice.state.granted = true; + /* Store expiry time for arming the timer on the way out */ + curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns(); + /* + * This is racy against a remote CPU setting TIF_NEED_RESCHED in + * several ways: + * + * 1) + * CPU0 CPU1 + * clear_tsk() + * set_tsk() + * clear_preempt() + * Raise scheduler IPI on CPU0 + * --> IPI + * fold_need_resched() -> Folds correctly + * 2) + * CPU0 CPU1 + * set_tsk() + * clear_tsk() + * clear_preempt() + * Raise scheduler IPI on CPU0 + * --> IPI + * fold_need_resched() <- NOOP as TIF_NEED_RESCHED is false + * + * #1 is no different from a regular remote reschedule as it + * sets the previously not set bit and then raises the IPI which + * folds it into the preempt counter. + * + * #2 is obviously incorrect from a scheduler POV, but it's no + * more incorrect than the code below clearing the + * reschedule request with the safety net of the timer. + * + * The important part is that the clearing is protected against the + * scheduler IPI and also against any other interrupt which might + * end up waking up a task and setting the bits in the middle of + * the operation: + * + * clear_tsk() + * ---> Interrupt + * wakeup_on_this_cpu() + * set_tsk() + * set_preempt() + * clear_preempt() + * + * which would result in inconsistent state. 
+ */ + scoped_guard(irq) { + clear_tsk_need_resched(curr); + clear_preempt_need_resched(); + } + return true; + +efault: + force_sig(SIGSEGV); + return false; +} + +#else /* CONFIG_RSEQ_SLICE_EXTENSION */ +static inline bool rseq_slice_extension_enabled(void) { return false; } +static inline bool rseq_arm_slice_extension_timer(void) { return false; } +static inline void rseq_slice_clear_grant(struct task_struct *t) { } +static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } +#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ + bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); bool rseq_debug_validate_ids(struct task_struct *t); @@ -359,8 +506,15 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); if (csaddr) unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); + + /* Open coded, so it's in the same user access region */ + if (rseq_slice_extension_enabled()) { + /* Unconditionally clear it, no point in conditionals */ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + } } + rseq_slice_clear_grant(t); /* Cache the new values */ t->rseq.ids.cpu_cid = ids->cpu_cid; rseq_stat_inc(rseq_stats.ids); @@ -456,8 +610,17 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t */ u64 csaddr; - if (unlikely(get_user_inline(csaddr, &rseq->rseq_cs))) - return false; + scoped_user_rw_access(rseq, efault) { + unsafe_get_user(csaddr, &rseq->rseq_cs, efault); + + /* Open coded, so it's in the same user access region */ + if (rseq_slice_extension_enabled()) { + /* Unconditionally clear it, no point in conditionals */ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + } + } + + rseq_slice_clear_grant(t); if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) { if (unlikely(!rseq_update_user_cs(t, regs, csaddr))) @@ -473,6 +636,8 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t u32 node_id = cpu_to_node(ids.cpu_id); return rseq_update_usr(t, regs, &ids, node_id); +efault: + return false; } static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs) @@ -527,17 +692,19 @@ static __always_inline void clear_tif_rseq(void) { } static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work) { - if (likely(!test_tif_rseq(ti_work))) - return false; - - if (unlikely(__rseq_exit_to_user_mode_restart(regs))) { - current->rseq.event.slowpath = true; - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); - return true; + if (unlikely(test_tif_rseq(ti_work))) { + if (unlikely(__rseq_exit_to_user_mode_restart(regs))) { + current->rseq.event.slowpath = true; + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + return true; + } + clear_tif_rseq(); } - - clear_tif_rseq(); - return false; + /* + * Arm the slice extension timer if nothing to do anymore and the + * task really goes out to user space. 
+ */ + return rseq_arm_slice_extension_timer(); } #else /* CONFIG_GENERIC_ENTRY */ @@ -611,6 +778,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { } static inline void rseq_irqentry_exit_to_user_mode(void) { } static inline void rseq_exit_to_user_mode_legacy(void) { } static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } +static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } #endif /* !CONFIG_RSEQ */ #endif /* _LINUX_RSEQ_ENTRY_H */ diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index 332dc14b81c9..da5fa6f40294 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -73,12 +73,39 @@ struct rseq_ids { }; /** + * union rseq_slice_state - Status information for rseq time slice extension + * @state: Compound to access the overall state + * @enabled: Time slice extension is enabled for the task + * @granted: Time slice extension was granted to the task + */ +union rseq_slice_state { + u16 state; + struct { + u8 enabled; + u8 granted; + }; +}; + +/** + * struct rseq_slice - Status information for rseq time slice extension + * @state: Time slice extension state + * @expires: The time when a grant expires + * @yielded: Indicator for rseq_slice_yield() + */ +struct rseq_slice { + union rseq_slice_state state; + u64 expires; + u8 yielded; +}; + +/** * struct rseq_data - Storage for all rseq related data * @usrptr: Pointer to the registered user space RSEQ memory * @len: Length of the RSEQ region - * @sig: Signature of critial section abort IPs + * @sig: Signature of critical section abort IPs * @event: Storage for event management * @ids: Storage for cached CPU ID and MM CID + * @slice: Storage for time slice extension data */ struct rseq_data { struct rseq __user *usrptr; @@ -86,6 +113,9 @@ struct rseq_data { u32 sig; struct rseq_event event; struct rseq_ids ids; +#ifdef CONFIG_RSEQ_SLICE_EXTENSION + struct rseq_slice slice; +#endif }; #else /* CONFIG_RSEQ */ @@ -121,8 +151,7 @@ struct mm_cid_pcpu { /** * struct mm_mm_cid - Storage for per MM CID data * @pcpu: Per CPU storage for CIDs associated to a CPU - * @percpu: Set, when CIDs are in per CPU mode - * @transit: Set to MM_CID_TRANSIT during a mode change transition phase + * @mode: Indicates per CPU and transition mode * @max_cids: The exclusive maximum CID value for allocation and convergence * @irq_work: irq_work to handle the affinity mode change case * @work: Regular work to handle the affinity mode change case @@ -139,8 +168,7 @@ struct mm_cid_pcpu { struct mm_mm_cid { /* Hotpath read mostly members */ struct mm_cid_pcpu __percpu *pcpu; - unsigned int percpu; - unsigned int transit; + unsigned int mode; unsigned int max_cids; /* Rarely used. 
Moves @lock and @mutex into the second cacheline */ diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 5b87c6f4a243..3390d21c95dd 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -29,16 +29,16 @@ do { \ #endif #ifdef CONFIG_DEBUG_SPINLOCK - extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock); + extern void do_raw_read_lock(rwlock_t *lock) __acquires_shared(lock); extern int do_raw_read_trylock(rwlock_t *lock); - extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock); + extern void do_raw_read_unlock(rwlock_t *lock) __releases_shared(lock); extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); extern int do_raw_write_trylock(rwlock_t *lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else -# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) +# define do_raw_read_lock(rwlock) do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) -# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) +# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release_shared(lock); } while (0) # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) @@ -49,8 +49,8 @@ do { \ * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various * methods are defined as nops in the case they are not required. */ -#define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock)) -#define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) +#define read_trylock(lock) _raw_read_trylock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) #define write_lock(lock) _raw_write_lock(lock) #define read_lock(lock) _raw_read_lock(lock) @@ -112,12 +112,7 @@ do { \ } while (0) #define write_unlock_bh(lock) _raw_write_unlock_bh(lock) -#define write_trylock_irqsave(lock, flags) \ -({ \ - local_irq_save(flags); \ - write_trylock(lock) ? \ - 1 : ({ local_irq_restore(flags); 0; }); \ -}) +#define write_trylock_irqsave(lock, flags) _raw_write_trylock_irqsave(lock, &(flags)) #ifdef arch_rwlock_is_contended #define rwlock_is_contended(lock) \ diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 31d3d1116323..61a852609eab 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -15,24 +15,24 @@ * Released under the General Public License (GPL). 
*/ -void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires(lock); +void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires_shared(lock); void __lockfunc _raw_write_lock(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) __acquires(lock); -void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires(lock); +void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires_shared(lock); void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock); -void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires(lock); +void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires_shared(lock); void __lockfunc _raw_write_lock_irq(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) __acquires(lock); -int __lockfunc _raw_read_trylock(rwlock_t *lock); -int __lockfunc _raw_write_trylock(rwlock_t *lock); -void __lockfunc _raw_read_unlock(rwlock_t *lock) __releases(lock); +int __lockfunc _raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); +int __lockfunc _raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock); +void __lockfunc _raw_read_unlock(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock(rwlock_t *lock) __releases(lock); -void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) __releases(lock); +void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) __releases(lock); -void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) __releases(lock); +void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) __releases(lock); void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) @@ -137,6 +137,16 @@ static inline int __raw_write_trylock(rwlock_t *lock) return 0; } +static inline bool _raw_write_trylock_irqsave(rwlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) __no_context_analysis +{ + local_irq_save(*flags); + if (_raw_write_trylock(lock)) + return true; + local_irq_restore(*flags); + return false; +} + /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are @@ -145,6 +155,7 @@ static inline int __raw_write_trylock(rwlock_t *lock) #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) static inline void __raw_read_lock(rwlock_t *lock) + __acquires_shared(lock) __no_context_analysis { preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); @@ -152,6 +163,7 @@ static inline void __raw_read_lock(rwlock_t *lock) } static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) + __acquires_shared(lock) __no_context_analysis { unsigned long flags; @@ -163,6 +175,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) } static inline void __raw_read_lock_irq(rwlock_t *lock) + __acquires_shared(lock) __no_context_analysis { local_irq_disable(); preempt_disable(); @@ -171,6 +184,7 @@ static inline void __raw_read_lock_irq(rwlock_t *lock) } static inline void __raw_read_lock_bh(rwlock_t *lock) + __acquires_shared(lock) __no_context_analysis { __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); @@ -178,6 +192,7 @@ static inline void __raw_read_lock_bh(rwlock_t *lock) } static inline unsigned long 
__raw_write_lock_irqsave(rwlock_t *lock) + __acquires(lock) __no_context_analysis { unsigned long flags; @@ -189,6 +204,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock) } static inline void __raw_write_lock_irq(rwlock_t *lock) + __acquires(lock) __no_context_analysis { local_irq_disable(); preempt_disable(); @@ -197,6 +213,7 @@ static inline void __raw_write_lock_irq(rwlock_t *lock) } static inline void __raw_write_lock_bh(rwlock_t *lock) + __acquires(lock) __no_context_analysis { __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); @@ -204,6 +221,7 @@ static inline void __raw_write_lock_bh(rwlock_t *lock) } static inline void __raw_write_lock(rwlock_t *lock) + __acquires(lock) __no_context_analysis { preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); @@ -211,6 +229,7 @@ static inline void __raw_write_lock(rwlock_t *lock) } static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass) + __acquires(lock) __no_context_analysis { preempt_disable(); rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_); @@ -220,6 +239,7 @@ static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass) #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ static inline void __raw_write_unlock(rwlock_t *lock) + __releases(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); @@ -227,6 +247,7 @@ static inline void __raw_write_unlock(rwlock_t *lock) } static inline void __raw_read_unlock(rwlock_t *lock) + __releases_shared(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); @@ -235,6 +256,7 @@ static inline void __raw_read_unlock(rwlock_t *lock) static inline void __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) + __releases_shared(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); @@ -243,6 +265,7 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) } static inline void __raw_read_unlock_irq(rwlock_t *lock) + __releases_shared(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); @@ -251,6 +274,7 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock) } static inline void __raw_read_unlock_bh(rwlock_t *lock) + __releases_shared(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); @@ -259,6 +283,7 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock) static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) + __releases(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); @@ -267,6 +292,7 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, } static inline void __raw_write_unlock_irq(rwlock_t *lock) + __releases(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); @@ -275,6 +301,7 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock) } static inline void __raw_write_unlock_bh(rwlock_t *lock) + __releases(lock) { rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 7d81fc6918ee..5353abbfdc0b 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -24,26 +24,29 @@ do { \ __rt_rwlock_init(rwl, #rwl, &__key); \ } while (0) -extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock); -extern int rt_read_trylock(rwlock_t *rwlock); -extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock); +extern void rt_read_lock(rwlock_t *rwlock) 
__acquires_shared(rwlock); +extern int rt_read_trylock(rwlock_t *rwlock) __cond_acquires_shared(true, rwlock); +extern void rt_read_unlock(rwlock_t *rwlock) __releases_shared(rwlock); extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock); extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock); -extern int rt_write_trylock(rwlock_t *rwlock); +extern int rt_write_trylock(rwlock_t *rwlock) __cond_acquires(true, rwlock); extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock); static __always_inline void read_lock(rwlock_t *rwlock) + __acquires_shared(rwlock) { rt_read_lock(rwlock); } static __always_inline void read_lock_bh(rwlock_t *rwlock) + __acquires_shared(rwlock) { local_bh_disable(); rt_read_lock(rwlock); } static __always_inline void read_lock_irq(rwlock_t *rwlock) + __acquires_shared(rwlock) { rt_read_lock(rwlock); } @@ -55,37 +58,43 @@ static __always_inline void read_lock_irq(rwlock_t *rwlock) flags = 0; \ } while (0) -#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) +#define read_trylock(lock) rt_read_trylock(lock) static __always_inline void read_unlock(rwlock_t *rwlock) + __releases_shared(rwlock) { rt_read_unlock(rwlock); } static __always_inline void read_unlock_bh(rwlock_t *rwlock) + __releases_shared(rwlock) { rt_read_unlock(rwlock); local_bh_enable(); } static __always_inline void read_unlock_irq(rwlock_t *rwlock) + __releases_shared(rwlock) { rt_read_unlock(rwlock); } static __always_inline void read_unlock_irqrestore(rwlock_t *rwlock, unsigned long flags) + __releases_shared(rwlock) { rt_read_unlock(rwlock); } static __always_inline void write_lock(rwlock_t *rwlock) + __acquires(rwlock) { rt_write_lock(rwlock); } #ifdef CONFIG_DEBUG_LOCK_ALLOC static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass) + __acquires(rwlock) { rt_write_lock_nested(rwlock, subclass); } @@ -94,12 +103,14 @@ static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass) #endif static __always_inline void write_lock_bh(rwlock_t *rwlock) + __acquires(rwlock) { local_bh_disable(); rt_write_lock(rwlock); } static __always_inline void write_lock_irq(rwlock_t *rwlock) + __acquires(rwlock) { rt_write_lock(rwlock); } @@ -111,36 +122,38 @@ static __always_inline void write_lock_irq(rwlock_t *rwlock) flags = 0; \ } while (0) -#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) +#define write_trylock(lock) rt_write_trylock(lock) -#define write_trylock_irqsave(lock, flags) \ -({ \ - int __locked; \ - \ - typecheck(unsigned long, flags); \ - flags = 0; \ - __locked = write_trylock(lock); \ - __locked; \ -}) +static __always_inline bool _write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) + __cond_acquires(true, rwlock) +{ + *flags = 0; + return rt_write_trylock(rwlock); +} +#define write_trylock_irqsave(lock, flags) _write_trylock_irqsave(lock, &(flags)) static __always_inline void write_unlock(rwlock_t *rwlock) + __releases(rwlock) { rt_write_unlock(rwlock); } static __always_inline void write_unlock_bh(rwlock_t *rwlock) + __releases(rwlock) { rt_write_unlock(rwlock); local_bh_enable(); } static __always_inline void write_unlock_irq(rwlock_t *rwlock) + __releases(rwlock) { rt_write_unlock(rwlock); } static __always_inline void write_unlock_irqrestore(rwlock_t *rwlock, unsigned long flags) + __releases(rwlock) { rt_write_unlock(rwlock); } diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index 1948442e7750..d5e7316401e7 100644 --- 
a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -22,7 +22,7 @@ * portions Copyright 2005, Red Hat, Inc., Ingo Molnar * Released under the General Public License (GPL). */ -typedef struct { +context_lock_struct(rwlock) { arch_rwlock_t raw_lock; #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; @@ -31,7 +31,8 @@ typedef struct { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -} rwlock_t; +}; +typedef struct rwlock rwlock_t; #define RWLOCK_MAGIC 0xdeaf1eed @@ -54,13 +55,14 @@ typedef struct { #include <linux/rwbase_rt.h> -typedef struct { +context_lock_struct(rwlock) { struct rwbase_rt rwbase; atomic_t readers; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -} rwlock_t; +}; +typedef struct rwlock rwlock_t; #define __RWLOCK_RT_INITIALIZER(name) \ { \ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index f1aaf676a874..9bf1d93d3d7b 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -45,7 +45,7 @@ * reduce the chance that they will share the same cacheline causing * cacheline bouncing problem. */ -struct rw_semaphore { +context_lock_struct(rw_semaphore) { atomic_long_t count; /* * Write owner or one of the read owners as well flags regarding @@ -76,11 +76,13 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); } static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); } @@ -148,7 +150,7 @@ extern bool is_rwsem_reader_owned(struct rw_semaphore *sem); #include <linux/rwbase_rt.h> -struct rw_semaphore { +context_lock_struct(rw_semaphore) { struct rwbase_rt rwbase; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -180,11 +182,13 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) } static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { WARN_ON(!rwsem_is_locked(sem)); } static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); } @@ -202,6 +206,7 @@ static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) */ static inline void rwsem_assert_held(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held(sem); @@ -210,6 +215,7 @@ static inline void rwsem_assert_held(const struct rw_semaphore *sem) } static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) + __assumes_ctx_lock(sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held_write(sem); @@ -220,48 +226,66 @@ static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) /* * lock for reading */ -extern void down_read(struct rw_semaphore *sem); -extern int __must_check down_read_interruptible(struct rw_semaphore *sem); -extern int __must_check down_read_killable(struct rw_semaphore *sem); +extern void down_read(struct rw_semaphore *sem) __acquires_shared(sem); +extern int __must_check down_read_interruptible(struct rw_semaphore *sem) __cond_acquires_shared(0, sem); +extern int __must_check down_read_killable(struct rw_semaphore *sem) __cond_acquires_shared(0, sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ -extern int 
down_read_trylock(struct rw_semaphore *sem); +extern int down_read_trylock(struct rw_semaphore *sem) __cond_acquires_shared(true, sem); /* * lock for writing */ -extern void down_write(struct rw_semaphore *sem); -extern int __must_check down_write_killable(struct rw_semaphore *sem); +extern void down_write(struct rw_semaphore *sem) __acquires(sem); +extern int __must_check down_write_killable(struct rw_semaphore *sem) __cond_acquires(0, sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ -extern int down_write_trylock(struct rw_semaphore *sem); +extern int down_write_trylock(struct rw_semaphore *sem) __cond_acquires(true, sem); /* * release a read lock */ -extern void up_read(struct rw_semaphore *sem); +extern void up_read(struct rw_semaphore *sem) __releases_shared(sem); /* * release a write lock */ -extern void up_write(struct rw_semaphore *sem); - -DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) -DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) - -DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) -DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) -DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) +extern void up_write(struct rw_semaphore *sem) __releases(sem); + +DEFINE_LOCK_GUARD_1(rwsem_read, struct rw_semaphore, down_read(_T->lock), up_read(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(rwsem_read, _try, down_read_trylock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(rwsem_read, _intr, down_read_interruptible(_T->lock), _RET == 0) + +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_read, __acquires_shared(_T), __releases_shared(*(struct rw_semaphore **)_T)) +#define class_rwsem_read_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_read, _T) +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_read_try, __acquires_shared(_T), __releases_shared(*(struct rw_semaphore **)_T)) +#define class_rwsem_read_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_read_try, _T) +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_read_intr, __acquires_shared(_T), __releases_shared(*(struct rw_semaphore **)_T)) +#define class_rwsem_read_intr_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_read_intr, _T) + +DEFINE_LOCK_GUARD_1(rwsem_write, struct rw_semaphore, down_write(_T->lock), up_write(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(rwsem_write, _try, down_write_trylock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(rwsem_write, _kill, down_write_killable(_T->lock), _RET == 0) + +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_write, __acquires(_T), __releases(*(struct rw_semaphore **)_T)) +#define class_rwsem_write_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_write, _T) +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_write_try, __acquires(_T), __releases(*(struct rw_semaphore **)_T)) +#define class_rwsem_write_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_write_try, _T) +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_write_kill, __acquires(_T), __releases(*(struct rw_semaphore **)_T)) +#define class_rwsem_write_kill_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_write_kill, _T) + +DEFINE_LOCK_GUARD_1(rwsem_init, struct rw_semaphore, init_rwsem(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(rwsem_init, __acquires(_T), __releases(*(struct rw_semaphore **)_T)) +#define class_rwsem_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwsem_init, _T) /* * downgrade write lock to read lock */ -extern void downgrade_write(struct rw_semaphore *sem); +extern void downgrade_write(struct rw_semaphore *sem) __releases(sem) __acquires_shared(sem); 
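/*
 * Illustrative sketch, not part of this patch (type and function names are
 * made up): with rwsem_read/rwsem_write converted to DEFINE_LOCK_GUARD_1
 * above, guard() and scoped_guard() keep working as before, and the
 * DECLARE_LOCK_GUARD_1_ATTRS() annotations let context analysis track the
 * shared vs. exclusive hold across the scope.
 */
#include <linux/cleanup.h>
#include <linux/rwsem.h>

struct example_dir {
	struct rw_semaphore sem;
	unsigned long nr_entries;
};

static unsigned long example_dir_count(struct example_dir *d)
{
	/* Shared (reader) hold, dropped automatically at end of scope. */
	guard(rwsem_read)(&d->sem);
	return d->nr_entries;
}

static void example_dir_reset(struct example_dir *d)
{
	/* Exclusive (writer) hold for the scoped block only. */
	scoped_guard(rwsem_write, &d->sem)
		d->nr_entries = 0;
}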
#ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -277,11 +301,11 @@ extern void downgrade_write(struct rw_semaphore *sem); * lockdep_set_class() at lock initialization time. * See Documentation/locking/lockdep-design.rst for more details.) */ -extern void down_read_nested(struct rw_semaphore *sem, int subclass); -extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); -extern void down_write_nested(struct rw_semaphore *sem, int subclass); -extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); -extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); +extern void down_read_nested(struct rw_semaphore *sem, int subclass) __acquires_shared(sem); +extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass) __cond_acquires_shared(0, sem); +extern void down_write_nested(struct rw_semaphore *sem, int subclass) __acquires(sem); +extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass) __cond_acquires(0, sem); +extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock) __acquires(sem); # define down_write_nest_lock(sem, nest_lock) \ do { \ @@ -295,8 +319,8 @@ do { \ * [ This API should be avoided as much as possible - the * proper abstraction for this case is completions. ] */ -extern void down_read_non_owner(struct rw_semaphore *sem); -extern void up_read_non_owner(struct rw_semaphore *sem); +extern void down_read_non_owner(struct rw_semaphore *sem) __acquires_shared(sem); +extern void up_read_non_owner(struct rw_semaphore *sem) __releases_shared(sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_read_killable_nested(sem, subclass) down_read_killable(sem) diff --git a/include/linux/sched.h b/include/linux/sched.h index da0133524d08..36ae08ca0c62 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -586,15 +586,10 @@ struct sched_entity { u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; - union { - /* - * When !@on_rq this field is vlag. - * When cfs_rq->curr == se (which implies @on_rq) - * this field is vprot. See protect_slice(). - */ - s64 vlag; - u64 vprot; - }; + /* Approximated virtual lag: */ + s64 vlag; + /* 'Protected' deadline, to give out minimum quantums: */ + u64 vprot; u64 slice; u64 nr_migrations; @@ -945,11 +940,7 @@ struct task_struct { #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; - int trc_ipi_to_cpu; - union rcu_special trc_reader_special; - struct list_head trc_holdout_list; - struct list_head trc_blkd_node; - int trc_blkd_cpu; + struct srcu_ctr __percpu *trc_reader_scp; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; @@ -960,7 +951,6 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; - struct address_space *faults_disabled_mapping; int exit_state; int exit_code; @@ -1190,6 +1180,7 @@ struct task_struct { #ifdef CONFIG_IO_URING struct io_uring_task *io_uring; + struct io_restriction *io_uring_restrict; #endif /* Namespaces: */ @@ -1776,6 +1767,11 @@ static __always_inline bool is_percpu_thread(void) (current->nr_cpus_allowed == 1); } +static __always_inline bool is_user_task(struct task_struct *task) +{ + return task->mm && !(task->flags & (PF_KTHREAD | PF_USER_WORKER)); +} + /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
*/ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ @@ -2093,9 +2089,9 @@ static inline int _cond_resched(void) _cond_resched(); \ }) -extern int __cond_resched_lock(spinlock_t *lock); -extern int __cond_resched_rwlock_read(rwlock_t *lock); -extern int __cond_resched_rwlock_write(rwlock_t *lock); +extern int __cond_resched_lock(spinlock_t *lock) __must_hold(lock); +extern int __cond_resched_rwlock_read(rwlock_t *lock) __must_hold_shared(lock); +extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock); #define MIGHT_RESCHED_RCU_SHIFT 8 #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 5f8fd5b24a2e..e90efaf6d26e 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -2,6 +2,7 @@ #ifndef _LINUX_SCHED_CPUTIME_H #define _LINUX_SCHED_CPUTIME_H +#include <linux/static_call_types.h> #include <linux/sched/signal.h> /* @@ -180,4 +181,21 @@ static inline void prev_cputime_init(struct prev_cputime *prev) extern unsigned long long task_sched_runtime(struct task_struct *task); +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +#ifdef CONFIG_HAVE_PV_STEAL_CLOCK_GEN +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} +#endif +#endif + #endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index d8501f4709b5..dc3975ff1b2e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -2,13 +2,21 @@ #define _LINUX_SCHED_ISOLATION_H #include <linux/cpumask.h> -#include <linux/cpuset.h> #include <linux/init.h> #include <linux/tick.h> enum hk_type { + /* Inverse of boot-time isolcpus= argument */ + HK_TYPE_DOMAIN_BOOT, + /* + * Same as HK_TYPE_DOMAIN_BOOT but also includes the + * inverse of cpuset isolated partitions. As such it + * is always a subset of HK_TYPE_DOMAIN_BOOT. 
+ */ HK_TYPE_DOMAIN, + /* Inverse of boot-time isolcpus=managed_irq argument */ HK_TYPE_MANAGED_IRQ, + /* Inverse of boot-time nohz_full= or isolcpus=nohz arguments */ HK_TYPE_KERNEL_NOISE, HK_TYPE_MAX, @@ -31,6 +39,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type); extern bool housekeeping_enabled(enum hk_type type); extern void housekeeping_affine(struct task_struct *t, enum hk_type type); extern bool housekeeping_test_cpu(int cpu, enum hk_type type); +extern int housekeeping_update(struct cpumask *isol_mask); extern void __init housekeeping_init(void); #else @@ -58,6 +67,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type) return true; } +static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ @@ -72,9 +82,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type) static inline bool cpu_is_isolated(int cpu) { - return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) || - !housekeeping_test_cpu(cpu, HK_TYPE_TICK) || - cpuset_cpu_is_isolated(cpu); + return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN); } #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7d6449982822..a22248aebcf9 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -737,21 +737,13 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(task, flags); - (void)__cond_lock(&task->sighand->siglock, ret); - return ret; -} +extern struct sighand_struct *lock_task_sighand(struct task_struct *task, + unsigned long *flags) + __acquires(&task->sighand->siglock); static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) + __releases(&task->sighand->siglock) { spin_unlock_irqrestore(&task->sighand->siglock, *flags); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 525aa2a632b2..41ed884cffc9 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -214,15 +214,19 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * write_lock_irq(&tasklist_lock), neither inside nor outside. 
*/ static inline void task_lock(struct task_struct *p) + __acquires(&p->alloc_lock) { spin_lock(&p->alloc_lock); } static inline void task_unlock(struct task_struct *p) + __releases(&p->alloc_lock) { spin_unlock(&p->alloc_lock); } -DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) +DEFINE_LOCK_GUARD_1(task_lock, struct task_struct, task_lock(_T->lock), task_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(task_lock, __acquires(&_T->alloc_lock), __releases(&(*(struct task_struct **)_T)->alloc_lock)) +#define class_task_lock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(task_lock, _T) #endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 0f28b4623ad4..765bbc3d54be 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -66,6 +66,7 @@ extern void wake_up_q(struct wake_q_head *head); /* Spin unlock helpers to unlock and call wake_up_q with preempt disabled */ static inline void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) + __releases(lock) { guard(preempt)(); raw_spin_unlock(lock); @@ -77,6 +78,7 @@ void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) static inline void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) + __releases(lock) { guard(preempt)(); raw_spin_unlock_irq(lock); @@ -89,6 +91,7 @@ void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) static inline void raw_spin_unlock_irqrestore_wake(raw_spinlock_t *lock, unsigned long flags, struct wake_q_head *wake_q) + __releases(lock) { guard(preempt)(); raw_spin_unlock_irqrestore(lock, flags); diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 27bd372cbfb1..2407d7693b6b 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -59,6 +59,8 @@ struct scmi_imx_misc_proto_ops { u32 *num, u32 *val); int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, u32 ctrl_id, u32 evt_id, u32 flags); + int (*misc_syslog)(const struct scmi_protocol_handle *ph, u16 *size, + void *array); }; /* See LMM_ATTRIBUTES in imx95.rst */ diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 1690706206e8..c022403c599a 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -151,6 +151,4 @@ static inline struct pci_dev *screen_info_pci_dev(const struct screen_info *si) } #endif -extern struct screen_info screen_info; - #endif /* _SCREEN_INFO_H */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 221123660e71..5a40252b8334 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -14,6 +14,7 @@ */ #include <linux/compiler.h> +#include <linux/cleanup.h> #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> @@ -832,6 +833,7 @@ static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s) * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) + __acquires_shared(sl) __no_context_analysis { return read_seqcount_begin(&sl->seqcount); } @@ -848,6 +850,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) * Return: true if a read section retry is required, else false */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) + __releases_shared(sl) __no_context_analysis { return read_seqcount_retry(&sl->seqcount, start); } @@ -872,6 +875,7 @@ static inline unsigned read_seqretry(const seqlock_t 
*sl, unsigned start) * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) + __acquires(sl) __no_context_analysis { spin_lock(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); @@ -885,6 +889,7 @@ static inline void write_seqlock(seqlock_t *sl) * critical section of given seqlock_t. */ static inline void write_sequnlock(seqlock_t *sl) + __releases(sl) __no_context_analysis { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); @@ -898,6 +903,7 @@ static inline void write_sequnlock(seqlock_t *sl) * other write side sections, can be invoked from softirq contexts. */ static inline void write_seqlock_bh(seqlock_t *sl) + __acquires(sl) __no_context_analysis { spin_lock_bh(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); @@ -912,6 +918,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) * write_seqlock_bh(). */ static inline void write_sequnlock_bh(seqlock_t *sl) + __releases(sl) __no_context_analysis { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); @@ -925,6 +932,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) * other write sections, can be invoked from hardirq contexts. */ static inline void write_seqlock_irq(seqlock_t *sl) + __acquires(sl) __no_context_analysis { spin_lock_irq(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); @@ -938,12 +946,14 @@ static inline void write_seqlock_irq(seqlock_t *sl) * seqlock_t write side section opened with write_seqlock_irq(). */ static inline void write_sequnlock_irq(seqlock_t *sl) + __releases(sl) __no_context_analysis { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) + __acquires(sl) __no_context_analysis { unsigned long flags; @@ -976,6 +986,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) + __releases(sl) __no_context_analysis { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); @@ -998,6 +1009,7 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) + __acquires_shared(sl) __no_context_analysis { spin_lock(&sl->lock); } @@ -1007,6 +1019,7 @@ static inline void read_seqlock_excl(seqlock_t *sl) * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl(seqlock_t *sl) + __releases_shared(sl) __no_context_analysis { spin_unlock(&sl->lock); } @@ -1021,6 +1034,7 @@ static inline void read_sequnlock_excl(seqlock_t *sl) * from softirq contexts. */ static inline void read_seqlock_excl_bh(seqlock_t *sl) + __acquires_shared(sl) __no_context_analysis { spin_lock_bh(&sl->lock); } @@ -1031,6 +1045,7 @@ static inline void read_seqlock_excl_bh(seqlock_t *sl) * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) + __releases_shared(sl) __no_context_analysis { spin_unlock_bh(&sl->lock); } @@ -1045,6 +1060,7 @@ static inline void read_sequnlock_excl_bh(seqlock_t *sl) * hardirq context. 
*/ static inline void read_seqlock_excl_irq(seqlock_t *sl) + __acquires_shared(sl) __no_context_analysis { spin_lock_irq(&sl->lock); } @@ -1055,11 +1071,13 @@ static inline void read_seqlock_excl_irq(seqlock_t *sl) * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) + __releases_shared(sl) __no_context_analysis { spin_unlock_irq(&sl->lock); } static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) + __acquires_shared(sl) __no_context_analysis { unsigned long flags; @@ -1089,6 +1107,7 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) + __releases_shared(sl) __no_context_analysis { spin_unlock_irqrestore(&sl->lock, flags); } @@ -1125,6 +1144,7 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) * parameter of the next read_seqbegin_or_lock() iteration. */ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) + __acquires_shared(lock) __no_context_analysis { if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); @@ -1140,6 +1160,7 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) * Return: true if a read section retry is required, false otherwise */ static inline int need_seqretry(seqlock_t *lock, int seq) + __releases_shared(lock) __no_context_analysis { return !(seq & 1) && read_seqretry(lock, seq); } @@ -1153,6 +1174,7 @@ static inline int need_seqretry(seqlock_t *lock, int seq) * with read_seqbegin_or_lock() and validated by need_seqretry(). */ static inline void done_seqretry(seqlock_t *lock, int seq) + __no_context_analysis { if (seq & 1) read_sequnlock_excl(lock); @@ -1180,6 +1202,7 @@ static inline void done_seqretry(seqlock_t *lock, int seq) */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) + __acquires_shared(lock) __no_context_analysis { unsigned long flags = 0; @@ -1205,6 +1228,7 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) + __no_context_analysis { if (seq & 1) read_sequnlock_excl_irqrestore(lock, flags); @@ -1225,6 +1249,7 @@ struct ss_tmp { }; static __always_inline void __scoped_seqlock_cleanup(struct ss_tmp *sst) + __no_context_analysis { if (sst->lock) spin_unlock(sst->lock); @@ -1254,6 +1279,7 @@ extern void __scoped_seqlock_bug(void); static __always_inline void __scoped_seqlock_next(struct ss_tmp *sst, seqlock_t *lock, enum ss_state target) + __no_context_analysis { switch (sst->state) { case ss_done: @@ -1296,22 +1322,31 @@ __scoped_seqlock_next(struct ss_tmp *sst, seqlock_t *lock, enum ss_state target) } } +/* + * Context analysis no-op helper to release seqlock at the end of the for-scope; + * the alias analysis of the compiler will recognize that the pointer @s is an + * alias to @_seqlock passed to read_seqbegin(_seqlock) below. 
+ */ +static __always_inline void __scoped_seqlock_cleanup_ctx(struct ss_tmp **s) + __releases_shared(*((seqlock_t **)s)) __no_context_analysis {} + #define __scoped_seqlock_read(_seqlock, _target, _s) \ for (struct ss_tmp _s __cleanup(__scoped_seqlock_cleanup) = \ - { .state = ss_lockless, .data = read_seqbegin(_seqlock) }; \ + { .state = ss_lockless, .data = read_seqbegin(_seqlock) }, \ + *__UNIQUE_ID(ctx) __cleanup(__scoped_seqlock_cleanup_ctx) =\ + (struct ss_tmp *)_seqlock; \ _s.state != ss_done; \ __scoped_seqlock_next(&_s, _seqlock, _target)) /** - * scoped_seqlock_read (lock, ss_state) - execute the read side critical - * section without manual sequence - * counter handling or calls to other - * helpers - * @lock: pointer to seqlock_t protecting the data - * @ss_state: one of {ss_lock, ss_lock_irqsave, ss_lockless} indicating - * the type of critical read section + * scoped_seqlock_read() - execute the read-side critical section + * without manual sequence counter handling + * or calls to other helpers + * @_seqlock: pointer to seqlock_t protecting the data + * @_target: an enum ss_state: one of {ss_lock, ss_lock_irqsave, ss_lockless} + * indicating the type of critical read section * - * Example: + * Example:: * * scoped_seqlock_read (&lock, ss_lock) { * // read-side critical section @@ -1323,4 +1358,8 @@ __scoped_seqlock_next(struct ss_tmp *sst, seqlock_t *lock, enum ss_state target) #define scoped_seqlock_read(_seqlock, _target) \ __scoped_seqlock_read(_seqlock, _target, __UNIQUE_ID(seqlock)) +DEFINE_LOCK_GUARD_1(seqlock_init, seqlock_t, seqlock_init(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(seqlock_init, __acquires(_T), __releases(*(seqlock_t **)_T)) +#define class_seqlock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(seqlock_init, _T) + #endif /* __LINUX_SEQLOCK_H */ diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h index dfdf43e3fa3d..2d5d793ef660 100644 --- a/include/linux/seqlock_types.h +++ b/include/linux/seqlock_types.h @@ -81,13 +81,14 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * - Comments on top of seqcount_t * - Documentation/locking/seqlock.rst */ -typedef struct { +context_lock_struct(seqlock) { /* * Make sure that readers don't starve writers on PREEMPT_RT: use * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). */ seqcount_spinlock_t seqcount; spinlock_t lock; -} seqlock_t; +}; +typedef struct seqlock seqlock_t; #endif /* __LINUX_SEQLOCK_TYPES_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 86737076101d..112e48970338 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4301,6 +4301,18 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) skb_headlen(skb), buffer); } +/* Variant of skb_header_pointer() where @offset is user-controlled + * and potentially negative. 
+ */ +static inline void * __must_check +skb_header_pointer_careful(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + if (unlikely(offset < 0 && -offset > skb_headroom(skb))) + return NULL; + return skb_header_pointer(skb, offset, len, buffer); +} + static inline void * __must_check skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) { diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 49847888c287..829b281d6c9c 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -97,6 +97,8 @@ struct sk_psock { struct sk_buff_head ingress_skb; struct list_head ingress_msg; spinlock_t ingress_lock; + /** @msg_tot_len: Total bytes queued in ingress_msg list. */ + u32 msg_tot_len; unsigned long state; struct list_head link; spinlock_t link_lock; @@ -141,6 +143,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags, int *copied_from_self); bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) @@ -319,6 +323,27 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +static inline u32 sk_psock_get_msg_len_nolock(struct sk_psock *psock) +{ + /* Used by ioctl to read msg_tot_len only; lock-free for performance */ + return READ_ONCE(psock->msg_tot_len); +} + +static inline void sk_psock_msg_len_add_locked(struct sk_psock *psock, int diff) +{ + /* Use WRITE_ONCE to ensure correct read in sk_psock_get_msg_len_nolock(). + * ingress_lock should be held to prevent concurrent updates to msg_tot_len + */ + WRITE_ONCE(psock->msg_tot_len, psock->msg_tot_len + diff); +} + +static inline void sk_psock_msg_len_add(struct sk_psock *psock, int diff) +{ + spin_lock_bh(&psock->ingress_lock); + sk_psock_msg_len_add_locked(psock, diff); + spin_unlock_bh(&psock->ingress_lock); +} + static inline bool sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { @@ -327,6 +352,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock, spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { list_add_tail(&msg->list, &psock->ingress_msg); + sk_psock_msg_len_add_locked(psock, msg->sg.size); ret = true; } else { sk_msg_free(psock->sk, msg); @@ -343,18 +369,25 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) spin_lock_bh(&psock->ingress_lock); msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); - if (msg) + if (msg) { list_del(&msg->list); + sk_psock_msg_len_add_locked(psock, -msg->sg.size); + } spin_unlock_bh(&psock->ingress_lock); return msg; } +static inline struct sk_msg *sk_psock_peek_msg_locked(struct sk_psock *psock) +{ + return list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); +} + static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock) { struct sk_msg *msg; spin_lock_bh(&psock->ingress_lock); - msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); + msg = sk_psock_peek_msg_locked(psock); spin_unlock_bh(&psock->ingress_lock); return msg; } @@ -521,6 +554,39 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } +/* for tcp only, sk is locked */ +static inline ssize_t sk_psock_msg_inq(struct sock *sk) +{ + struct sk_psock 
*psock; + ssize_t inq = 0; + + psock = sk_psock_get(sk); + if (likely(psock)) { + inq = sk_psock_get_msg_len_nolock(psock); + sk_psock_put(sk, psock); + } + return inq; +} + +/* for udp only, sk is not locked */ +static inline ssize_t sk_msg_first_len(struct sock *sk) +{ + struct sk_psock *psock; + struct sk_msg *msg; + ssize_t inq = 0; + + psock = sk_psock_get(sk); + if (likely(psock)) { + spin_lock_bh(&psock->ingress_lock); + msg = sk_psock_peek_msg_locked(psock); + if (msg) + inq = msg->sg.size; + spin_unlock_bh(&psock->ingress_lock); + sk_psock_put(sk, psock); + } + return inq; +} + #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/linux/slab.h b/include/linux/slab.h index 2482992248dc..7701b38cedec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -12,6 +12,7 @@ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H +#include <linux/bug.h> #include <linux/cache.h> #include <linux/gfp.h> #include <linux/overflow.h> @@ -965,6 +966,111 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); #define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__)) +/** + * __alloc_objs - Allocate objects of a given type using + * @KMALLOC: which size-based kmalloc wrapper to allocate with. + * @GFP: GFP flags for the allocation. + * @TYPE: type to allocate space for. + * @COUNT: how many @TYPE objects to allocate. + * + * Returns: Newly allocated pointer to (first) @TYPE of @COUNT-many + * allocated @TYPE objects, or NULL on failure. + */ +#define __alloc_objs(KMALLOC, GFP, TYPE, COUNT) \ +({ \ + const size_t __obj_size = size_mul(sizeof(TYPE), COUNT); \ + (TYPE *)KMALLOC(__obj_size, GFP); \ +}) + +/** + * __alloc_flex - Allocate an object that has a trailing flexible array + * @KMALLOC: kmalloc wrapper function to use for allocation. + * @GFP: GFP flags for the allocation. + * @TYPE: type of structure to allocate space for. + * @FAM: The name of the flexible array member of @TYPE structure. + * @COUNT: how many @FAM elements to allocate space for. + * + * Returns: Newly allocated pointer to @TYPE with @COUNT-many trailing + * @FAM elements, or NULL on failure or if @COUNT cannot be represented + * by the member of @TYPE that counts the @FAM elements (annotated via + * __counted_by()). + */ +#define __alloc_flex(KMALLOC, GFP, TYPE, FAM, COUNT) \ +({ \ + const size_t __count = (COUNT); \ + const size_t __obj_size = struct_size_t(TYPE, FAM, __count); \ + TYPE *__obj_ptr; \ + if (WARN_ON_ONCE(overflows_flex_counter_type(TYPE, FAM, __count))) \ + __obj_ptr = NULL; \ + else \ + __obj_ptr = KMALLOC(__obj_size, GFP); \ + if (__obj_ptr) \ + __set_flex_counter(__obj_ptr->FAM, __count); \ + __obj_ptr; \ +}) + +/** + * kmalloc_obj - Allocate a single instance of the given type + * @VAR_OR_TYPE: Variable or type to allocate. + * @GFP: GFP flags for the allocation. + * + * Returns: newly allocated pointer to a @VAR_OR_TYPE on success, or NULL + * on failure. + */ +#define kmalloc_obj(VAR_OR_TYPE, GFP) \ + __alloc_objs(kmalloc, GFP, typeof(VAR_OR_TYPE), 1) + +/** + * kmalloc_objs - Allocate an array of the given type + * @VAR_OR_TYPE: Variable or type to allocate an array of. + * @COUNT: How many elements in the array. + * @GFP: GFP flags for the allocation. + * + * Returns: newly allocated pointer to array of @VAR_OR_TYPE on success, + * or NULL on failure. 
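A usage sketch for the type-based allocation helpers added here (kmalloc_flex() and the kzalloc_*/kv*alloc_* aliases follow just below); struct frame and its fields are illustrative only:

	struct frame {
		u16 nr_slots;
		u32 slots[] __counted_by(nr_slots);
	};

	static struct frame *frame_alloc(size_t n, gfp_t gfp)
	{
		/*
		 * Allocates struct_size(struct frame, slots, n) bytes, stores n
		 * in ->nr_slots, and refuses the allocation up front if n does
		 * not fit the __counted_by() counter type.
		 */
		return kmalloc_flex(struct frame, slots, n, gfp);
	}

	static u32 *slot_scratch_alloc(size_t n, gfp_t gfp)
	{
		/* Overflow-checked equivalent of kmalloc_array(n, sizeof(u32), gfp). */
		return kmalloc_objs(u32, n, gfp);
	}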
+ */ +#define kmalloc_objs(VAR_OR_TYPE, COUNT, GFP) \ + __alloc_objs(kmalloc, GFP, typeof(VAR_OR_TYPE), COUNT) + +/** + * kmalloc_flex - Allocate a single instance of the given flexible structure + * @VAR_OR_TYPE: Variable or type to allocate (with its flex array). + * @FAM: The name of the flexible array member of the structure. + * @COUNT: How many flexible array member elements are desired. + * @GFP: GFP flags for the allocation. + * + * Returns: newly allocated pointer to @VAR_OR_TYPE on success, NULL on + * failure. If @FAM has been annotated with __counted_by(), the allocation + * will immediately fail if @COUNT is larger than what the type of the + * struct's counter variable can represent. + */ +#define kmalloc_flex(VAR_OR_TYPE, FAM, COUNT, GFP) \ + __alloc_flex(kmalloc, GFP, typeof(VAR_OR_TYPE), FAM, COUNT) + +/* All kzalloc aliases for kmalloc_(obj|objs|flex). */ +#define kzalloc_obj(P, GFP) \ + __alloc_objs(kzalloc, GFP, typeof(P), 1) +#define kzalloc_objs(P, COUNT, GFP) \ + __alloc_objs(kzalloc, GFP, typeof(P), COUNT) +#define kzalloc_flex(P, FAM, COUNT, GFP) \ + __alloc_flex(kzalloc, GFP, typeof(P), FAM, COUNT) + +/* All kvmalloc aliases for kmalloc_(obj|objs|flex). */ +#define kvmalloc_obj(P, GFP) \ + __alloc_objs(kvmalloc, GFP, typeof(P), 1) +#define kvmalloc_objs(P, COUNT, GFP) \ + __alloc_objs(kvmalloc, GFP, typeof(P), COUNT) +#define kvmalloc_flex(P, FAM, COUNT, GFP) \ + __alloc_flex(kvmalloc, GFP, typeof(P), FAM, COUNT) + +/* All kvzalloc aliases for kmalloc_(obj|objs|flex). */ +#define kvzalloc_obj(P, GFP) \ + __alloc_objs(kvzalloc, GFP, typeof(P), 1) +#define kvzalloc_objs(P, COUNT, GFP) \ + __alloc_objs(kvzalloc, GFP, typeof(P), COUNT) +#define kvzalloc_flex(P, FAM, COUNT, GFP) \ + __alloc_flex(kvzalloc, GFP, typeof(P), FAM, COUNT) + #define kmem_buckets_alloc(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 736f53018017..bda3c528b515 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -126,6 +126,13 @@ int apple_rtkit_wake(struct apple_rtkit *rtk); int apple_rtkit_shutdown(struct apple_rtkit *rtk); /* + * Put the co-processor into the lowest power state. Note that it usually + * is not possible to recover from this state without a full SoC reset. + */ + +int apple_rtkit_poweroff(struct apple_rtkit *rtk); + +/* * Put the co-processor into idle mode */ int apple_rtkit_idle(struct apple_rtkit *rtk); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 0c3906e8ad19..a06b5a61f337 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -23,6 +23,8 @@ #define CMDQ_THR_SPR_IDX2 (2) #define CMDQ_THR_SPR_IDX3 (3) +#define CMDQ_SUBSYS_INVALID (U8_MAX) + struct cmdq_pkt; enum cmdq_logic_op { @@ -52,8 +54,20 @@ struct cmdq_operand { struct cmdq_client_reg { u8 subsys; + phys_addr_t pa_base; u16 offset; u16 size; + + /* + * Client only uses these functions for MMIO access, + * so doesn't need to handle the mminfra_offset. + * The mminfra_offset is used for DRAM access and + * is handled internally by CMDQ APIs. 
+ */ + int (*pkt_write)(struct cmdq_pkt *pkt, u8 subsys, u32 pa_base, + u16 offset, u32 value); + int (*pkt_write_mask)(struct cmdq_pkt *pkt, u8 subsys, u32 pa_base, + u16 offset, u32 value, u32 mask); }; struct cmdq_client { @@ -122,6 +136,32 @@ void cmdq_pkt_destroy(struct cmdq_client *client, struct cmdq_pkt *pkt); int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value); /** + * cmdq_pkt_write_pa() - append write command to the CMDQ packet with pa_base + * @pkt: the CMDQ packet + * @subsys: unused parameter + * @pa_base: the physical address base of the hardware register + * @offset: register offset from CMDQ sub system + * @value: the specified target register value + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_pa(struct cmdq_pkt *pkt, u8 subsys /*unused*/, + u32 pa_base, u16 offset, u32 value); + +/** + * cmdq_pkt_write_subsys() - append write command to the CMDQ packet with subsys + * @pkt: the CMDQ packet + * @subsys: the CMDQ sub system code + * @pa_base: unused parameter + * @offset: register offset from CMDQ sub system + * @value: the specified target register value + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_subsys(struct cmdq_pkt *pkt, u8 subsys, + u32 pa_base /*unused*/, u16 offset, u32 value); + +/** * cmdq_pkt_write_mask() - append write command with mask to the CMDQ packet * @pkt: the CMDQ packet * @subsys: the CMDQ sub system code @@ -134,6 +174,34 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value); int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask); +/** + * cmdq_pkt_write_mask_pa() - append write command with mask to the CMDQ packet with pa + * @pkt: the CMDQ packet + * @subsys: unused parameter + * @pa_base: the physical address base of the hardware register + * @offset: register offset from CMDQ sub system + * @value: the specified target register value + * @mask: the specified target register mask + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_mask_pa(struct cmdq_pkt *pkt, u8 subsys /*unused*/, + u32 pa_base, u16 offset, u32 value, u32 mask); + +/** + * cmdq_pkt_write_mask_subsys() - append write command with mask to the CMDQ packet with subsys + * @pkt: the CMDQ packet + * @subsys: the CMDQ sub system code + * @pa_base: unused parameter + * @offset: register offset from CMDQ sub system + * @value: the specified target register value + * @mask: the specified target register mask + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_mask_subsys(struct cmdq_pkt *pkt, u8 subsys, + u32 pa_base /*unused*/, u16 offset, u32 value, u32 mask); + /* * cmdq_pkt_read_s() - append read_s command to the CMDQ packet * @pkt: the CMDQ packet @@ -418,12 +486,37 @@ static inline int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u3 return -ENOENT; } +static inline int cmdq_pkt_write_pa(struct cmdq_pkt *pkt, u8 subsys /*unused*/, + u32 pa_base, u16 offset, u32 value) +{ + return -ENOENT; +} + +static inline int cmdq_pkt_write_subsys(struct cmdq_pkt *pkt, u8 subsys, + u32 pa_base /*unused*/, u16 offset, u32 value) +{ + return -ENOENT; +} + static inline int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask) { return -ENOENT; } +static inline int cmdq_pkt_write_mask_pa(struct cmdq_pkt *pkt, u8 subsys /*unused*/, + u32 pa_base, u16 offset, u32 value, u32 mask) +{ + return -ENOENT; +} + +static inline 
int cmdq_pkt_write_mask_subsys(struct cmdq_pkt *pkt, u8 subsys, + u32 pa_base /*unused*/, u16 offset, + u32 value, u32 mask) +{ + return -ENOENT; +} + static inline int cmdq_pkt_read_s(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, u16 addr_low, u16 reg_idx) { diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 0287f9182c4d..8243ab3a12a8 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -74,13 +74,17 @@ #define LLCC_CAMSRTIP 73 #define LLCC_CAMRTRF 74 #define LLCC_CAMSRTRF 75 +#define LLCC_OOBM_NS 81 +#define LLCC_OOBM_S 82 #define LLCC_VIDEO_APV 83 #define LLCC_COMPUTE1 87 #define LLCC_CPUSS_OPP 88 #define LLCC_CPUSSMPAM 89 +#define LLCC_VIDSC_VSP1 91 #define LLCC_CAM_IPE_STROV 92 #define LLCC_CAM_OFE_STROV 93 #define LLCC_CPUSS_HEU 94 +#define LLCC_PCIE_TCU 97 #define LLCC_MDM_PNG_FIXED 100 /** diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index 8ea8230579a2..82372e0db0a1 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -10,19 +10,19 @@ struct device; struct firmware; -struct qcom_scm_pas_metadata; +struct qcom_scm_pas_context; #if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) ssize_t qcom_mdt_get_size(const struct firmware *fw); -int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, phys_addr_t mem_phys, - struct qcom_scm_pas_metadata *pas_metadata_ctx); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); +int qcom_mdt_pas_load(struct qcom_scm_pas_context *ctx, const struct firmware *fw, + const char *firmware, void *mem_region, phys_addr_t *reloc_base); + int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, const char *fw_name, void *mem_region, phys_addr_t mem_phys, size_t mem_size, @@ -37,13 +37,6 @@ static inline ssize_t qcom_mdt_get_size(const struct firmware *fw) return -ENODEV; } -static inline int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, phys_addr_t mem_phys, - struct qcom_scm_pas_metadata *pas_metadata_ctx) -{ - return -ENODEV; -} - static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, phys_addr_t mem_phys, @@ -52,6 +45,13 @@ static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, return -ENODEV; } +static inline int qcom_mdt_pas_load(struct qcom_scm_pas_context *ctx, + const struct firmware *fw, const char *firmware, + void *mem_region, phys_addr_t *reloc_base) +{ + return -ENODEV; +} + static inline int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, const char *fw_name, void *mem_region, diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h index 0a4edfe3d96d..f052e241736c 100644 --- a/include/linux/soc/qcom/ubwc.h +++ b/include/linux/soc/qcom/ubwc.h @@ -8,6 +8,7 @@ #define __QCOM_UBWC_H__ #include <linux/bits.h> +#include <linux/printk.h> #include <linux/types.h> struct qcom_ubwc_cfg_data { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d3561c4a080e..e1e2f144af9b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -212,7 +212,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) * various methods are defined as nops in the case they are not * required. 
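A possible caller of the pa_base-flavoured CMDQ write helpers declared in mtk-cmdq.h above; the register offsets, values, and the use of CMDQ_SUBSYS_INVALID for the ignored subsys argument are assumptions for illustration:

	static int example_program_regs(struct cmdq_pkt *pkt, u32 regs_pa)
	{
		int ret;

		/* subsys is documented as unused by the *_pa variants. */
		ret = cmdq_pkt_write_pa(pkt, CMDQ_SUBSYS_INVALID, regs_pa,
					0x100, 0x1);
		if (ret)
			return ret;

		/* Update only bit 0 of the register at offset 0x104. */
		return cmdq_pkt_write_mask_pa(pkt, CMDQ_SUBSYS_INVALID, regs_pa,
					      0x104, 0x1, 0x1);
	}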
*/ -#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) +#define raw_spin_trylock(lock) _raw_spin_trylock(lock) #define raw_spin_lock(lock) _raw_spin_lock(lock) @@ -283,22 +283,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) } while (0) #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) -#define raw_spin_trylock_bh(lock) \ - __cond_lock(lock, _raw_spin_trylock_bh(lock)) +#define raw_spin_trylock_bh(lock) _raw_spin_trylock_bh(lock) -#define raw_spin_trylock_irq(lock) \ -({ \ - local_irq_disable(); \ - raw_spin_trylock(lock) ? \ - 1 : ({ local_irq_enable(); 0; }); \ -}) +#define raw_spin_trylock_irq(lock) _raw_spin_trylock_irq(lock) -#define raw_spin_trylock_irqsave(lock, flags) \ -({ \ - local_irq_save(flags); \ - raw_spin_trylock(lock) ? \ - 1 : ({ local_irq_restore(flags); 0; }); \ -}) +#define raw_spin_trylock_irqsave(lock, flags) _raw_spin_trylock_irqsave(lock, &(flags)) #ifndef CONFIG_PREEMPT_RT /* Include rwlock functions for !RT */ @@ -347,16 +336,19 @@ do { \ #endif static __always_inline void spin_lock(spinlock_t *lock) + __acquires(lock) __no_context_analysis { raw_spin_lock(&lock->rlock); } static __always_inline void spin_lock_bh(spinlock_t *lock) + __acquires(lock) __no_context_analysis { raw_spin_lock_bh(&lock->rlock); } static __always_inline int spin_trylock(spinlock_t *lock) + __cond_acquires(true, lock) __no_context_analysis { return raw_spin_trylock(&lock->rlock); } @@ -364,14 +356,17 @@ static __always_inline int spin_trylock(spinlock_t *lock) #define spin_lock_nested(lock, subclass) \ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ + __release(spinlock_check(lock)); __acquire(lock); \ } while (0) #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ + __release(spinlock_check(lock)); __acquire(lock); \ } while (0) static __always_inline void spin_lock_irq(spinlock_t *lock) + __acquires(lock) __no_context_analysis { raw_spin_lock_irq(&lock->rlock); } @@ -379,47 +374,57 @@ static __always_inline void spin_lock_irq(spinlock_t *lock) #define spin_lock_irqsave(lock, flags) \ do { \ raw_spin_lock_irqsave(spinlock_check(lock), flags); \ + __release(spinlock_check(lock)); __acquire(lock); \ } while (0) #define spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ + __release(spinlock_check(lock)); __acquire(lock); \ } while (0) static __always_inline void spin_unlock(spinlock_t *lock) + __releases(lock) __no_context_analysis { raw_spin_unlock(&lock->rlock); } static __always_inline void spin_unlock_bh(spinlock_t *lock) + __releases(lock) __no_context_analysis { raw_spin_unlock_bh(&lock->rlock); } static __always_inline void spin_unlock_irq(spinlock_t *lock) + __releases(lock) __no_context_analysis { raw_spin_unlock_irq(&lock->rlock); } static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) + __releases(lock) __no_context_analysis { raw_spin_unlock_irqrestore(&lock->rlock, flags); } static __always_inline int spin_trylock_bh(spinlock_t *lock) + __cond_acquires(true, lock) __no_context_analysis { return raw_spin_trylock_bh(&lock->rlock); } static __always_inline int spin_trylock_irq(spinlock_t *lock) + __cond_acquires(true, lock) __no_context_analysis { return raw_spin_trylock_irq(&lock->rlock); } -#define spin_trylock_irqsave(lock, flags) \ -({ \ - raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ -}) +static __always_inline 
bool _spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) __no_context_analysis +{ + return raw_spin_trylock_irqsave(spinlock_check(lock), *flags); +} +#define spin_trylock_irqsave(lock, flags) _spin_trylock_irqsave(lock, &(flags)) /** * spin_is_locked() - Check whether a spinlock is locked. @@ -497,23 +502,17 @@ static inline int rwlock_needbreak(rwlock_t *lock) * Decrements @atomic by 1. If the result is 0, returns true and locks * @lock. Returns false for all other cases. */ -extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); -#define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) __cond_acquires(true, lock); extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, - unsigned long *flags); -#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ - __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) + unsigned long *flags) __cond_acquires(true, lock); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)) -extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock); -#define atomic_dec_and_raw_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock)) +extern int atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) __cond_acquires(true, lock); extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, - unsigned long *flags); -#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \ - __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags))) + unsigned long *flags) __cond_acquires(true, lock); +#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags)) int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, @@ -535,86 +534,144 @@ void free_bucket_spinlocks(spinlock_t *locks); DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock, _T) DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_try, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_try, _T) DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_nested, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_nested_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_nested, _T) DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_irq, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_irq, _T) DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_irq_try, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_irq_try_constructor(_T) 
WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_irq_try, _T) DEFINE_LOCK_GUARD_1(raw_spinlock_bh, raw_spinlock_t, raw_spin_lock_bh(_T->lock), raw_spin_unlock_bh(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_bh, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_bh_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_bh, _T) DEFINE_LOCK_GUARD_1_COND(raw_spinlock_bh, _try, raw_spin_trylock_bh(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_bh_try, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_bh_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_bh_try, _T) DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_irqsave, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_irqsave, _T) DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, raw_spin_trylock_irqsave(_T->lock, _T->flags)) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_irqsave_try, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_irqsave_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_irqsave_try, _T) + +DEFINE_LOCK_GUARD_1(raw_spinlock_init, raw_spinlock_t, raw_spin_lock_init(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(raw_spinlock_init, __acquires(_T), __releases(*(raw_spinlock_t **)_T)) +#define class_raw_spinlock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(raw_spinlock_init, _T) DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock, _T) DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_try, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_try, _T) DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_irq, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_irq, _T) DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, spin_trylock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_irq_try, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_irq_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_irq_try, _T) DEFINE_LOCK_GUARD_1(spinlock_bh, spinlock_t, spin_lock_bh(_T->lock), spin_unlock_bh(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_bh, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_bh_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_bh, _T) DEFINE_LOCK_GUARD_1_COND(spinlock_bh, _try, spin_trylock_bh(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_bh_try, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_bh_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_bh_try, _T) DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_irqsave, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_irqsave, _T) DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, 
spin_trylock_irqsave(_T->lock, _T->flags)) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_irqsave_try, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_irqsave_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_irqsave_try, _T) + +DEFINE_LOCK_GUARD_1(spinlock_init, spinlock_t, spin_lock_init(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(spinlock_init, __acquires(_T), __releases(*(spinlock_t **)_T)) +#define class_spinlock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(spinlock_init, _T) DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, read_lock(_T->lock), read_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(read_lock, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_read_lock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(read_lock, _T) DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, read_lock_irq(_T->lock), read_unlock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(read_lock_irq, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_read_lock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(read_lock_irq, _T) DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, read_lock_irqsave(_T->lock, _T->flags), read_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DECLARE_LOCK_GUARD_1_ATTRS(read_lock_irqsave, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_read_lock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(read_lock_irqsave, _T) DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, write_lock(_T->lock), write_unlock(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(write_lock, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_write_lock_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(write_lock, _T) DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, write_lock_irq(_T->lock), write_unlock_irq(_T->lock)) +DECLARE_LOCK_GUARD_1_ATTRS(write_lock_irq, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_write_lock_irq_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(write_lock_irq, _T) DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, write_lock_irqsave(_T->lock, _T->flags), write_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DECLARE_LOCK_GUARD_1_ATTRS(write_lock_irqsave, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_write_lock_irqsave_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(write_lock_irqsave, _T) + +DEFINE_LOCK_GUARD_1(rwlock_init, rwlock_t, rwlock_init(_T->lock), /* */) +DECLARE_LOCK_GUARD_1_ATTRS(rwlock_init, __acquires(_T), __releases(*(rwlock_t **)_T)) +#define class_rwlock_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(rwlock_init, _T) #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 9ecb0ab504e3..bda5e7a390cd 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -34,8 +34,8 @@ unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock) unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); -int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock); -int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock); +int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock) __cond_acquires(true, lock); +int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock) __cond_acquires(true, lock); void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) __releases(lock); void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock) __releases(lock); @@ -84,6 +84,7 @@ _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, 
unsigned long flags) #endif static inline int __raw_spin_trylock(raw_spinlock_t *lock) + __cond_acquires(true, lock) { preempt_disable(); if (do_raw_spin_trylock(lock)) { @@ -94,6 +95,26 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return 0; } +static __always_inline bool _raw_spin_trylock_irq(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + local_irq_disable(); + if (_raw_spin_trylock(lock)) + return true; + local_irq_enable(); + return false; +} + +static __always_inline bool _raw_spin_trylock_irqsave(raw_spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + local_irq_save(*flags); + if (_raw_spin_trylock(lock)) + return true; + local_irq_restore(*flags); + return false; +} + /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPTION, because lockdep assumes that interrupts are @@ -102,6 +123,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) + __acquires(lock) __no_context_analysis { unsigned long flags; @@ -113,6 +135,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) } static inline void __raw_spin_lock_irq(raw_spinlock_t *lock) + __acquires(lock) __no_context_analysis { local_irq_disable(); preempt_disable(); @@ -121,6 +144,7 @@ static inline void __raw_spin_lock_irq(raw_spinlock_t *lock) } static inline void __raw_spin_lock_bh(raw_spinlock_t *lock) + __acquires(lock) __no_context_analysis { __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); @@ -128,6 +152,7 @@ static inline void __raw_spin_lock_bh(raw_spinlock_t *lock) } static inline void __raw_spin_lock(raw_spinlock_t *lock) + __acquires(lock) __no_context_analysis { preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); @@ -137,6 +162,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ static inline void __raw_spin_unlock(raw_spinlock_t *lock) + __releases(lock) { spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); @@ -145,6 +171,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) + __releases(lock) { spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); @@ -153,6 +180,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, } static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) + __releases(lock) { spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); @@ -161,6 +189,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) } static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock) + __releases(lock) { spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); @@ -168,6 +197,7 @@ static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock) } static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) + __cond_acquires(true, lock) { __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); if (do_raw_spin_trylock(lock)) { diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index 819aeba1c87e..a9d5c7c66e03 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -24,68 +24,120 @@ * flags straight, to suppress compiler warnings of unused lock * variables, and to add the proper checker 
annotations: */ -#define ___LOCK(lock) \ +#define ___LOCK_(lock) \ do { __acquire(lock); (void)(lock); } while (0) -#define __LOCK(lock) \ - do { preempt_disable(); ___LOCK(lock); } while (0) +#define ___LOCK_shared(lock) \ + do { __acquire_shared(lock); (void)(lock); } while (0) -#define __LOCK_BH(lock) \ - do { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_LOCK_OFFSET); ___LOCK(lock); } while (0) +#define __LOCK(lock, ...) \ + do { preempt_disable(); ___LOCK_##__VA_ARGS__(lock); } while (0) -#define __LOCK_IRQ(lock) \ - do { local_irq_disable(); __LOCK(lock); } while (0) +#define __LOCK_BH(lock, ...) \ + do { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_LOCK_OFFSET); ___LOCK_##__VA_ARGS__(lock); } while (0) -#define __LOCK_IRQSAVE(lock, flags) \ - do { local_irq_save(flags); __LOCK(lock); } while (0) +#define __LOCK_IRQ(lock, ...) \ + do { local_irq_disable(); __LOCK(lock, ##__VA_ARGS__); } while (0) -#define ___UNLOCK(lock) \ +#define __LOCK_IRQSAVE(lock, flags, ...) \ + do { local_irq_save(flags); __LOCK(lock, ##__VA_ARGS__); } while (0) + +#define ___UNLOCK_(lock) \ do { __release(lock); (void)(lock); } while (0) -#define __UNLOCK(lock) \ - do { preempt_enable(); ___UNLOCK(lock); } while (0) +#define ___UNLOCK_shared(lock) \ + do { __release_shared(lock); (void)(lock); } while (0) + +#define __UNLOCK(lock, ...) \ + do { preempt_enable(); ___UNLOCK_##__VA_ARGS__(lock); } while (0) -#define __UNLOCK_BH(lock) \ +#define __UNLOCK_BH(lock, ...) \ do { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_LOCK_OFFSET); \ - ___UNLOCK(lock); } while (0) + ___UNLOCK_##__VA_ARGS__(lock); } while (0) -#define __UNLOCK_IRQ(lock) \ - do { local_irq_enable(); __UNLOCK(lock); } while (0) +#define __UNLOCK_IRQ(lock, ...) \ + do { local_irq_enable(); __UNLOCK(lock, ##__VA_ARGS__); } while (0) -#define __UNLOCK_IRQRESTORE(lock, flags) \ - do { local_irq_restore(flags); __UNLOCK(lock); } while (0) +#define __UNLOCK_IRQRESTORE(lock, flags, ...) 
\ + do { local_irq_restore(flags); __UNLOCK(lock, ##__VA_ARGS__); } while (0) #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) -#define _raw_read_lock(lock) __LOCK(lock) +#define _raw_read_lock(lock) __LOCK(lock, shared) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_write_lock_nested(lock, subclass) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) -#define _raw_read_lock_bh(lock) __LOCK_BH(lock) +#define _raw_read_lock_bh(lock) __LOCK_BH(lock, shared) #define _raw_write_lock_bh(lock) __LOCK_BH(lock) #define _raw_spin_lock_irq(lock) __LOCK_IRQ(lock) -#define _raw_read_lock_irq(lock) __LOCK_IRQ(lock) +#define _raw_read_lock_irq(lock) __LOCK_IRQ(lock, shared) #define _raw_write_lock_irq(lock) __LOCK_IRQ(lock) #define _raw_spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) -#define _raw_read_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) +#define _raw_read_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags, shared) #define _raw_write_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) -#define _raw_spin_trylock(lock) ({ __LOCK(lock); 1; }) -#define _raw_read_trylock(lock) ({ __LOCK(lock); 1; }) -#define _raw_write_trylock(lock) ({ __LOCK(lock); 1; }) -#define _raw_spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; }) + +static __always_inline int _raw_spin_trylock(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_bh(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK_BH(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_irq(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK_IRQ(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_irqsave(raw_spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + __LOCK_IRQSAVE(lock, *(flags)); + return 1; +} + +static __always_inline int _raw_read_trylock(rwlock_t *lock) + __cond_acquires_shared(true, lock) +{ + __LOCK(lock, shared); + return 1; +} + +static __always_inline int _raw_write_trylock(rwlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK(lock); + return 1; +} + +static __always_inline int _raw_write_trylock_irqsave(rwlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + __LOCK_IRQSAVE(lock, *(flags)); + return 1; +} + #define _raw_spin_unlock(lock) __UNLOCK(lock) -#define _raw_read_unlock(lock) __UNLOCK(lock) +#define _raw_read_unlock(lock) __UNLOCK(lock, shared) #define _raw_write_unlock(lock) __UNLOCK(lock) #define _raw_spin_unlock_bh(lock) __UNLOCK_BH(lock) #define _raw_write_unlock_bh(lock) __UNLOCK_BH(lock) -#define _raw_read_unlock_bh(lock) __UNLOCK_BH(lock) +#define _raw_read_unlock_bh(lock) __UNLOCK_BH(lock, shared) #define _raw_spin_unlock_irq(lock) __UNLOCK_IRQ(lock) -#define _raw_read_unlock_irq(lock) __UNLOCK_IRQ(lock) +#define _raw_read_unlock_irq(lock) __UNLOCK_IRQ(lock, shared) #define _raw_write_unlock_irq(lock) __UNLOCK_IRQ(lock) #define _raw_spin_unlock_irqrestore(lock, flags) \ __UNLOCK_IRQRESTORE(lock, flags) #define _raw_read_unlock_irqrestore(lock, flags) \ - __UNLOCK_IRQRESTORE(lock, flags) + __UNLOCK_IRQRESTORE(lock, flags, shared) #define _raw_write_unlock_irqrestore(lock, flags) \ __UNLOCK_IRQRESTORE(lock, flags) diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index f6499c37157d..373618a4243c 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -36,10 +36,11 @@ extern void rt_spin_lock_nested(spinlock_t 
*lock, int subclass) __acquires(lock) extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); extern void rt_spin_unlock(spinlock_t *lock) __releases(lock); extern void rt_spin_lock_unlock(spinlock_t *lock); -extern int rt_spin_trylock_bh(spinlock_t *lock); -extern int rt_spin_trylock(spinlock_t *lock); +extern int rt_spin_trylock_bh(spinlock_t *lock) __cond_acquires(true, lock); +extern int rt_spin_trylock(spinlock_t *lock) __cond_acquires(true, lock); static __always_inline void spin_lock(spinlock_t *lock) + __acquires(lock) { rt_spin_lock(lock); } @@ -82,6 +83,7 @@ static __always_inline void spin_lock(spinlock_t *lock) __spin_lock_irqsave_nested(lock, flags, subclass) static __always_inline void spin_lock_bh(spinlock_t *lock) + __acquires(lock) { /* Investigate: Drop bh when blocking ? */ local_bh_disable(); @@ -89,6 +91,7 @@ static __always_inline void spin_lock_bh(spinlock_t *lock) } static __always_inline void spin_lock_irq(spinlock_t *lock) + __acquires(lock) { rt_spin_lock(lock); } @@ -101,45 +104,44 @@ static __always_inline void spin_lock_irq(spinlock_t *lock) } while (0) static __always_inline void spin_unlock(spinlock_t *lock) + __releases(lock) { rt_spin_unlock(lock); } static __always_inline void spin_unlock_bh(spinlock_t *lock) + __releases(lock) { rt_spin_unlock(lock); local_bh_enable(); } static __always_inline void spin_unlock_irq(spinlock_t *lock) + __releases(lock) { rt_spin_unlock(lock); } static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) + __releases(lock) { rt_spin_unlock(lock); } -#define spin_trylock(lock) \ - __cond_lock(lock, rt_spin_trylock(lock)) +#define spin_trylock(lock) rt_spin_trylock(lock) -#define spin_trylock_bh(lock) \ - __cond_lock(lock, rt_spin_trylock_bh(lock)) +#define spin_trylock_bh(lock) rt_spin_trylock_bh(lock) -#define spin_trylock_irq(lock) \ - __cond_lock(lock, rt_spin_trylock(lock)) +#define spin_trylock_irq(lock) rt_spin_trylock(lock) -#define spin_trylock_irqsave(lock, flags) \ -({ \ - int __locked; \ - \ - typecheck(unsigned long, flags); \ - flags = 0; \ - __locked = spin_trylock(lock); \ - __locked; \ -}) +static __always_inline bool _spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + *flags = 0; + return rt_spin_trylock(lock); +} +#define spin_trylock_irqsave(lock, flags) _spin_trylock_irqsave(lock, &(flags)) #define spin_is_contended(lock) (((void)(lock), 0)) diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 2dfa35ffec76..b65bb6e4451c 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -14,7 +14,7 @@ #ifndef CONFIG_PREEMPT_RT /* Non PREEMPT_RT kernels map spinlock to raw_spinlock */ -typedef struct spinlock { +context_lock_struct(spinlock) { union { struct raw_spinlock rlock; @@ -26,7 +26,8 @@ typedef struct spinlock { }; #endif }; -} spinlock_t; +}; +typedef struct spinlock spinlock_t; #define ___SPIN_LOCK_INITIALIZER(lockname) \ { \ @@ -47,12 +48,13 @@ typedef struct spinlock { /* PREEMPT_RT kernels map spinlock to rt_mutex */ #include <linux/rtmutex.h> -typedef struct spinlock { +context_lock_struct(spinlock) { struct rt_mutex_base lock; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -} spinlock_t; +}; +typedef struct spinlock spinlock_t; #define __SPIN_LOCK_UNLOCKED(name) \ { \ diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h index 91cb36b65a17..e5644ab2161f 100644 --- 
a/include/linux/spinlock_types_raw.h +++ b/include/linux/spinlock_types_raw.h @@ -11,7 +11,7 @@ #include <linux/lockdep_types.h> -typedef struct raw_spinlock { +context_lock_struct(raw_spinlock) { arch_spinlock_t raw_lock; #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; @@ -20,7 +20,8 @@ typedef struct raw_spinlock { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -} raw_spinlock_t; +}; +typedef struct raw_spinlock raw_spinlock_t; #define SPINLOCK_MAGIC 0xdead4ead diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 344ad51c8f6c..bb44a0bd7696 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -21,7 +21,7 @@ #include <linux/workqueue.h> #include <linux/rcu_segcblist.h> -struct srcu_struct; +context_lock_struct(srcu_struct, __reentrant_ctx_lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -77,7 +77,7 @@ int init_srcu_struct_fast_updown(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_FAST | SRCU_READ_FLAVOR_FAST_UPDOWN) // Flavors requiring synchronize_rcu() // instead of smp_mb(). -void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases_shared(ssp); #ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> @@ -131,14 +131,16 @@ static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned } #ifdef CONFIG_NEED_SRCU_NMI_SAFE -int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires_shared(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases_shared(ssp); #else static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) + __acquires_shared(ssp) { return __srcu_read_lock(ssp); } static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) + __releases_shared(ssp) { __srcu_read_unlock(ssp, idx); } @@ -210,6 +212,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* + * No-op helper to denote that ssp must be held. Because SRCU-protected pointers + * should still be marked with __rcu_guarded, and we do not want to mark them + * with __guarded_by(ssp) as it would complicate annotations for writers, we + * choose the following strategy: srcu_dereference_check() calls this helper + * that checks that the passed ssp is held, and then fake-acquires 'RCU'. + */ +static inline void __srcu_read_lock_must_hold(const struct srcu_struct *ssp) __must_hold_shared(ssp) { } /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing @@ -223,9 +233,15 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. 
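A reader-side sketch of srcu_dereference_check() following the @c convention described here; the cfg structure, mutex, and srcu_struct names are illustrative:

	struct cfg {
		int value;
	};

	static struct cfg __rcu *active_cfg;
	static DEFINE_MUTEX(cfg_mutex);		/* update side */
	DEFINE_STATIC_SRCU(cfg_srcu);

	static int cfg_get_value(void)
	{
		struct cfg *c;
		int idx, val = 0;

		idx = srcu_read_lock(&cfg_srcu);
		/* Also legal from the update side with cfg_mutex held. */
		c = srcu_dereference_check(active_cfg, &cfg_srcu,
					   lockdep_is_held(&cfg_mutex));
		if (c)
			val = c->value;
		srcu_read_unlock(&cfg_srcu, idx);
		return val;
	}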
*/ -#define srcu_dereference_check(p, ssp, c) \ - __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ - (c) || srcu_read_lock_held(ssp), __rcu) +#define srcu_dereference_check(p, ssp, c) \ +({ \ + __srcu_read_lock_must_hold(ssp); \ + __acquire_shared_ctx_lock(RCU); \ + __auto_type __v = __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || srcu_read_lock_held(ssp), __rcu); \ + __release_shared_ctx_lock(RCU); \ + __v; \ +}) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing @@ -268,7 +284,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() * from another. */ -static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) +static inline int srcu_read_lock(struct srcu_struct *ssp) + __acquires_shared(ssp) { int retval; @@ -304,7 +321,8 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) * contexts where RCU is watching, that is, from contexts where it would * be legal to invoke rcu_read_lock(). Otherwise, lockdep will complain. */ -static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp) +static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires_shared(ssp) + __acquires_shared(ssp) { struct srcu_ctr __percpu *retval; @@ -344,7 +362,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * * complain. */ static inline struct srcu_ctr __percpu *srcu_read_lock_fast_updown(struct srcu_struct *ssp) -__acquires(ssp) + __acquires_shared(ssp) { struct srcu_ctr __percpu *retval; @@ -360,7 +378,7 @@ __acquires(ssp) * See srcu_read_lock_fast() for more information. */ static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) - __acquires(ssp) + __acquires_shared(ssp) { struct srcu_ctr __percpu *retval; @@ -381,7 +399,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_ * and srcu_read_lock_fast(). However, the same definition/initialization * requirements called out for srcu_read_lock_safe() apply. */ -static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) +static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires_shared(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); @@ -400,7 +418,8 @@ static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct * * then none of the other flavors may be used, whether before, during, * or after. */ -static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) +static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) + __acquires_shared(ssp) { int retval; @@ -412,7 +431,8 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int -srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) +srcu_read_lock_notrace(struct srcu_struct *ssp) + __acquires_shared(ssp) { int retval; @@ -443,7 +463,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) * which calls to down_read() may be nested. The same srcu_struct may be * used concurrently by srcu_down_read() and srcu_read_lock(). 
*/ -static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) +static inline int srcu_down_read(struct srcu_struct *ssp) + __acquires_shared(ssp) { WARN_ON_ONCE(in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); @@ -458,7 +479,7 @@ static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) * Exit an SRCU read-side critical section. */ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) - __releases(ssp) + __releases_shared(ssp) { WARN_ON_ONCE(idx & ~0x1); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); @@ -474,7 +495,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) * Exit a light-weight SRCU read-side critical section. */ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) - __releases(ssp) + __releases_shared(ssp) { srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); @@ -490,7 +511,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct * Exit an SRCU-fast-updown read-side critical section. */ static inline void -srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) __releases(ssp) +srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) __releases_shared(ssp) { srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST_UPDOWN); srcu_lock_release(&ssp->dep_map); @@ -504,7 +525,7 @@ srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu * * See srcu_read_unlock_fast() for more information. */ static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, - struct srcu_ctr __percpu *scp) __releases(ssp) + struct srcu_ctr __percpu *scp) __releases_shared(ssp) { srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); @@ -519,7 +540,7 @@ static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, * the same context as the maching srcu_down_read_fast(). */ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) - __releases(ssp) + __releases_shared(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST_UPDOWN); @@ -535,7 +556,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ * Exit an SRCU read-side critical section, but in an NMI-safe manner. */ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) - __releases(ssp) + __releases_shared(ssp) { WARN_ON_ONCE(idx & ~0x1); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI); @@ -545,7 +566,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void -srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) +srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases_shared(ssp) { srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); __srcu_read_unlock(ssp, idx); @@ -560,7 +581,7 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) * the same context as the maching srcu_down_read(). 
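A sketch of the semaphore-like down/up flavour documented above, with the read-side critical section ended from a workqueue rather than the acquiring task; the async_op structure and work handler are illustrative:

	struct async_op {
		struct work_struct work;
		int srcu_idx;
	};

	DEFINE_STATIC_SRCU(op_srcu);

	static void async_op_done(struct work_struct *work)
	{
		struct async_op *op = container_of(work, struct async_op, work);

		/* Ends the read-side section begun in async_op_submit(). */
		srcu_up_read(&op_srcu, op->srcu_idx);
		kfree(op);
	}

	static int async_op_submit(gfp_t gfp)
	{
		struct async_op *op = kmalloc(sizeof(*op), gfp);

		if (!op)
			return -ENOMEM;
		op->srcu_idx = srcu_down_read(&op_srcu);
		INIT_WORK(&op->work, async_op_done);
		schedule_work(&op->work);
		return 0;
	}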
*/ static inline void srcu_up_read(struct srcu_struct *ssp, int idx) - __releases(ssp) + __releases_shared(ssp) { WARN_ON_ONCE(idx & ~0x1); WARN_ON_ONCE(in_nmi()); @@ -600,15 +621,21 @@ DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, _T->idx = srcu_read_lock(_T->lock), srcu_read_unlock(_T->lock, _T->idx), int idx) +DECLARE_LOCK_GUARD_1_ATTRS(srcu, __acquires_shared(_T), __releases_shared(*(struct srcu_struct **)_T)) +#define class_srcu_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(srcu, _T) DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, _T->scp = srcu_read_lock_fast(_T->lock), srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DECLARE_LOCK_GUARD_1_ATTRS(srcu_fast, __acquires_shared(_T), __releases_shared(*(struct srcu_struct **)_T)) +#define class_srcu_fast_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(srcu_fast, _T) DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, _T->scp = srcu_read_lock_fast_notrace(_T->lock), srcu_read_unlock_fast_notrace(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DECLARE_LOCK_GUARD_1_ATTRS(srcu_fast_notrace, __acquires_shared(_T), __releases_shared(*(struct srcu_struct **)_T)) +#define class_srcu_fast_notrace_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(srcu_fast_notrace, _T) #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index e0698024667a..dec7cbe015aa 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -73,6 +73,7 @@ void synchronize_srcu(struct srcu_struct *ssp); * index that must be passed to the matching srcu_read_unlock(). */ static inline int __srcu_read_lock(struct srcu_struct *ssp) + __acquires_shared(ssp) { int idx; @@ -80,6 +81,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); preempt_enable(); + __acquire_shared(ssp); return idx; } @@ -96,22 +98,26 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss } static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) + __acquires_shared(ssp) { return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp)); } static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases_shared(ssp) { __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); } static inline struct srcu_ctr __percpu *__srcu_read_lock_fast_updown(struct srcu_struct *ssp) + __acquires_shared(ssp) { return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp)); } static inline void __srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases_shared(ssp) { __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index d6f978b50472..958cb7ef41cb 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -233,7 +233,7 @@ struct srcu_struct { #define DEFINE_STATIC_SRCU_FAST_UPDOWN(name) \ __DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, static) -int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +int __srcu_read_lock(struct srcu_struct *ssp) __acquires_shared(ssp); void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_expedite_current(struct srcu_struct *ssp); @@ -286,6 +286,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * implementations of this_cpu_inc(). 
*/ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) + __acquires_shared(ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -294,6 +295,7 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src else atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ + __acquire_shared(ssp); return scp; } @@ -308,7 +310,9 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src */ static inline void notrace __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases_shared(ssp) { + __release_shared(ssp); barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. @@ -326,6 +330,7 @@ __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) */ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struct *ssp) + __acquires_shared(ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -334,6 +339,7 @@ struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struc else atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ + __acquire_shared(ssp); return scp; } @@ -348,7 +354,9 @@ struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struc */ static inline void notrace __srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases_shared(ssp) { + __release_shared(ssp); barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. 
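A guard-based reader to go with the srcu guard classes whose context-analysis attributes were added above (the srcu_fast and srcu_fast_notrace guards follow the same pattern); the protected pointer and its structure are illustrative:

	struct dev_state {
		int mode;
	};

	static struct dev_state __rcu *cur_state;
	DEFINE_STATIC_SRCU(state_srcu);

	static int dev_state_mode(void)
	{
		int mode = 0;

		/* srcu_read_lock()/srcu_read_unlock() via the guard. */
		scoped_guard(srcu, &state_srcu) {
			struct dev_state *st = srcu_dereference(cur_state, &state_srcu);

			if (st)
				mode = st->mode;
		}
		return mode;
	}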
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf84d98964b2..02bd6ddb6278 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -59,7 +59,6 @@ struct compat_stat; struct old_timeval32; struct robust_list_head; struct futex_waitv; -struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; struct file_handle; @@ -718,7 +717,7 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); +asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, void __user *cache); asmlinkage long sys_gettimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv, @@ -961,6 +960,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); +asmlinkage long sys_rseq_slice_yield(void); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, unsigned flags, diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b449665c686a..5226efde9ad4 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -8,9 +8,11 @@ */ #include <linux/err.h> +#include <linux/platform_data/simplefb.h> +#include <linux/screen_info.h> #include <linux/types.h> -#include <linux/platform_data/simplefb.h> +#include <video/edid.h> struct device; struct platform_device; @@ -60,6 +62,16 @@ struct efifb_dmi_info { int flags; }; +struct sysfb_display_info { + struct screen_info screen; + +#if defined(CONFIG_FIRMWARE_EDID) + struct edid_info edid; +#endif +}; + +extern struct sysfb_display_info sysfb_primary_display; + #ifdef CONFIG_SYSFB void sysfb_disable(struct device *dev); @@ -82,16 +94,17 @@ static inline bool sysfb_handles_screen_info(void) #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(void); -void sysfb_set_efifb_fwnode(struct platform_device *pd); +void sysfb_apply_efi_quirks(struct screen_info *si); +void sysfb_set_efifb_fwnode(const struct screen_info *si, struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(void) +static inline void sysfb_apply_efi_quirks(struct screen_info *si) { } -static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) +static inline void sysfb_set_efifb_fwnode(const struct screen_info *si, + struct platform_device *pd) { } diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 1f3e5dad6d0d..ee5f0bd41f43 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -76,6 +76,9 @@ struct tee_device { /** * struct tee_driver_ops - driver operations vtable * @get_version: returns version of driver + * @get_tee_revision: returns revision string (diagnostic only); + * do not infer feature support from this, use + * TEE_IOC_VERSION instead * @open: called for a context when the device file is opened * @close_context: called when the device file is closed * @release: called to release the context @@ -95,9 +98,12 @@ struct tee_device { * client closes the device file, even if there are existing references to 
the * context. The TEE driver can use @close_context to start cleaning up. */ + struct tee_driver_ops { void (*get_version)(struct tee_device *teedev, struct tee_ioctl_version_data *vers); + int (*get_tee_revision)(struct tee_device *teedev, + char *buf, size_t len); int (*open)(struct tee_context *ctx); void (*close_context)(struct tee_context *ctx); void (*release)(struct tee_context *ctx); @@ -123,6 +129,9 @@ struct tee_driver_ops { int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); }; +/* Size for TEE revision string buffer used by get_tee_revision(). */ +#define TEE_REVISION_STR_SIZE 128 + /** * struct tee_desc - Describes the TEE driver to the subsystem * @name: name of driver diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 88a6f9697c89..e561a26f537a 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -315,6 +315,9 @@ struct tee_client_device { * @driver: driver structure */ struct tee_client_driver { + int (*probe)(struct tee_client_device *); + void (*remove)(struct tee_client_device *); + void (*shutdown)(struct tee_client_device *); const struct tee_client_device_id *id_table; struct device_driver driver; }; @@ -322,4 +325,13 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of_const(d, struct tee_client_driver, driver) +#define tee_client_driver_register(drv) \ + __tee_client_driver_register(drv, THIS_MODULE) +int __tee_client_driver_register(struct tee_client_driver *, struct module *); +void tee_client_driver_unregister(struct tee_client_driver *); + +#define module_tee_client_driver(__tee_client_driver) \ + module_driver(__tee_client_driver, tee_client_driver_register, \ + tee_client_driver_unregister) + #endif /*__TEE_DRV_H*/ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index b40de9bab4b7..051e42902690 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -46,15 +46,17 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_AUDIT, SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP, + SYSCALL_WORK_BIT_SYSCALL_RSEQ_SLICE, }; -#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) -#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) -#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) -#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) -#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) -#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) -#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP) +#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) +#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) +#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) +#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) +#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP) +#define SYSCALL_WORK_SYSCALL_RSEQ_SLICE BIT(SYSCALL_WORK_BIT_SYSCALL_RSEQ_SLICE) #endif #include <asm/thread_info.h> diff --git a/include/linux/tick.h b/include/linux/tick.h index ac76ae9fa36d..738007d6f577 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -126,6 +126,7 @@ enum tick_dep_bits { #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; +extern bool 
tick_nohz_is_active(void); extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); @@ -142,6 +143,7 @@ extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) +static inline bool tick_nohz_is_active(void) { return false; } static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index dce03a5cafb7..7de6b350e559 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -115,6 +115,15 @@ extern void timecounter_init(struct timecounter *tc, */ extern u64 timecounter_read(struct timecounter *tc); +/* + * This is like cyclecounter_cyc2ns(), but it is used for computing a + * time previous to the time stored in the cycle counter. + */ +static inline u64 cc_cyc2ns_backwards(const struct cyclecounter *cc, u64 cycles, u64 frac) +{ + return ((cycles * cc->mult) - frac) >> cc->shift; +} + /** * timecounter_cyc2time - convert a cycle counter to same * time base as values returned by @@ -131,7 +140,25 @@ extern u64 timecounter_read(struct timecounter *tc); * * Returns: cycle counter converted to nanoseconds since the initial time stamp */ -extern u64 timecounter_cyc2time(const struct timecounter *tc, - u64 cycle_tstamp); +static inline u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp) +{ + const struct cyclecounter *cc = tc->cc; + u64 delta = (cycle_tstamp - tc->cycle_last) & cc->mask; + u64 nsec = tc->nsec, frac = tc->frac; + + /* + * Instead of always treating cycle_tstamp as more recent than + * tc->cycle_last, detect when it is too far in the future and + * treat it as old time stamp instead. 
+ */ + if (unlikely(delta > cc->mask / 2)) { + delta = (tc->cycle_last - cycle_tstamp) & cc->mask; + nsec -= cc_cyc2ns_backwards(cc, delta, frac); + } else { + nsec += cyclecounter_cyc2ns(cc, delta, tc->mask, &frac); + } + + return nsec; +} #endif diff --git a/include/linux/tnum.h b/include/linux/tnum.h index c52b862dad45..fa4654ffb621 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -63,6 +63,11 @@ struct tnum tnum_union(struct tnum t1, struct tnum t2); /* Return @a with all but the lowest @size bytes cleared */ struct tnum tnum_cast(struct tnum a, u8 size); +/* Swap the bytes of a tnum */ +struct tnum tnum_bswap16(struct tnum a); +struct tnum tnum_bswap32(struct tnum a); +struct tnum tnum_bswap64(struct tnum a); + /* Returns true if @a is a known constant */ static inline bool tnum_is_const(struct tnum a) { diff --git a/include/linux/tsm.h b/include/linux/tsm.h index a3b7ab668eff..22e05b2aac69 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -123,7 +123,4 @@ int tsm_report_unregister(const struct tsm_report_ops *ops); struct tsm_dev *tsm_register(struct device *parent, struct pci_tsm_ops *ops); void tsm_unregister(struct tsm_dev *tsm_dev); struct tsm_dev *find_tsm_dev(int id); -struct pci_ide; -int tsm_ide_stream_register(struct pci_ide *ide); -void tsm_ide_stream_unregister(struct pci_ide *ide); #endif /* __TSM_H */ diff --git a/include/linux/types.h b/include/linux/types.h index d4437e9c452c..d673747eda8a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -171,6 +171,11 @@ typedef u64 phys_addr_t; typedef u32 phys_addr_t; #endif +struct phys_vec { + phys_addr_t paddr; + size_t len; +}; + typedef phys_addr_t resource_size_t; /* diff --git a/include/linux/uio.h b/include/linux/uio.h index 5b127043a151..a9bc5b3067e3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -389,6 +389,9 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0); +ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv, + size_t max_size, unsigned short *nr_vecs, + unsigned short max_vecs, iov_iter_extraction_t extraction_flags); /** * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h index 7f7282516bf5..64618618febd 100644 --- a/include/linux/unwind_user.h +++ b/include/linux/unwind_user.h @@ -5,8 +5,22 @@ #include <linux/unwind_user_types.h> #include <asm/unwind_user.h> -#ifndef ARCH_INIT_USER_FP_FRAME - #define ARCH_INIT_USER_FP_FRAME +#ifndef CONFIG_HAVE_UNWIND_USER_FP + +#define ARCH_INIT_USER_FP_FRAME(ws) + +#endif + +#ifndef ARCH_INIT_USER_FP_ENTRY_FRAME +#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) +#endif + +#ifndef unwind_user_at_function_start +static inline bool unwind_user_at_function_start(struct pt_regs *regs) +{ + return false; +} +#define unwind_user_at_function_start unwind_user_at_function_start #endif int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index ee3d36eda45d..f548fea2adec 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -242,6 +242,7 @@ extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, 
unsigned long vaddr); +extern unsigned long arch_uprobe_get_xol_area(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 2eb528058d0d..71564868b8f6 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -119,7 +119,7 @@ * a fuss about it. This makes the programmer responsible for tagging * the functions that can be garbage-collected. * - * With the macro it is possible to write the following: + * With the macro it is possible to write the following:: * * static int foo_suspend(struct device *dev) * { diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3398a345bda8..1909b945b3ea 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -303,6 +303,7 @@ int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); void set_pgdat_percpu_threshold(pg_data_t *pgdat, int (*calculate_pressure)(struct zone *)); +void vmstat_flush_workqueue(void); #else /* CONFIG_SMP */ /* @@ -403,6 +404,7 @@ static inline void __dec_node_page_state(struct page *page, static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } +static inline void vmstat_flush_workqueue(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index dabc351cc127..a4749f56398f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -588,7 +588,7 @@ struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); -extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); +extern int workqueue_unbound_housekeeping_update(const struct cpumask *hk); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 45ff6f7a872b..c47d4b9b88b3 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -44,7 +44,7 @@ struct ww_class { unsigned int is_wait_die; }; -struct ww_mutex { +context_lock_struct(ww_mutex) { struct WW_MUTEX_BASE base; struct ww_acquire_ctx *ctx; #ifdef DEBUG_WW_MUTEXES @@ -52,7 +52,7 @@ struct ww_mutex { #endif }; -struct ww_acquire_ctx { +context_lock_struct(ww_acquire_ctx) { struct task_struct *task; unsigned long stamp; unsigned int acquired; @@ -141,6 +141,7 @@ static inline void ww_mutex_init(struct ww_mutex *lock, */ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) + __acquires(ctx) __no_context_analysis { ctx->task = current; ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); @@ -179,6 +180,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, * data structures. */ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) + __releases(ctx) __acquires_shared(ctx) __no_context_analysis { #ifdef DEBUG_WW_MUTEXES lockdep_assert_held(ctx); @@ -196,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) * mutexes have been released with ww_mutex_unlock. 
*/ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) + __releases_shared(ctx) __no_context_analysis { #ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); @@ -245,7 +248,8 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); +extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + __cond_acquires(0, lock) __must_hold(ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible @@ -278,7 +282,8 @@ extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acq * A mutex acquired with this function must be released with ww_mutex_unlock. */ extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); + struct ww_acquire_ctx *ctx) + __cond_acquires(0, lock) __must_hold(ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex @@ -305,6 +310,7 @@ extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, */ static inline void ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + __acquires(lock) __must_hold(ctx) __no_context_analysis { int ret; #ifdef DEBUG_WW_MUTEXES @@ -342,6 +348,7 @@ ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) static inline int __must_check ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + __cond_acquires(0, lock) __must_hold(ctx) { #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); @@ -349,10 +356,11 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, return ww_mutex_lock_interruptible(lock, ctx); } -extern void ww_mutex_unlock(struct ww_mutex *lock); +extern void ww_mutex_unlock(struct ww_mutex *lock) __releases(lock); extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); + struct ww_acquire_ctx *ctx) + __cond_acquires(true, lock) __must_hold(ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable @@ -363,6 +371,7 @@ extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, * this function is called. */ static inline void ww_mutex_destroy(struct ww_mutex *lock) + __must_not_hold(lock) { #ifndef CONFIG_PREEMPT_RT mutex_destroy(&lock->base); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 64e9afe7d647..296b5ee5c979 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -114,7 +114,7 @@ struct simple_xattr { struct rb_node rb_node; char *name; size_t size; - char value[]; + char value[] __counted_by(size); }; void simple_xattrs_init(struct simple_xattrs *xattrs); |
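The tee_core.h hunk above adds an optional get_tee_revision() hook, documented as diagnostic-only, plus TEE_REVISION_STR_SIZE for the string buffer. The hunk does not spell out a return convention, so the sketch below assumes 0 on success and uses a fixed version string where a real driver would query its firmware; everything named example_* is hypothetical.

#include <linux/kernel.h>
#include <linux/minmax.h>
#include <linux/tee_core.h>

/*
 * Sketch of a driver-side implementation. A real driver would obtain
 * the string from its firmware; the fixed value and the 0-on-success
 * convention are assumptions, since the header only says the string
 * is diagnostic output and must not be used for feature probing.
 */
static int example_get_tee_revision(struct tee_device *teedev,
				    char *buf, size_t len)
{
	scnprintf(buf, min_t(size_t, len, TEE_REVISION_STR_SIZE),
		  "example-tee 3.22.0");
	return 0;
}

/* Wired up next to .get_version as .get_tee_revision in tee_driver_ops. */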
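The tee_drv.h hunk moves probe/remove/shutdown into struct tee_client_driver and adds tee_client_driver_register()/tee_client_driver_unregister() plus a module_tee_client_driver() helper, presumably so client drivers no longer populate the generic device_driver callbacks or set the bus by hand. A minimal client module using the new interface might look like the sketch below; the UUID and all example_* names are invented.

#include <linux/device.h>
#include <linux/module.h>
#include <linux/tee_drv.h>
#include <linux/uuid.h>

/* Purely illustrative trusted-application UUID. */
#define EXAMPLE_TA_UUID UUID_INIT(0x12345678, 0x1234, 0x1234, \
				  0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34)

static int example_ta_probe(struct tee_client_device *tdev)
{
	dev_info(&tdev->dev, "example TA bound\n");
	return 0;
}

static void example_ta_remove(struct tee_client_device *tdev)
{
	dev_info(&tdev->dev, "example TA unbound\n");
}

static const struct tee_client_device_id example_ta_id_table[] = {
	{ EXAMPLE_TA_UUID },
	{ }
};
MODULE_DEVICE_TABLE(tee, example_ta_id_table);

static struct tee_client_driver example_ta_driver = {
	.probe		= example_ta_probe,
	.remove		= example_ta_remove,
	.id_table	= example_ta_id_table,
	.driver		= {
		.name	= "example-ta",
	},
};
module_tee_client_driver(example_ta_driver);

MODULE_DESCRIPTION("Example TEE client using the new registration helpers");
MODULE_LICENSE("GPL");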
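timecounter.h makes timecounter_cyc2time() an inline and, via the new cc_cyc2ns_backwards() helper, converts cycle values that lie slightly before tc->cycle_last backwards in time instead of misreading them as an almost-full counter wrap into the future. The usual consumer pattern is sketched below for a hypothetical device with a free-running 48-bit counter at 125 MHz; mydev_read_counter() and the clock rate are assumptions, not part of this diff.

#include <linux/clocksource.h>
#include <linux/container_of.h>
#include <linux/timecounter.h>
#include <linux/timekeeping.h>

struct mydev {
	struct cyclecounter cc;
	struct timecounter tc;
};

u64 mydev_read_counter(struct mydev *dev);	/* hypothetical MMIO accessor */

static u64 mydev_cc_read(const struct cyclecounter *cc)
{
	struct mydev *dev = container_of(cc, struct mydev, cc);

	return mydev_read_counter(dev);
}

static void mydev_time_init(struct mydev *dev)
{
	dev->cc.read = mydev_cc_read;
	dev->cc.mask = CYCLECOUNTER_MASK(48);
	/* mult/shift for an assumed 125 MHz device clock. */
	clocks_calc_mult_shift(&dev->cc.mult, &dev->cc.shift,
			       125000000, NSEC_PER_SEC, 10);
	timecounter_init(&dev->tc, &dev->cc, ktime_get_real_ns());
}

/*
 * Hardware timestamps are often captured a little before the most
 * recent timecounter_read(); the reworked timecounter_cyc2time()
 * converts such slightly-old cycle values backwards rather than
 * treating the wrapped delta as far in the future.
 */
static u64 mydev_hwtstamp_to_ns(struct mydev *dev, u64 raw_cycles)
{
	return timecounter_cyc2time(&dev->tc, raw_cycles);
}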
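tnum.h only declares tnum_bswap16/32/64(); the implementation is not part of this section. Because a byte swap is a pure bit permutation, applying the same swap to both the value and the mask of a tnum is exact, and the narrow variants presumably zero-extend like BPF's 16/32-bit bswap instructions. A plausible (not in-tree) implementation:

#include <linux/swab.h>
#include <linux/tnum.h>

#define EXAMPLE_TNUM(_v, _m)	((struct tnum){.value = (_v), .mask = (_m)})

/*
 * Sketch only: known bits stay known under a byte swap, so value and
 * mask are swapped independently. The 16/32-bit variants act on the
 * low bytes and leave the upper bits known-zero.
 */
static struct tnum example_tnum_bswap16(struct tnum a)
{
	return EXAMPLE_TNUM(swab16((u16)a.value), swab16((u16)a.mask));
}

static struct tnum example_tnum_bswap32(struct tnum a)
{
	return EXAMPLE_TNUM(swab32((u32)a.value), swab32((u32)a.mask));
}

static struct tnum example_tnum_bswap64(struct tnum a)
{
	return EXAMPLE_TNUM(swab64(a.value), swab64(a.mask));
}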
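The ww_mutex.h hunk wraps the structures in context_lock_struct() and annotates the API (__acquires(), __cond_acquires(), __must_hold(), __releases(), ...) for compiler context analysis. The locking pattern those annotations describe is the usual acquire-context dance with -EDEADLK backoff; a self-contained sketch with two illustrative locks follows.

#include <linux/errno.h>
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(example_ww_class);
static struct ww_mutex lock_a, lock_b;

static void example_locks_init(void)
{
	ww_mutex_init(&lock_a, &example_ww_class);
	ww_mutex_init(&lock_b, &example_ww_class);
}

static void example_lock_pair(void)
{
	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &example_ww_class);

	/* With nothing acquired in @ctx yet this cannot return -EDEADLK. */
	ww_mutex_lock(&lock_a, &ctx);

	while (ww_mutex_lock(&lock_b, &ctx) == -EDEADLK) {
		/*
		 * We lost against an older context: drop everything we
		 * hold, sleep on the contended lock via the slow path
		 * (which cannot fail), then re-take the other lock.
		 */
		ww_mutex_unlock(&lock_a);
		ww_mutex_lock_slow(&lock_b, &ctx);

		if (ww_mutex_lock(&lock_a, &ctx) != -EDEADLK)
			break;

		/* Contended on lock_a this time: back off and swap roles. */
		ww_mutex_unlock(&lock_b);
		ww_mutex_lock_slow(&lock_a, &ctx);
	}

	ww_acquire_done(&ctx);

	/* ... work on the data protected by both locks ... */

	ww_mutex_unlock(&lock_b);
	ww_mutex_unlock(&lock_a);
	ww_acquire_fini(&ctx);
}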
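The xattr.h hunk annotates simple_xattr::value with __counted_by(size), which lets FORTIFY/UBSAN bounds-check accesses to the flexible array. The practical consequence for allocators is that the object must be sized with struct_size() and ->size assigned before value[] is touched; a sketch of that pattern (mirroring, but not copied from, simple_xattr_alloc()) is below.

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xattr.h>

/*
 * Illustrative allocator: size the trailing array with struct_size()
 * and set ->size before writing value[], so that __counted_by-aware
 * bounds checking sees a valid element count for the copy below.
 */
static struct simple_xattr *example_xattr_alloc(const void *value, size_t size)
{
	struct simple_xattr *xattr;

	xattr = kzalloc(struct_size(xattr, value, size), GFP_KERNEL);
	if (!xattr)
		return NULL;

	xattr->size = size;
	if (value)
		memcpy(xattr->value, value, size);

	return xattr;
}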
