Diffstat (limited to 'include/linux')
150 files changed, 2387 insertions, 1440 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1c5bb1e887cd..5ff5d99f6ead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1595,18 +1595,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else diff --git a/include/linux/acpi_rimt.h b/include/linux/acpi_rimt.h new file mode 100644 index 000000000000..fad3adc4d899 --- /dev/null +++ b/include/linux/acpi_rimt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024-2025, Ventana Micro Systems Inc. + * Author: Sunil V L <sunilvl@ventanamicro.com> + */ + +#ifndef _ACPI_RIMT_H +#define _ACPI_RIMT_H + +#ifdef CONFIG_ACPI_RIMT +int rimt_iommu_register(struct device *dev); +#else +static inline int rimt_iommu_register(struct device *dev) +{ + return -ENODEV; +} +#endif + +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_ACPI_RIMT) +int rimt_iommu_configure_id(struct device *dev, const u32 *id_in); +#else +static inline int rimt_iommu_configure_id(struct device *dev, const u32 *id_in) +{ + return -ENODEV; +} +#endif + +#endif /* _ACPI_RIMT_H */ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 9ef2633e2c08..d40ac39bfbe8 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -221,6 +221,16 @@ static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) ref->ct = NULL; } +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) +{ + tag->ct.flags |= CODETAG_FLAG_INACCURATE; +} + +static inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) +{ + return !!(tag->ct.flags & CODETAG_FLAG_INACCURATE); +} + #define alloc_tag_record(p) ((p) = current->alloc_tag) #else /* CONFIG_MEM_ALLOC_PROFILING */ @@ -230,6 +240,8 @@ static inline bool mem_alloc_profiling_enabled(void) { return false; } static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) {} static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {} +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) {} +static inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) { return false; } #define alloc_tag_record(p) do {} while (0) #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e1634897e159..cd7ee4df9045 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -128,6 +128,7 @@ #define FFA_FEAT_RXTX_MIN_SZ_4K 0 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 +#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) /* FFA Bus/Device/Driver related */ struct ffa_device { diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e721148c95d0..3e64f14739dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,16 +66,6 @@ static inline void wb_stat_mod(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -{ - wb_stat_mod(wb, item, 1); -} - -static inline void dec_wb_stat(struct bdi_writeback *wb, enum 
wb_stat_item item) -{ - wb_stat_mod(wb, item, -1); -} - static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_read_positive(&wb->stat[item]); @@ -118,12 +108,10 @@ int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit * * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages * should contribute to accounting - * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ #define BDI_CAP_WRITEBACK (1 << 0) -#define BDI_CAP_WRITEBACK_ACCT (1 << 1) -#define BDI_CAP_STRICTLIMIT (1 << 2) +#define BDI_CAP_STRICTLIMIT (1 << 1) extern struct backing_dev_info noop_backing_dev_info; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 066e5309bd45..dad5cb5b3812 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -199,7 +199,7 @@ struct gendisk { unsigned int zone_wplugs_hash_bits; atomic_t nr_zone_wplugs; spinlock_t zone_wplugs_lock; - struct mempool_s *zone_wplugs_pool; + struct mempool *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 1af241525a17..f6e0795db484 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { - void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); + void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE); if (!p) return ERR_PTR(-ENOMEM); diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 0a80e1f9aa20..3fc0efa0825b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -22,11 +22,8 @@ struct page; * @bv_len: Number of bytes in the address range. * @bv_offset: Start of the address range relative to the start of @bv_page. * - * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: - * - * nth_page(@bv_page, n) == @bv_page + n - * - * This holds because page_is_mergeable() checks the above property. + * All pages within a bio_vec starting from @bv_page are contiguous and + * can simply be iterated (see bvec_advance()). 
*/ struct bio_vec { struct page *bv_page; diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index c9a4c96c9943..b3705e8bbe2b 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -173,6 +173,22 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words) } } +static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = le32_to_cpu(src[i]); +} + +static inline void memcpy_to_le32(__le32 *dst, const u32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = cpu_to_le32(src[i]); +} + static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7af499bdbecb..d60ce9708ea2 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -47,8 +47,6 @@ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ #define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ -#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */ -#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */ #define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ #define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index e656f63efdce..54a3fa370004 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -34,14 +34,14 @@ struct clk_omap_reg { * @clk_ref: struct clk_hw pointer to the clock's reference clock input * @control_reg: register containing the DPLL mode bitfield * @enable_mask: mask of the DPLL mode bitfield in @control_reg - * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() - * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate() + * @last_rounded_rate: cache of the last rate result of omap2_dpll_determine_rate() + * @last_rounded_m: cache of the last M result of omap2_dpll_determine_rate() * @last_rounded_m4xen: cache of the last M4X result of - * omap4_dpll_regm4xen_round_rate() + * omap4_dpll_regm4xen_determine_rate() * @last_rounded_lpmode: cache of the last lpmode result of * omap4_dpll_lpmode_recalc() * @max_multiplier: maximum valid non-bypass multiplier value (actual) - * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() + * @last_rounded_n: cache of the last N result of omap2_dpll_determine_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) * @max_rate: maximum clock rate for the DPLL diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 457ed8fd3214..8ea2a5f7c98a 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -16,13 +16,16 @@ struct module; #define CODETAG_SECTION_START_PREFIX "__start_" #define CODETAG_SECTION_STOP_PREFIX "__stop_" +/* codetag flags */ +#define CODETAG_FLAG_INACCURATE (1 << 0) + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as * an array of these. 
*/ struct codetag { - unsigned int flags; /* used in later patches */ + unsigned int flags; unsigned int lineno; const char *modname; const char *function; diff --git a/include/linux/console.h b/include/linux/console.h index 8f10d0a85bb4..031a58dc2b91 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -666,6 +666,8 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; +DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); + extern void console_init(void); /* For deferred console takeover */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 59b4fec5f254..13b35637bd5a 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -159,6 +159,9 @@ struct vc_data { struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ + u16 *vc_saved_screen; + unsigned int vc_saved_cols; + unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ }; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 4ac65c68bbf4..6de59ce8ef8c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -474,35 +474,6 @@ static inline bool is_coresight_device(void __iomem *base) return cid == CORESIGHT_CID; } -/* - * Attempt to find and enable "APB clock" for the given device - * - * Returns: - * - * clk - Clock is found and enabled - * NULL - clock is not found - * ERROR - Clock is found but failed to enable - */ -static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) -{ - struct clk *pclk; - int ret; - - pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) { - pclk = clk_get(dev, "apb"); - if (IS_ERR(pclk)) - return NULL; - } - - ret = clk_prepare_enable(pclk); - if (ret) { - clk_put(pclk); - return ERR_PTR(ret); - } - return pclk; -} - #define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) static inline u32 coresight_get_pid(struct csdev_access *csa) @@ -733,4 +704,6 @@ void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, struct coresight_device *sink); +int coresight_get_enable_clocks(struct device *dev, struct clk **pclk, + struct clk **atclk); #endif /* _LINUX_COREISGHT_H */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 40966512ea18..0465d1e6f72a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -26,12 +26,10 @@ *********************************************************************/ /* * Frequency values here are CPU kHz - * - * Maximum transition latency is in nanoseconds - if it's unknown, - * CPUFREQ_ETERNAL shall be used. */ -#define CPUFREQ_ETERNAL (-1) +#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC + #define CPUFREQ_NAME_LEN 16 /* Print length for names. Extra 1 space for accommodating '\n' in prints */ #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) diff --git a/include/linux/damon.h b/include/linux/damon.h index 9e62b2a85538..cae8c613c5fc 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -110,7 +110,7 @@ struct damon_target { * * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. - * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_PAGEOUT: Reclaim the region. 
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. @@ -121,10 +121,10 @@ struct damon_target { * @NR_DAMOS_ACTIONS: Total number of DAMOS actions * * The support of each action is up to running &struct damon_operations. - * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR supports all actions except - * &enum DAMOS_LRU_PRIO and &enum DAMOS_LRU_DEPRIO. &enum DAMON_OPS_PADDR - * supports only &enum DAMOS_PAGEOUT, &enum DAMOS_LRU_PRIO, &enum - * DAMOS_LRU_DEPRIO, and &DAMOS_STAT. + * Refer to 'Operation Action' section of Documentation/mm/damon/design.rst for + * status of the supports. + * + * Note that DAMOS_PAGEOUT doesn't trigger demotions. */ enum damos_action { DAMOS_WILLNEED, @@ -748,7 +748,8 @@ struct damon_attrs { * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. - * + * @addr_unit: Scale factor for core to ops address conversion. + * @min_sz_region: Minimum region size. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ @@ -790,6 +791,8 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; + unsigned long addr_unit; + unsigned long min_sz_region; struct list_head adaptive_targets; struct list_head schemes; @@ -878,7 +881,7 @@ static inline void damon_insert_region(struct damon_region *r, void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, - unsigned int nr_ranges); + unsigned int nr_ranges, unsigned long min_sz_region); void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); @@ -935,6 +938,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs } +bool damon_initialized(void); int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); bool damon_is_running(struct damon_ctx *ctx); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cc3e1c1a3454..c83e02b94389 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -95,7 +95,10 @@ struct dentry { seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ - struct qstr d_name; + union { + struct qstr __d_name; /* for use ONLY in fs/dcache.c */ + const struct qstr d_name; + }; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ union shortname_store d_shortname; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f3bc0bcd7098..c249912456f9 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -149,7 +149,5 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); -dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir, unsigned long attrs); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 332b80c42b6f..10882d00cb17 100644 --- a/include/linux/dma-map-ops.h +++ 
b/include/linux/dma-map-ops.h @@ -395,15 +395,15 @@ void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); #ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT -bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); -bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle); bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents); bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents); #else -#define arch_dma_map_page_direct(d, a) (false) -#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_phys_direct(d, a) (false) +#define arch_dma_unmap_phys_direct(d, a) (false) #define arch_dma_map_sg_direct(d, s, n) (false) #define arch_dma_unmap_sg_direct(d, s, n) (false) #endif diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 55c03e5fe8cb..8248ff9363ee 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -59,6 +59,26 @@ #define DMA_ATTR_PRIVILEGED (1UL << 9) /* + * DMA_ATTR_MMIO - Indicates memory-mapped I/O (MMIO) region for DMA mapping + * + * This attribute indicates the physical address is not normal system + * memory. It may not be used with kmap*()/phys_to_virt()/phys_to_page() + * functions, it may not be cacheable, and access using CPU load/store + * instructions may not be allowed. + * + * Usually this will be used to describe MMIO addresses, or other non-cacheable + * register addresses. When DMA mapping this sort of address we call + * the operation Peer to Peer as a one device is DMA'ing to another device. + * For PCI devices the p2pdma APIs must be used to determine if DMA_ATTR_MMIO + * is appropriate. + * + * For architectures that require cache flushing for DMA coherence + * DMA_ATTR_MMIO will not perform any cache flushing. The address + * provided must never be mapped cacheable into the CPU. + */ +#define DMA_ATTR_MMIO (1UL << 10) + +/* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. 
It is specific to a * given device and there may be a translation between the CPU physical address @@ -118,6 +138,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, unsigned long attrs); void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, @@ -172,6 +196,15 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } +static inline dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void dma_unmap_phys(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} static inline unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6de7c05d6bd8..99efe2b9b4ea 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -594,9 +594,9 @@ struct dma_descriptor_metadata_ops { * @phys: physical address of the descriptor * @chan: target channel for this operation * @tx_submit: accept the descriptor, assign ordered cookie and mark the + * descriptor pending. To be pushed on .issue_pending() call * @desc_free: driver's callback function to free a resusable descriptor * after completion - * descriptor pending. To be pushed on .issue_pending() call * @callback: routine to call after this operation is complete * @callback_result: error result from a DMA transaction * @callback_param: general parameter to pass to the callback routine diff --git a/include/linux/entry-kvm.h b/include/linux/entry-virt.h index 16149f6625e4..42c89e3e5ca7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-virt.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H #include <linux/static_call_types.h> #include <linux/resume_user_mode.h> @@ -10,7 +10,7 @@ #include <linux/tick.h> /* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK #ifndef ARCH_XFER_TO_GUEST_MODE_WORK # define ARCH_XFER_TO_GUEST_MODE_WORK (0) @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. 
*/ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that @@ -95,6 +90,6 @@ static inline bool xfer_to_guest_mode_work_pending(void) lockdep_assert_irqs_disabled(); return __xfer_to_guest_mode_work_pending(); } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ #endif diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3aac58a520c7..d0cf10d5e0f7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -276,7 +276,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); - struct file * (*open)(struct path *path, unsigned int oflags); + struct file * (*open)(const struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2f8b8bfc0e73..6afb4a13b81d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -79,6 +79,7 @@ enum stop_cp_reason { STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, + STOP_CP_REASON_CORRUPTED_NID, STOP_CP_REASON_MAX, }; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b303472255be..32884c9721e5 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,6 +47,7 @@ extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); +extern void thaw_process(struct task_struct *p); static inline bool try_to_freeze(void) { @@ -80,6 +81,7 @@ static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} +static inline void thaw_process(struct task_struct *p) {} static inline bool try_to_freeze(void) { return false; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e9d7c757efe..c895146c1444 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -236,6 +236,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_CTIME_SET (1 << 10) #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) @@ -537,7 +538,7 @@ struct address_space { /* * Returns true if any of the pages 
in the mapping are marked with the tag. */ -static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) +static inline bool mapping_tagged(const struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } @@ -585,7 +586,7 @@ static inline void i_mmap_assert_write_locked(struct address_space *mapping) /* * Might pages of this file be mapped into userspace? */ -static inline int mapping_mapped(struct address_space *mapping) +static inline int mapping_mapped(const struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } @@ -599,7 +600,7 @@ static inline int mapping_mapped(struct address_space *mapping) * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. */ -static inline int mapping_writably_mapped(struct address_space *mapping) +static inline int mapping_writably_mapped(const struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } @@ -1192,6 +1193,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_owner: file owner * @f_path: path of the file + * @__f_path: writable alias for @f_path; *ONLY* for core VFS and only before + * the file gets open * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position @@ -1217,7 +1220,10 @@ struct file { const struct cred *f_cred; struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ - struct path f_path; + union { + const struct path f_path; + struct path __f_path; + }; union { /* regular files (with FMODE_ATOMIC_POS) and directories */ struct mutex f_pos_lock; @@ -1434,6 +1440,8 @@ struct sb_writers { struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; +struct mount; + struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ @@ -1468,7 +1476,7 @@ struct super_block { __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ - struct list_head s_mounts; /* list of mounts; _not_ for fs use */ + struct mount *s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; @@ -2385,6 +2393,8 @@ static inline bool can_mmap_file(struct file *file) return true; } +int __compat_vma_mmap_prepare(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma); int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) @@ -2873,7 +2883,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct path *backing_file_user_path(const struct file *f); +const struct path *backing_file_user_path(const struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file @@ -3712,7 +3722,8 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, * happens when a directory is casefolded and the filesystem is strict * about its encoding. 
*/ -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) return true; @@ -3727,18 +3738,42 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst return !utf8_validate(dir->i_sb->s_encoding, name); } #else -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { return true; } #endif +static inline struct unicode_map *sb_encoding(const struct super_block *sb) +{ +#if IS_ENABLED(CONFIG_UNICODE) + return sb->s_encoding; +#else + return NULL; +#endif +} + static inline bool sb_has_encoding(const struct super_block *sb) { + return !!sb_encoding(sb); +} + +/* + * Compare if two super blocks have the same encoding and flags + */ +static inline bool sb_same_encoding(const struct super_block *sb1, + const struct super_block *sb2) +{ #if IS_ENABLED(CONFIG_UNICODE) - return !!sb->s_encoding; + if (sb1->s_encoding == sb2->s_encoding) + return true; + + return (sb1->s_encoding && sb2->s_encoding && + (sb1->s_encoding->version == sb2->s_encoding->version) && + (sb1->s_encoding_flags == sb2->s_encoding_flags)); #else - return false; + return true; #endif } diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 671f031be173..0d6c8a6d7be2 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -134,8 +134,13 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); -extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, - const char *value, size_t v_size); +extern int vfs_parse_fs_qstr(struct fs_context *fc, const char *key, + const struct qstr *value); +static inline int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value) +{ + return vfs_parse_fs_qstr(fc, key, value ? &QSTR(value) : NULL); +} int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d4034ddaf392..0d954ea7b179 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -273,6 +273,8 @@ struct fsnotify_group { int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; + /* chained on perm_group_list */ + struct list_head perm_grp_list; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 5ebf26fcdcfa..0ceb4e09306c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) -struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order); #define alloc_pages_nolock(...) 
alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__)) extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); diff --git a/include/linux/hid.h b/include/linux/hid.h index c32425b5d011..e1b673ad7457 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -156,6 +156,7 @@ struct hid_item { #define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 +#define HID_UP_HAPTIC 0x000e0000 #define HID_UP_PID 0x000f0000 #define HID_UP_BATTERY 0x00850000 #define HID_UP_CAMERA 0x00900000 @@ -316,6 +317,28 @@ struct hid_item { #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 +#define HID_HP_SIMPLECONTROLLER 0x000e0001 +#define HID_HP_WAVEFORMLIST 0x000e0010 +#define HID_HP_DURATIONLIST 0x000e0011 +#define HID_HP_AUTOTRIGGER 0x000e0020 +#define HID_HP_MANUALTRIGGER 0x000e0021 +#define HID_HP_AUTOTRIGGERASSOCIATEDCONTROL 0x000e0022 +#define HID_HP_INTENSITY 0x000e0023 +#define HID_HP_REPEATCOUNT 0x000e0024 +#define HID_HP_RETRIGGERPERIOD 0x000e0025 +#define HID_HP_WAVEFORMVENDORPAGE 0x000e0026 +#define HID_HP_WAVEFORMVENDORID 0x000e0027 +#define HID_HP_WAVEFORMCUTOFFTIME 0x000e0028 +#define HID_HP_WAVEFORMNONE 0x000e1001 +#define HID_HP_WAVEFORMSTOP 0x000e1002 +#define HID_HP_WAVEFORMCLICK 0x000e1003 +#define HID_HP_WAVEFORMBUZZCONTINUOUS 0x000e1004 +#define HID_HP_WAVEFORMRUMBLECONTINUOUS 0x000e1005 +#define HID_HP_WAVEFORMPRESS 0x000e1006 +#define HID_HP_WAVEFORMRELEASE 0x000e1007 +#define HID_HP_VENDORWAVEFORMMIN 0x000e2001 +#define HID_HP_VENDORWAVEFORMMAX 0x000e2fff + #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_BAT_CHARGING 0x00850044 @@ -426,6 +449,12 @@ struct hid_item { #define HID_BOOT_PROTOCOL 0 /* + * HID units + */ +#define HID_UNIT_GRAM 0x0101 +#define HID_UNIT_NEWTON 0xe111 + +/* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and * restored with PUSH/POP statements. @@ -818,7 +847,7 @@ struct hid_usage_id { * zero from them. 
*/ struct hid_driver { - char *name; + const char *name; const struct hid_device_id *id_table; struct list_head dyn_list; diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 36053c3d6d64..0574c21ca45d 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -7,7 +7,7 @@ */ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void *__kmap_local_page_prot(const struct page *page, pgprot_t prot); void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); @@ -33,7 +33,7 @@ static inline void kmap_flush_tlb(unsigned long addr) { } #endif void *kmap_high(struct page *page); -void kunmap_high(struct page *page); +void kunmap_high(const struct page *page); void __kmap_flush_unused(void); struct page *__kmap_to_page(void *addr); @@ -50,7 +50,7 @@ static inline void *kmap(struct page *page) return addr; } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { might_sleep(); if (!PageHighMem(page)) @@ -68,12 +68,12 @@ static inline void kmap_flush_unused(void) __kmap_flush_unused(); } -static inline void *kmap_local_page(struct page *page) +static inline void *kmap_local_page(const struct page *page) { return __kmap_local_page_prot(page, kmap_prot); } -static inline void *kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { if (!PageHighMem(page)) return page_address(page); @@ -81,13 +81,13 @@ static inline void *kmap_local_page_try_from_panic(struct page *page) return NULL; } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { - struct page *page = folio_page(folio, offset / PAGE_SIZE); + const struct page *page = folio_page(folio, offset / PAGE_SIZE); return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); } @@ -102,7 +102,7 @@ static inline void __kunmap_local(const void *vaddr) kunmap_local_indexed(vaddr); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -113,7 +113,7 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) return __kmap_local_page_prot(page, prot); } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { return kmap_atomic_prot(page, kmap_prot); } @@ -173,32 +173,32 @@ static inline void *kmap(struct page *page) return page_address(page); } -static inline void kunmap_high(struct page *page) { } +static inline void kunmap_high(const struct page *page) { } static inline void kmap_flush_unused(void) { } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(page_address(page)); #endif } -static inline void *kmap_local_page(struct page *page) +static inline void *kmap_local_page(const struct page *page) { return page_address(page); } -static inline void 
*kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { return page_address(page); } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { return folio_address(folio) + offset; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return kmap_local_page(page); } @@ -215,7 +215,7 @@ static inline void __kunmap_local(const void *addr) #endif } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -225,7 +225,7 @@ static inline void *kmap_atomic(struct page *page) return page_address(page); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { return kmap_atomic(page); } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6234f316468c..105cc4c00cc3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -43,7 +43,7 @@ static inline void *kmap(struct page *page); * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ -static inline void kunmap(struct page *page); +static inline void kunmap(const struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address @@ -93,7 +93,7 @@ static inline void kmap_flush_unused(void); * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ -static inline void *kmap_local_page(struct page *page); +static inline void *kmap_local_page(const struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage @@ -129,7 +129,7 @@ static inline void *kmap_local_page(struct page *page); * Context: Can be invoked from any context. * Return: The virtual address of @offset. */ -static inline void *kmap_local_folio(struct folio *folio, size_t offset); +static inline void *kmap_local_folio(const struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! 
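[For context: kmap_atomic() remains deprecated in favour of kmap_local_page(), whose signatures these hunks constify. A minimal sketch of the preferred pattern, assuming a kernel context with <linux/highmem.h> available — the helper name and buffer are illustrative, not part of this diff:

/* Hypothetical helper showing the kmap_local_page() pattern that the
 * kernel-doc above recommends over the deprecated kmap_atomic(). */
static void copy_from_page(struct page *page, void *buf, size_t len)
{
	void *vaddr = kmap_local_page(page);

	memcpy(buf, vaddr, len);
	kunmap_local(vaddr);
}

Unlike kmap_atomic(), kmap_local_page() does not disable preemption or pagefaults as a side effect; the mapping is local to the acquiring task and must not be handed to other contexts.]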
@@ -176,7 +176,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ -static inline void *kmap_atomic(struct page *page); +static inline void *kmap_atomic(const struct page *page); /* Highmem related interfaces for management code */ static inline unsigned long nr_free_highpages(void); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 0c4c84b8c3be..c4690e365ade 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -104,6 +104,8 @@ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ #define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" +#define QM_ECC_MBIT BIT(2) + enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, @@ -125,6 +127,7 @@ enum qm_hw_ver { QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, QM_HW_V4 = 0x50, + QM_HW_V5 = 0x51, }; enum qm_fun_type { @@ -239,19 +242,22 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; -struct hisi_qm_err_info { - char *acpi_rst; - u32 msi_wr_port; +struct hisi_qm_err_mask { u32 ecc_2bits_mask; - u32 qm_shutdown_mask; - u32 dev_shutdown_mask; - u32 qm_reset_mask; - u32 dev_reset_mask; + u32 shutdown_mask; + u32 reset_mask; u32 ce; u32 nfe; u32 fe; }; +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + struct hisi_qm_err_mask qm_err; + struct hisi_qm_err_mask dev_err; +}; + struct hisi_qm_err_status { u32 is_qm_ecc_mbit; u32 is_dev_ecc_mbit; @@ -272,6 +278,8 @@ struct hisi_qm_err_ini { enum acc_err_result (*get_err_result)(struct hisi_qm *qm); bool (*dev_is_abnormal)(struct hisi_qm *qm); int (*set_priv_status)(struct hisi_qm *qm); + void (*disable_axi_error)(struct hisi_qm *qm); + void (*enable_axi_error)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7748489fde1b..f327d62fc985 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -94,12 +94,15 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL \ (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) -#define TVA_SMAPS (1 << 0) /* Will be used for procfs */ -#define TVA_IN_PF (1 << 1) /* Page fault handler */ -#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */ +enum tva_type { + TVA_SMAPS, /* Exposing "THPeligible:" in smaps. */ + TVA_PAGEFAULT, /* Serving a page fault. */ + TVA_KHUGEPAGED, /* Khugepaged collapse. */ + TVA_FORCED_COLLAPSE, /* Forced collapse (e.g. MADV_COLLAPSE). 
*/ +}; -#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ - (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define thp_vma_allowable_order(vma, vm_flags, type, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, type, BIT(order))) #define split_folio(f) split_folio_to_list(f, NULL) @@ -264,14 +267,14 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags - * @tva_flags: Which TVA flags to honour + * @type: TVA type * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed @@ -285,11 +288,14 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { - /* Optimization to check if required orders are enabled early. */ - if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { + /* + * Optimization to check if required orders are enabled early. Only + * forced collapse ignores sysfs configs. + */ + if (type != TVA_FORCED_COLLAPSE && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) @@ -303,7 +309,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } - return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); + return __thp_vma_allowable_orders(vma, vm_flags, type, orders); } struct thpsize { @@ -318,16 +324,32 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +/* + * Check whether THPs are explicitly disabled for this VMA, for example, + * through madvise or prctl. + */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, - vm_flags_t vm_flags) -{ + vm_flags_t vm_flags, bool forced_collapse) +{ + /* Are THPs disabled for this VMA? */ + if (vm_flags & VM_NOHUGEPAGE) + return true; + /* Are THPs disabled for all VMAs in the whole process? */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, vma->vm_mm)) + return true; /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. + * Are THPs disabled only for VMAs where we didn't get an explicit + * advise to use them? */ - return (vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); + if (vm_flags & VM_HUGEPAGE) + return false; + /* + * Forcing a collapse (e.g., madv_collapse), is a clear advice to + * use THPs. 
+ */ + if (forced_collapse) + return false; + return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) @@ -479,6 +501,8 @@ extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_folio(const struct folio *folio) { + VM_WARN_ON_ONCE(!folio); + return READ_ONCE(huge_zero_folio) == folio; } @@ -495,6 +519,17 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + if (!IS_ENABLED(CONFIG_PERSISTENT_HUGE_ZERO_FOLIO)) + return NULL; + + if (unlikely(!huge_zero_folio)) + return NULL; + + return huge_zero_folio; +} + static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); @@ -526,7 +561,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { return 0; @@ -553,22 +588,26 @@ static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_huge_page(struct page *page) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline int try_folio_split(struct folio *folio, struct page *page, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} @@ -685,6 +724,11 @@ static inline int change_huge_pud(struct mmu_gather *tlb, { return 0; } + +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + return NULL; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, @@ -698,4 +742,26 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } +/** + * largest_zero_folio - Get the largest zero size folio available + * + * This function shall be used when mm_get_huge_zero_folio() cannot be + * used as there is no appropriate mm lifetime to tie the huge zero folio + * from the caller. + * + * Deduce the size of the folio with folio_size instead of assuming the + * folio size. 
+ * + * Return: pointer to PMD sized zero folio if CONFIG_PERSISTENT_HUGE_ZERO_FOLIO + * is enabled or a single page sized zero folio + */ +static inline struct folio *largest_zero_folio(void) +{ + struct folio *folio = get_persistent_huge_zero_folio(); + + if (folio) + return folio; + + return page_folio(ZERO_PAGE(0)); +} #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 526d27e88b3b..8e63e46b8e1f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -788,9 +788,14 @@ static inline unsigned huge_page_shift(struct hstate *h) return h->order + PAGE_SHIFT; } +static inline bool order_is_gigantic(unsigned int order) +{ + return order > MAX_PAGE_ORDER; +} + static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) > MAX_PAGE_ORDER; + return order_is_gigantic(huge_page_order(h)); } static inline unsigned int pages_per_huge_page(const struct hstate *h) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 2267902d29a7..789e23e67444 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida) xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } -/* - * ida_simple_get() and ida_simple_remove() are deprecated. Use - * ida_alloc() and ida_free() instead respectively. - */ -#define ida_simple_get(ida, start, end, gfp) \ - ida_alloc_range(ida, start, (end) - 1, gfp) -#define ida_simple_remove(ida, id) ida_free(ida, id) - static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6a4479616479..a38b277c2c02 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -382,6 +382,24 @@ int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2); /** + * iio_multiply_value() - Multiply an IIO value + * @result: Destination pointer for the multiplication result + * @multiplier: Multiplier. + * @type: One of the IIO_VAL_* constants. This decides how the @val and + * @val2 parameters are interpreted. + * @val: Value being multiplied. + * @val2: Value being multiplied. @val2 use depends on type. + * + * Multiply an IIO value with a s64 multiplier storing the result as + * IIO_VAL_INT. This is typically used for scaling. + * + * Returns: + * IIO_VAL_INT on success or a negative error-number on failure. 
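[For context: a short, hypothetical sketch of the buffer layout the corrected iio.h kernel-doc above refers to — scan data followed by an 8-byte-aligned s64 timestamp, pushed with iio_push_to_buffers_with_ts(). The struct, field names, and handler are illustrative assumptions, not part of this diff:

/* Illustrative scan layout; the trailing timestamp must be 8-byte
 * aligned, as the kernel-doc above notes. */
struct my_dev_scan {
	__le16 channels[4];
	s64 timestamp __aligned(8);
};

static irqreturn_t my_dev_trigger_handler(int irq, void *p)
{
	struct iio_poll_func *pf = p;
	struct iio_dev *indio_dev = pf->indio_dev;
	struct my_dev_scan scan = { };

	/* ... fill scan.channels from the device ... */
	iio_push_to_buffers_with_ts(indio_dev, &scan, sizeof(scan),
				    iio_get_time_ns(indio_dev));
	iio_trigger_notify_done(indio_dev->trig);
	return IRQ_HANDLED;
}

Passing sizeof(scan) lets the core verify the supplied buffer covers the full scan plus timestamp, which is the point of the iio_push_to_buffers_with_ts() variant named in the hunk above.]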
+ */ +int iio_multiply_value(int *result, s64 multiplier, + unsigned int type, int val, int val2); + +/** * iio_convert_raw_to_processed() - Converts a raw value to a processed value * @chan: The channel being queried * @raw: The raw IIO to convert diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h index de45cf2ee1e4..ce2086f97e3f 100644 --- a/include/linux/iio/frequency/adf4350.h +++ b/include/linux/iio/frequency/adf4350.h @@ -51,7 +51,7 @@ /* REG3 Bit Definitions */ #define ADF4350_REG3_12BIT_CLKDIV(x) ((x) << 3) -#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 16) +#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 15) #define ADF4350_REG3_12BIT_CSR_EN (1 << 18) #define ADF4351_REG3_CHARGE_CANCELLATION_EN (1 << 21) #define ADF4351_REG3_ANTI_BACKLASH_3ns_EN (1 << 22) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d11668f14a3e..872ebdf0dd77 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -271,14 +271,14 @@ struct iio_chan_spec { unsigned int num_ext_scan_type; }; }; - long info_mask_separate; - long info_mask_separate_available; - long info_mask_shared_by_type; - long info_mask_shared_by_type_available; - long info_mask_shared_by_dir; - long info_mask_shared_by_dir_available; - long info_mask_shared_by_all; - long info_mask_shared_by_all_available; + unsigned long info_mask_separate; + unsigned long info_mask_separate_available; + unsigned long info_mask_shared_by_type; + unsigned long info_mask_shared_by_type_available; + unsigned long info_mask_shared_by_dir; + unsigned long info_mask_shared_by_dir_available; + unsigned long info_mask_shared_by_all; + unsigned long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; @@ -779,7 +779,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * them safe for use with non-coherent DMA. * * A number of drivers also use this on buffers that include a 64-bit timestamp - * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * that is used with iio_push_to_buffers_with_ts(). Therefore, in the case where * DMA alignment is not sufficient for proper timestamp alignment, we align to * 8 bytes instead. */ @@ -794,7 +794,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * @name: identifier name of the buffer * @count: number of elements in the buffer * - * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * Declares a buffer that is safe to use with iio_push_to_buffers_with_ts(). In * addition to allocating enough space for @count elements of @type, it also * allocates space for a s64 timestamp at the end of the buffer and ensures * proper alignment of the timestamp. 
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index ad2761efcc83..34eebad12d2c 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -70,6 +70,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_ZEROPOINT, IIO_CHAN_INFO_TROUGH, IIO_CHAN_INFO_CONVDELAY, + IIO_CHAN_INFO_POWERFACTOR, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 2cf89a538b18..d30286298a00 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -17,6 +17,7 @@ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ +#define INPUT_MT_TOTAL_FORCE 0x0020 /* calculate total force from slots pressure */ /** * struct input_mt_slot - represents the state of an input MT slot diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 138fbd89b1e6..8a823c6f2b4a 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -180,6 +180,7 @@ struct io_pgtable_cfg { struct { u64 ttbr[4]; u32 n_ttbrs; + u32 n_levels; } apple_dart_cfg; struct { diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 508beaa44c39..a92b3ff9b934 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -21,10 +21,9 @@ static inline bool use_dma_iommu(struct device *dev) } #endif /* CONFIG_IOMMU_DMA */ -dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs); int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); @@ -43,10 +42,6 @@ size_t iommu_dma_opt_mapping_size(void); size_t iommu_dma_max_mapping_size(struct device *dev); void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs); -dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs); struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void iommu_dma_free_noncontiguous(struct device *dev, size_t size, diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index ca1713fac6e3..a470a73a805a 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -36,8 +36,6 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; - /* v3 compat support (GICv5 hosts, only) */ - bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 6f612d69ea0c..9eca967d8526 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -4,32 +4,46 @@ #include <linux/static_key.h> -#ifdef CONFIG_KASAN_HW_TAGS - +#if defined(CONFIG_ARCH_DEFER_KASAN) || 
defined(CONFIG_KASAN_HW_TAGS) +/* + * Global runtime flag for KASAN modes that need runtime control. + * Used by ARCH_DEFER_KASAN architectures and HW_TAGS mode. + */ DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +/* + * Runtime control for shadow memory initialization or HW_TAGS mode. + * Uses static key for architectures that need deferred KASAN or HW_TAGS. + */ static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_hw_tags_enabled(void) +static inline void kasan_enable(void) { - return kasan_enabled(); + static_branch_enable(&kasan_flag_enabled); } - -#else /* CONFIG_KASAN_HW_TAGS */ - -static inline bool kasan_enabled(void) +#else +/* For architectures that can enable KASAN early, use compile-time check. */ +static __always_inline bool kasan_enabled(void) { return IS_ENABLED(CONFIG_KASAN); } +static inline void kasan_enable(void) {} +#endif /* CONFIG_ARCH_DEFER_KASAN || CONFIG_KASAN_HW_TAGS */ + +#ifdef CONFIG_KASAN_HW_TAGS +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} +#else static inline bool kasan_hw_tags_enabled(void) { return false; } - #endif /* CONFIG_KASAN_HW_TAGS */ #endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe5ce9215821..d12e1a5f5a9a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, } bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, - bool still_accessible); + bool still_accessible, bool no_quarantine); /** * kasan_slab_free - Poison, initialize, and quarantine a slab object. * @object: Object to be freed. @@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, * @Return true if KASAN took ownership of the object; false otherwise. */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init, - bool still_accessible) + void *object, bool init, + bool still_accessible, + bool no_quarantine) { if (kasan_enabled()) - return __kasan_slab_free(s, object, init, still_accessible); + return __kasan_slab_free(s, object, init, still_accessible, + no_quarantine); return false; } @@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - bool init, bool still_accessible) + bool init, bool still_accessible, + bool no_quarantine) { return false; } @@ -543,6 +546,12 @@ void kasan_report_async(void); #endif /* CONFIG_KASAN_HW_TAGS */ +#ifdef CONFIG_KASAN_GENERIC +void __init kasan_init_generic(void); +#else +static inline void kasan_init_generic(void) { } +#endif + #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 989315dabb86..5b46924fdff5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -164,11 +164,23 @@ extern int root_mountflags; extern bool early_boot_irqs_disabled; -/* - * Values used for system_state. Ordering of the states must not be changed +/** + * enum system_states - Values used for system_state. 
+ * + * @SYSTEM_BOOTING: %0, no init needed + * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU + * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running + * @SYSTEM_RUNNING: system is up and running + * @SYSTEM_HALT: system entered clean system halt state + * @SYSTEM_POWER_OFF: system entered shutdown/clean power off state + * @SYSTEM_RESTART: system entered emergency power off or normal restart + * @SYSTEM_SUSPEND: system entered suspend or hibernate state + * + * Note: + * Ordering of the states must not be changed * as code checks for <, <=, >, >= STATE. */ -extern enum system_states { +enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_FREEING_INITMEM, @@ -177,7 +189,8 @@ extern enum system_states { SYSTEM_POWER_OFF, SYSTEM_RESTART, SYSTEM_SUSPEND, -} system_state; +}; +extern enum system_states system_state; /* * General tracing related utility functions - trace_printk(), diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 39fe3e6cd282..ff7e231b0485 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -395,6 +395,9 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; + + /* Force carrying over the DTB from the current boot */ + bool force_dtb; #endif #ifdef CONFIG_CRASH_HOTPLUG @@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ - KEXEC_FILE_NO_CMA) + KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 348844cffb13..559d13a3bc44 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -40,6 +40,7 @@ struct kho_serialization; #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); +bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_phys(phys_addr_t phys, size_t size); @@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void) return false; } +static inline bool is_kho_boot(void) +{ + return false; +} + static inline int kho_preserve_folio(struct folio *folio) { return -EOPNOTSUPP; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index ff6120463745..eb1946a70cff 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H +#include <linux/mm.h> + extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; @@ -20,13 +22,13 @@ extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { - if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, oldmm)) __khugepaged_enter(mm); } static inline void khugepaged_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, mm)) __khugepaged_exit(mm); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index 2b1432cc16d5..f2fd221107bb 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -182,8 +182,7 @@ void kmsan_iounmap_page_range(unsigned long start, unsigned long end); /** * kmsan_handle_dma() - Handle a DMA data 
transfer. - * @page: first page of the buffer. - * @offset: offset of the buffer within the first page. + * @phys: physical address of the buffer. * @size: buffer size. * @dir: one of possible dma_data_direction values. * @@ -192,7 +191,7 @@ void kmsan_iounmap_page_range(unsigned long start, unsigned long end); * * initializes the buffer, if it is copied from device; * * does both, if this is a DMA_BIDIRECTIONAL transfer. */ -void kmsan_handle_dma(struct page *page, size_t offset, size_t size, +void kmsan_handle_dma(phys_addr_t phys, size_t size, enum dma_data_direction dir); /** @@ -372,8 +371,8 @@ static inline void kmsan_iounmap_page_range(unsigned long start, { } -static inline void kmsan_handle_dma(struct page *page, size_t offset, - size_t size, enum dma_data_direction dir) +static inline void kmsan_handle_dma(phys_addr_t phys, size_t size, + enum dma_data_direction dir) { } diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c17b955e7b0b..067538fc4d58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -56,13 +56,19 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { /* Adding mm to ksm is best effort on fork. */ - if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) { + long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages); + + mm->ksm_merging_pages = 0; + mm->ksm_rmap_items = 0; + atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages); __ksm_enter(mm); + } } static inline int ksm_execve(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGE_ANY, mm)) return __ksm_enter(mm); return 0; @@ -70,7 +76,7 @@ static inline int ksm_execve(struct mm_struct *mm) static inline void ksm_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, mm)) __ksm_exit(mm); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..fa36e70df088 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include <linux/entry-virt.h> #include <linux/types.h> #include <linux/hardirq.h> #include <linux/list.h> @@ -52,9 +52,10 @@ /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in - * include/linux/kvm_h. + * include/uapi/linux/kvm.h. */ -#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", @@ -206,6 +207,7 @@ struct kvm_io_range { struct kvm_io_bus { int dev_count; int ioeventfd_count; + struct rcu_head rcu; struct kvm_io_range range[]; }; @@ -602,7 +604,7 @@ struct kvm_memory_slot { short id; u16 as_id; -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD struct { /* * Writes protected by kvm->slots_lock. Acquiring a @@ -615,7 +617,7 @@ struct kvm_memory_slot { #endif }; -static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } @@ -719,17 +721,17 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Arch code must define kvm_arch_has_private_mem if support for private memory - * is enabled. 
- */ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; } #endif +#ifdef CONFIG_KVM_GUEST_MEMFD +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +#endif + #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { @@ -860,7 +862,7 @@ struct kvm { struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES - /* Protected by slots_locks (for writes) and RCU (for reads) */ + /* Protected by slots_lock (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; @@ -966,11 +968,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } +/* + * Get a bus reference under the update-side lock. No long-term SRCU reader + * references are permitted, to avoid stale reads vs concurrent IO + * registrations. + */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock) || - !refcount_read(&kvm->users_count)); + return rcu_dereference_protected(kvm->buses[idx], + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) @@ -2444,13 +2450,24 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of @@ -2490,6 +2507,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } +static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) +{ + if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) + return false; + + return slot->flags & KVM_MEMSLOT_GMEM_ONLY; +} + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { @@ -2505,8 +2530,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && - kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) @@ -2515,7 +2539,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); @@ -2528,13 +2552,13 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, KVM_BUG_ON(1, kvm); return -EIO; } 
-#endif /* CONFIG_KVM_PRIVATE_MEM */ +#endif /* CONFIG_KVM_GUEST_MEMFD */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif -#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 827ecc0b7e10..490464c205b4 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -3,6 +3,23 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +#include <linux/bits.h> +#include <linux/export.h> +#include <linux/types.h> +#include <asm/kvm_types.h> + +#ifdef KVM_SUB_MODULES +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \ + EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES)) +#else +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) +#endif + +#ifndef __ASSEMBLER__ + +#include <linux/mutex.h> +#include <linux/spinlock_types.h> + struct kvm; struct kvm_async_pf; struct kvm_device_ops; @@ -19,13 +36,6 @@ struct kvm_memslots; enum kvm_mr_change; -#include <linux/bits.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/spinlock_types.h> - -#include <asm/kvm_types.h> - /* * Address types: * @@ -116,5 +126,6 @@ struct kvm_vcpu_stat_generic { }; #define KVM_STATS_NAME_SIZE 48 +#endif /* !__ASSEMBLER__ */ #endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/list.h b/include/linux/list.h index 7f7657e41620..00ea8e5fb88b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -20,8 +20,16 @@ * using the generic single-entry routines. */ +/** + * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself + * @name: name of the list_head + */ #define LIST_HEAD_INIT(name) { &(name), &(name) } +/** + * LIST_HEAD - definition of a &struct list_head with initialization values + * @name: name of the list_head + */ #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) @@ -701,16 +709,6 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** - * list_for_each_rcu - Iterate over a list in an RCU-safe fashion - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - !list_is_head(pos, (head)); \ - pos = rcu_dereference(pos->next)) - -/** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. 
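Since LIST_HEAD() and LIST_HEAD_INIT() now carry kernel-doc, a short usage reminder may help readers; this sketch (the job/queue names are made up) shows where each initializer applies:

#include <linux/list.h>

static LIST_HEAD(pending_jobs);		/* definition + initialization in one step */

struct job_queue {
	struct list_head jobs;
};

static struct job_queue queue = {
	.jobs = LIST_HEAD_INIT(queue.jobs),	/* head embedded in another object */
};

struct job {
	struct list_head node;
	int id;
};

static void enqueue_job(struct job *j)
{
	list_add_tail(&j->node, &pending_jobs);
}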
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 2ba846419524..0d91d060e3e9 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -66,6 +66,8 @@ */ #define local_trylock(lock) __local_trylock(this_cpu_ptr(lock)) +#define local_lock_is_locked(lock) __local_lock_is_locked(lock) + /** * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable * interrupts if acquired diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index d80b5306a2c0..a4dc479157b5 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -17,7 +17,10 @@ typedef struct { /* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { - local_lock_t llock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + struct task_struct *owner; +#endif u8 acquired; } local_trylock_t; @@ -31,7 +34,7 @@ typedef struct { .owner = NULL, # define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ - .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { @@ -81,7 +84,7 @@ do { \ local_lock_debug_init(lock); \ } while (0) -#define __local_trylock_init(lock) __local_lock_init(lock.llock) +#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock) #define __spinlock_nested_bh_init(lock) \ do { \ @@ -162,6 +165,9 @@ do { \ !!tl; \ }) +/* preemption or migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired) + #define __local_lock_release(lock) \ do { \ local_trylock_t *tl; \ @@ -282,4 +288,8 @@ do { \ __local_trylock(lock); \ }) +/* migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(__lock) \ + (rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index adbe234a6f6c..8c42b4bde09c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -85,7 +85,7 @@ LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, - struct qstr *name, const struct cred *old, struct cred *new) + const struct qstr *name, const struct cred *old, struct cred *new) #ifdef CONFIG_SECURITY_PATH LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h new file mode 100644 index 000000000000..e135c6564d0c --- /dev/null +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Ventana Micro Systems Inc. 
*/ + +#ifndef _LINUX_RISCV_RPMI_MESSAGE_H_ +#define _LINUX_RISCV_RPMI_MESSAGE_H_ + +#include <linux/errno.h> +#include <linux/mailbox_client.h> +#include <linux/types.h> +#include <linux/wordpart.h> + +/* RPMI version encode/decode macros */ +#define RPMI_VER_MAJOR(__ver) upper_16_bits(__ver) +#define RPMI_VER_MINOR(__ver) lower_16_bits(__ver) +#define RPMI_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* RPMI message header */ +struct rpmi_message_header { + __le16 servicegroup_id; + u8 service_id; + u8 flags; + __le16 datalen; + __le16 token; +}; + +/* RPMI message */ +struct rpmi_message { + struct rpmi_message_header header; + u8 data[]; +}; + +/* RPMI notification event */ +struct rpmi_notification_event { + __le16 event_datalen; + u8 event_id; + u8 reserved; + u8 event_data[]; +}; + +/* RPMI error codes */ +enum rpmi_error_codes { + RPMI_SUCCESS = 0, + RPMI_ERR_FAILED = -1, + RPMI_ERR_NOTSUPP = -2, + RPMI_ERR_INVALID_PARAM = -3, + RPMI_ERR_DENIED = -4, + RPMI_ERR_INVALID_ADDR = -5, + RPMI_ERR_ALREADY = -6, + RPMI_ERR_EXTENSION = -7, + RPMI_ERR_HW_FAULT = -8, + RPMI_ERR_BUSY = -9, + RPMI_ERR_INVALID_STATE = -10, + RPMI_ERR_BAD_RANGE = -11, + RPMI_ERR_TIMEOUT = -12, + RPMI_ERR_IO = -13, + RPMI_ERR_NO_DATA = -14, + RPMI_ERR_RESERVED_START = -15, + RPMI_ERR_RESERVED_END = -127, + RPMI_ERR_VENDOR_START = -128, +}; + +static inline int rpmi_to_linux_error(int rpmi_error) +{ + switch (rpmi_error) { + case RPMI_SUCCESS: + return 0; + case RPMI_ERR_INVALID_PARAM: + case RPMI_ERR_BAD_RANGE: + case RPMI_ERR_INVALID_STATE: + return -EINVAL; + case RPMI_ERR_DENIED: + return -EPERM; + case RPMI_ERR_INVALID_ADDR: + case RPMI_ERR_HW_FAULT: + return -EFAULT; + case RPMI_ERR_ALREADY: + return -EALREADY; + case RPMI_ERR_BUSY: + return -EBUSY; + case RPMI_ERR_TIMEOUT: + return -ETIMEDOUT; + case RPMI_ERR_IO: + return -ECOMM; + case RPMI_ERR_FAILED: + case RPMI_ERR_NOTSUPP: + case RPMI_ERR_NO_DATA: + case RPMI_ERR_EXTENSION: + default: + return -EOPNOTSUPP; + } +} + +/* RPMI service group IDs */ +#define RPMI_SRVGRP_SYSTEM_MSI 0x00002 +#define RPMI_SRVGRP_CLOCK 0x00008 + +/* RPMI clock service IDs */ +enum rpmi_clock_service_id { + RPMI_CLK_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_CLK_SRV_GET_NUM_CLOCKS = 0x02, + RPMI_CLK_SRV_GET_ATTRIBUTES = 0x03, + RPMI_CLK_SRV_GET_SUPPORTED_RATES = 0x04, + RPMI_CLK_SRV_SET_CONFIG = 0x05, + RPMI_CLK_SRV_GET_CONFIG = 0x06, + RPMI_CLK_SRV_SET_RATE = 0x07, + RPMI_CLK_SRV_GET_RATE = 0x08, + RPMI_CLK_SRV_ID_MAX_COUNT +}; + +/* RPMI system MSI service IDs */ +enum rpmi_sysmsi_service_id { + RPMI_SYSMSI_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_SYSMSI_SRV_GET_ATTRIBUTES = 0x02, + RPMI_SYSMSI_SRV_GET_MSI_ATTRIBUTES = 0x03, + RPMI_SYSMSI_SRV_SET_MSI_STATE = 0x04, + RPMI_SYSMSI_SRV_GET_MSI_STATE = 0x05, + RPMI_SYSMSI_SRV_SET_MSI_TARGET = 0x06, + RPMI_SYSMSI_SRV_GET_MSI_TARGET = 0x07, + RPMI_SYSMSI_SRV_ID_MAX_COUNT +}; + +/* RPMI Linux mailbox attribute IDs */ +enum rpmi_mbox_attribute_id { + RPMI_MBOX_ATTR_SPEC_VERSION, + RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE, + RPMI_MBOX_ATTR_SERVICEGROUP_ID, + RPMI_MBOX_ATTR_SERVICEGROUP_VERSION, + RPMI_MBOX_ATTR_IMPL_ID, + RPMI_MBOX_ATTR_IMPL_VERSION, + RPMI_MBOX_ATTR_MAX_ID +}; + +/* RPMI Linux mailbox message types */ +enum rpmi_mbox_message_type { + RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE, + RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE, + RPMI_MBOX_MSG_TYPE_NOTIFICATION_EVENT, + RPMI_MBOX_MSG_MAX_TYPE +}; + +/* RPMI Linux mailbox message instance */ +struct 
rpmi_mbox_message { + enum rpmi_mbox_message_type type; + union { + struct { + enum rpmi_mbox_attribute_id id; + u32 value; + } attr; + + struct { + u32 service_id; + void *request; + unsigned long request_len; + void *response; + unsigned long max_response_len; + unsigned long out_response_len; + } data; + + struct { + u16 event_datalen; + u8 event_id; + u8 *event_data; + } notif; + }; + int error; +}; + +/* RPMI Linux mailbox message helper routines */ +static inline void rpmi_mbox_init_get_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id) +{ + msg->type = RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = 0; + msg->error = 0; +} + +static inline void rpmi_mbox_init_set_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id, + u32 value) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = value; + msg->error = 0; +} + +static inline void rpmi_mbox_init_send_with_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len, + void *response, + unsigned long max_response_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = response; + msg->data.max_response_len = max_response_len; + msg->data.out_response_len = 0; + msg->error = 0; +} + +static inline void rpmi_mbox_init_send_without_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = NULL; + msg->data.max_response_len = 0; + msg->data.out_response_len = 0; + msg->error = 0; +} + +static inline void *rpmi_mbox_get_msg_response(struct rpmi_mbox_message *msg) +{ + return msg ? msg->data.response : NULL; +} + +static inline int rpmi_mbox_send_message(struct mbox_chan *chan, + struct rpmi_mbox_message *msg) +{ + int ret; + + /* Send message for the underlying mailbox channel */ + ret = mbox_send_message(chan, msg); + if (ret < 0) + return ret; + + /* Explicitly signal txdone for mailbox channel */ + ret = msg->error; + mbox_client_txdone(chan, ret); + return ret; +} + +#endif /* _LINUX_RISCV_RPMI_MESSAGE_H_ */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index ad01c4082358..80a427c7ca29 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -66,6 +66,7 @@ struct mbox_chan_ops { * no interrupt rises. Ignored if 'txdone_irq' is set. * @txpoll_period: If 'txdone_poll' is in effect, the API polls for * last TX's status after these many millisecs + * @fw_xlate: Controller driver specific mapping of channel via fwnode * @of_xlate: Controller driver specific mapping of channel via DT * @poll_hrt: API private. hrtimer used to poll for TXDONE on all * channels. 
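Returning to the RPMI mailbox helpers defined in riscv-rpmi-message.h above: a sketch of a request/response round trip from a client driver with a bound mbox_chan follows. Only the helper routines, the service IDs, and rpmi_to_linux_error() come from the header; the GET_RATE wire layouts (struct get_rate_req/get_rate_rsp), the assumption that the first response word carries an RPMI status code, and the assumption that msg.error holds a Linux errno filled in by the mailbox driver are all illustrative guesses:

#include <linux/mailbox/riscv-rpmi-message.h>

static int example_clk_get_rate(struct mbox_chan *chan, u32 clk_id, u64 *rate)
{
	struct get_rate_req {
		__le32 clkid;
	} req = { .clkid = cpu_to_le32(clk_id) };
	struct get_rate_rsp {
		__le32 status;
		__le32 rate_lo;
		__le32 rate_hi;
	} rsp = { };
	struct rpmi_mbox_message msg;
	int ret;

	rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_RATE,
					  &req, sizeof(req), &rsp, sizeof(rsp));
	ret = rpmi_mbox_send_message(chan, &msg);
	if (ret)
		return ret;

	/* Map the (assumed) RPMI status word to a Linux errno. */
	ret = rpmi_to_linux_error((int)le32_to_cpu(rsp.status));
	if (ret)
		return ret;

	*rate = ((u64)le32_to_cpu(rsp.rate_hi) << 32) | le32_to_cpu(rsp.rate_lo);
	return 0;
}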
@@ -79,6 +80,8 @@ struct mbox_controller { bool txdone_irq; bool txdone_poll; unsigned txpoll_period; + struct mbox_chan *(*fw_xlate)(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp); struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, const struct of_phandle_args *sp); /* Internal to API */ diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index bafe143b1f78..66f98a3da8d8 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -57,17 +57,17 @@ * MT_FLAGS_ALLOC_RANGE flag. * * Node types: - * 0x??1 = Root - * 0x?00 = 16 bit nodes - * 0x010 = 32 bit nodes - * 0x110 = 64 bit nodes + * 0b??1 = Root + * 0b?00 = 16 bit nodes + * 0b010 = 32 bit nodes + * 0b110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location - * 0x??1 : Root - * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 - * 0x010 : 32 bit values, type in 0-2, slot in 3-6 - * 0x110 : 64 bit values, type in 0-2, slot in 3-6 + * 0b??1 : Root + * 0b?00 : 16 bit values, type in 0-1, slot in 2-6 + * 0b010 : 32 bit values, type in 0-2, slot in 3-6 + * 0b110 : 64 bit values, type in 0-2, slot in 3-6 */ /* @@ -194,7 +194,6 @@ enum store_type { #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP -typedef struct lockdep_map *lockdep_map_p; #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) @@ -207,7 +206,6 @@ typedef struct lockdep_map *lockdep_map_p; #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else -typedef struct { /* nothing */ } lockdep_map_p; #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) @@ -230,8 +228,10 @@ typedef struct { /* nothing */ } lockdep_map_p; */ struct maple_tree { union { - spinlock_t ma_lock; - lockdep_map_p ma_external_lock; + spinlock_t ma_lock; +#ifdef CONFIG_LOCKDEP + struct lockdep_map *ma_external_lock; +#endif }; unsigned int ma_flags; void __rcu *ma_root; @@ -442,7 +442,9 @@ struct ma_state { struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ - struct maple_alloc *alloc; /* Allocated nodes for this operation */ + struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ + struct maple_node *alloc; /* A single allocated node for fast path writes */ + unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; @@ -481,6 +483,9 @@ struct ma_wr_state { #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) +/* + * When changing MA_STATE, remember to also change rust/kernel/maple_tree.rs + */ #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ @@ -490,7 +495,9 @@ struct ma_wr_state { .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ + .sheaf = NULL, \ .alloc = NULL, \ + .node_request = 0, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fcda8481de9a..221118b5a16e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -324,28 +324,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; 
__next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, - unsigned long *out_epfn); - -/** - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific - * free memblock areas from a given point - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone, continuing from current position. Available as soon as memblock is - * initialized. - */ -#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ - for (; i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** * for_each_free_mem_range - iterate through free memblock areas diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb27e3d2fdac..16fe0306e50e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -341,17 +341,25 @@ enum page_memcg_data_flags { __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; +#define __OBJEXTS_ALLOC_FAIL MEMCG_DATA_OBJEXTS #define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS #else /* CONFIG_MEMCG */ +#define __OBJEXTS_ALLOC_FAIL (1UL << 0) #define __FIRST_OBJEXT_FLAG (1UL << 0) #endif /* CONFIG_MEMCG */ enum objext_flags { - /* slabobj_ext vector failed to allocate */ - OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG, + /* + * Use bit 0 with zero other bits to signal that slabobj_ext vector + * failed to allocate. The same bit 0 with valid upper bits means + * MEMCG_DATA_OBJEXTS. + */ + OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, + /* slabobj_ext vector allocated with kmalloc_nolock() */ + OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG, /* the next bit after the last actual flag */ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), }; @@ -900,7 +908,13 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(gfp_t gfp_mask); +void __mem_cgroup_handle_over_high(gfp_t gfp_mask); + +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) +{ + if (unlikely(current->memcg_nr_pages_over_high)) + __mem_cgroup_handle_over_high(gfp_mask); +} unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1053,6 +1067,8 @@ extern int mem_cgroup_init(void); #define MEM_CGROUP_ID_SHIFT 0 +#define root_mem_cgroup (NULL) + static inline struct mem_cgroup *folio_memcg(struct folio *folio) { return NULL; diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..0c214256216f 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,13 +115,13 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. 
*/ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 @@ -159,7 +159,7 @@ static inline unsigned long memory_block_advised_max_size(void) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); @@ -202,8 +202,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys) } #ifdef CONFIG_NUMA -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context); +void memory_block_add_nid_early(struct memory_block *mem, int nid); #endif /* CONFIG_NUMA */ int memory_block_advise_max_size(unsigned long size); unsigned long memory_block_advised_max_size(void); diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7b151441341b..34941a4b9026 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -15,7 +15,7 @@ struct kmem_cache; typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); -typedef struct mempool_s { +typedef struct mempool { spinlock_t lock; int min_nr; /* nr of elements at *elements */ int curr_nr; /* Current nr of elements at *elements */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 4aa151914eab..e5951ba12a28 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -157,45 +157,52 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) return 1 << pgmap->vmemmap_shift; } -static inline bool is_device_private_page(const struct page *page) +static inline bool folio_is_device_private(const struct folio *folio) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PRIVATE; } -static inline bool folio_is_device_private(const struct folio *folio) +static inline bool is_device_private_page(const struct page *page) { - return is_device_private_page(&folio->page); + return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + folio_is_device_private(page_folio(page)); } -static inline bool is_pci_p2pdma_page(const struct page *page) +static inline bool folio_is_pci_p2pdma(const struct folio *folio) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } -static inline bool is_device_coherent_page(const struct page *page) +static inline bool is_pci_p2pdma_page(const struct page *page) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; + return IS_ENABLED(CONFIG_PCI_P2PDMA) && + folio_is_pci_p2pdma(page_folio(page)); } static inline bool folio_is_device_coherent(const struct folio *folio) { - return is_device_coherent_page(&folio->page); + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_COHERENT; } -static inline bool is_fsdax_page(const struct page *page) +static inline bool is_device_coherent_page(const struct page 
*page) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; + return folio_is_device_coherent(page_folio(page)); } static inline bool folio_is_fsdax(const struct folio *folio) { - return is_fsdax_page(&folio->page); + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_FS_DAX; +} + +static inline bool is_fsdax_page(const struct page *page) +{ + return folio_is_fsdax(page_folio(page)); } #ifdef CONFIG_ZONE_DEVICE @@ -204,8 +211,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); -struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap); +struct dev_pagemap *get_dev_pagemap(unsigned long pfn); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); @@ -227,8 +233,7 @@ static inline void devm_memunmap_pages(struct device *dev, { } -static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap) +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn) { return NULL; } diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 85eca44f39ab..38892ba7b8a4 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -10,6 +10,7 @@ #define PM886_IRQ_ONKEY 0 #define PM886_PAGE_OFFSET_REGULATORS 1 +#define PM886_PAGE_OFFSET_GPADC 2 #define PM886_REG_ID 0x00 @@ -70,6 +71,63 @@ #define PM886_LDO_VSEL_MASK 0x0f #define PM886_BUCK_VSEL_MASK 0x7f +/* GPADC enable/disable registers */ +#define PM886_REG_GPADC_CONFIG(n) (n) + +#define PM886_GPADC_VSC_EN BIT(0) +#define PM886_GPADC_VBAT_EN BIT(1) +#define PM886_GPADC_GNDDET1_EN BIT(3) +#define PM886_GPADC_VBUS_EN BIT(4) +#define PM886_GPADC_VCHG_PWR_EN BIT(5) +#define PM886_GPADC_VCF_OUT_EN BIT(6) +#define PM886_GPADC_CONFIG1_EN_ALL \ + (PM886_GPADC_VSC_EN | \ + PM886_GPADC_VBAT_EN | \ + PM886_GPADC_GNDDET1_EN | \ + PM886_GPADC_VBUS_EN | \ + PM886_GPADC_VCHG_PWR_EN | \ + PM886_GPADC_VCF_OUT_EN) + +#define PM886_GPADC_TINT_EN BIT(0) +#define PM886_GPADC_PMODE_EN BIT(1) +#define PM886_GPADC_GPADC0_EN BIT(2) +#define PM886_GPADC_GPADC1_EN BIT(3) +#define PM886_GPADC_GPADC2_EN BIT(4) +#define PM886_GPADC_GPADC3_EN BIT(5) +#define PM886_GPADC_MIC_DET_EN BIT(6) +#define PM886_GPADC_CONFIG2_EN_ALL \ + (PM886_GPADC_TINT_EN | \ + PM886_GPADC_GPADC0_EN | \ + PM886_GPADC_GPADC1_EN | \ + PM886_GPADC_GPADC2_EN | \ + PM886_GPADC_GPADC3_EN | \ + PM886_GPADC_MIC_DET_EN) + +/* No CONFIG3_EN_ALL because this is the only bit there. */ +#define PM886_GPADC_GND_DET2_EN BIT(0) + +/* GPADC channel registers */ +#define PM886_REG_GPADC_VSC 0x40 +#define PM886_REG_GPADC_VCHG_PWR 0x4c +#define PM886_REG_GPADC_VCF_OUT 0x4e +#define PM886_REG_GPADC_TINT 0x50 +#define PM886_REG_GPADC_GPADC0 0x54 +#define PM886_REG_GPADC_GPADC1 0x56 +#define PM886_REG_GPADC_GPADC2 0x58 +#define PM886_REG_GPADC_VBAT 0xa0 +#define PM886_REG_GPADC_GND_DET1 0xa4 +#define PM886_REG_GPADC_GND_DET2 0xa6 +#define PM886_REG_GPADC_VBUS 0xa8 +#define PM886_REG_GPADC_GPADC3 0xaa +#define PM886_REG_GPADC_MIC_DET 0xac +#define PM886_REG_GPADC_VBAT_SLP 0xb0 + +/* VBAT_SLP is the last register and is 2 bytes wide like other channels. 
*/ +#define PM886_GPADC_MAX_REGISTER (PM886_REG_GPADC_VBAT_SLP + 1) + +#define PM886_GPADC_BIAS_LEVELS 16 +#define PM886_GPADC_INDEX_TO_BIAS_uA(i) (1 + (i) * 5) + struct pm886_chip { struct i2c_client *client; unsigned int chip_id; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9009e27b5f44..1f0ac122c3bf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -12,14 +12,6 @@ typedef void free_folio_t(struct folio *folio, unsigned long private); struct migration_target_control; -/* - * Return values from addresss_space_operations.migratepage(): - * - negative errno on page migration failure; - * - zero on page migration success; - */ -#define MIGRATEPAGE_SUCCESS 0 -#define MIGRATEPAGE_UNMAP 1 - /** * struct movable_operations - Driver page migration * @isolate_page: @@ -35,8 +27,7 @@ struct migration_target_control; * @src page. The driver should copy the contents of the * @src page to the @dst page and set up the fields of @dst page. * Both pages are locked. - * If page migration is successful, the driver should - * return MIGRATEPAGE_SUCCESS. + * If page migration is successful, the driver should return 0. * If the driver cannot migrate the page at the moment, it can return * -EAGAIN. The VM interprets this as a temporary migration failure and * will retry it later. Any other error value is a permanent migration diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3e6deb00fc85..7d0aa718499c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -70,7 +70,16 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define EISA_EEPROM_MINOR 241 #define RFKILL_MINOR 242 + +/* + * Misc char device minor code space division related to below macro: + * + * < 255 : Fixed minor code + * == 255 : Indicator to request dynamic minor code + * > 255 : Dynamic minor code requested, 1048320 minor codes totally. 
+ */ #define MISC_DYNAMIC_MINOR 255 struct miscdevice { diff --git a/include/linux/mm.h b/include/linux/mm.h index 0905eb6b55ec..a3f97c551ad8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,6 +34,8 @@ #include <linux/slab.h> #include <linux/cacheinfo.h> #include <linux/rcuwait.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> struct mempolicy; struct anon_vma; @@ -69,6 +71,15 @@ static inline void totalram_pages_add(long count) extern void * high_memory; +/* + * Convert between pages and MB + * 20 is the shift for 1MB (2^20 = 1MB) + * PAGE_SHIFT is the shift for page size (e.g., 12 for 4KB pages) + * So (20 - PAGE_SHIFT) converts between pages and MB + */ +#define PAGES_TO_MB(pages) ((pages) >> (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; #else @@ -198,11 +209,13 @@ extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) +bool page_range_contiguous(const struct page *page, unsigned long nr_pages); #else -#define nth_page(page,n) ((page) + (n)) -#define folio_page_idx(folio, p) ((p) - &(folio)->page) +static inline bool page_range_contiguous(const struct page *page, + unsigned long nr_pages) +{ + return true; +} #endif /* to align the pointer to the (next) page boundary */ @@ -214,6 +227,20 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) +/** + * folio_page_idx - Return the number of a page in a folio. + * @folio: The folio. + * @page: The folio page. + * + * This function expects that the page is actually part of the folio. + * The returned number is relative to the start of the folio. + */ +static inline unsigned long folio_page_idx(const struct folio *folio, + const struct page *page) +{ + return page - &folio->page; +} + static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); @@ -648,13 +675,21 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif +#ifdef CONFIG_FIND_NORMAL_PAGE /* - * Called by vm_normal_page() for special PTEs to find the - * page for @addr. This is useful if the default behavior - * (using pte_page()) would not find the correct page. + * Called by vm_normal_page() for special PTEs in @vma at @addr. This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". + * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. 
*/ - struct page *(*find_special_page)(struct vm_area_struct *vma, - unsigned long addr); + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ }; #ifdef CONFIG_NUMA_BALANCING @@ -684,7 +719,7 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) vma_assert_locked(vmf->vma); @@ -697,12 +732,42 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } #endif /* CONFIG_PER_VMA_LOCK */ +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) +{ + return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) +{ + return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +static inline void mm_flags_set(int flag, struct mm_struct *mm) +{ + set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +static inline void mm_flags_clear(int flag, struct mm_struct *mm) +{ + clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + +static inline void mm_flags_clear_all(struct mm_struct *mm) +{ + bitmap_zero(ACCESS_PRIVATE(&mm->flags, __mm_flags), NUM_MM_FLAG_BITS); +} + extern const struct vm_operations_struct vma_dummy_vm_ops; static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) @@ -810,7 +875,7 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) vma->vm_end >= vma->vm_mm->start_stack; } -static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) +static inline bool vma_is_temporary_stack(const struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -824,7 +889,7 @@ static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) return false; } -static inline bool vma_is_foreign(struct vm_area_struct *vma) +static inline bool vma_is_foreign(const struct vm_area_struct *vma) { if (!current->mm) return true; @@ -835,7 +900,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } -static inline bool vma_is_accessible(struct vm_area_struct *vma) +static inline bool vma_is_accessible(const struct vm_area_struct *vma) { return vma->vm_flags & VM_ACCESS_FLAGS; } @@ -846,7 +911,7 @@ static inline bool is_shared_maywrite(vm_flags_t vm_flags) (VM_SHARED | VM_MAYWRITE); } -static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) { return is_shared_maywrite(vma->vm_flags); } @@ -930,14 +995,14 @@ static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) * The vma_is_shmem is not inline because it is used only by slow * paths in userfault. 
*/ -bool vma_is_shmem(struct vm_area_struct *vma); -bool vma_is_anon_shmem(struct vm_area_struct *vma); +bool vma_is_shmem(const struct vm_area_struct *vma); +bool vma_is_anon_shmem(const struct vm_area_struct *vma); #else -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } -static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } +static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false; } +static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } #endif -int vma_is_stack_for_current(struct vm_area_struct *vma); +int vma_is_stack_for_current(const struct vm_area_struct *vma); /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } @@ -953,12 +1018,12 @@ static inline unsigned int folio_large_order(const struct folio *folio) } #ifdef NR_PAGES_IN_LARGE_FOLIO -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return folio->_nr_pages; } #else -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return 1L << folio_large_order(folio); } @@ -971,11 +1036,11 @@ static inline long folio_large_nr_pages(const struct folio *folio) * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ -static inline unsigned int compound_order(struct page *page) +static inline unsigned int compound_order(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 0; return folio_large_order(folio); } @@ -1191,7 +1256,7 @@ int folio_mc_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); /* Returns the number of bytes in this potentially compound page. 
*/ -static inline unsigned long page_size(struct page *page) +static inline unsigned long page_size(const struct page *page) { return PAGE_SIZE << compound_order(page); } @@ -1505,21 +1570,26 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) */ static inline int page_zone_id(struct page *page) { - return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; + return (page->flags.f >> ZONEID_PGSHIFT) & ZONEID_MASK; } #ifdef NODE_NOT_IN_PAGE_FLAGS -int page_to_nid(const struct page *page); +int memdesc_nid(memdesc_flags_t mdf); #else -static inline int page_to_nid(const struct page *page) +static inline int memdesc_nid(memdesc_flags_t mdf) { - return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (mdf.f >> NODES_PGSHIFT) & NODES_MASK; } #endif +static inline int page_to_nid(const struct page *page) +{ + return memdesc_nid(PF_POISONED_CHECK(page)->flags); +} + static inline int folio_nid(const struct folio *folio) { - return page_to_nid(&folio->page); + return memdesc_nid(folio->flags); } #ifdef CONFIG_NUMA_BALANCING @@ -1588,14 +1658,14 @@ static inline void page_cpupid_reset_last(struct page *page) #else static inline int folio_last_cpupid(struct folio *folio) { - return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags.f >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { - page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; + page->flags.f |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ @@ -1691,7 +1761,7 @@ static inline u8 page_kasan_tag(const struct page *page) u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { - tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + tag = (page->flags.f >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; tag ^= 0xff; } @@ -1706,12 +1776,12 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag) return; tag ^= 0xff; - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); do { flags = old_flags; flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) @@ -1742,28 +1812,33 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } -static inline struct zone *folio_zone(const struct folio *folio) +static inline pg_data_t *folio_pgdat(const struct folio *folio) { - return page_zone(&folio->page); + return NODE_DATA(folio_nid(folio)); } -static inline pg_data_t *folio_pgdat(const struct folio *folio) +static inline struct zone *folio_zone(const struct folio *folio) { - return page_pgdat(&folio->page); + return &folio_pgdat(folio)->node_zones[folio_zonenum(folio)]; } #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; + page->flags.f &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(const struct page *page) +static inline unsigned long memdesc_section(memdesc_flags_t mdf) { - return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; + return (mdf.f 
>> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -#endif +#else /* !SECTION_IN_PAGE_FLAGS */ +static inline unsigned long memdesc_section(memdesc_flags_t mdf) +{ + return 0; +} +#endif /* SECTION_IN_PAGE_FLAGS */ /** * folio_pfn - Return the Page Frame Number of a folio. @@ -1785,7 +1860,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) } #ifdef CONFIG_MMU -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +static inline pte_t mk_pte(const struct page *page, pgprot_t pgprot) { return pfn_pte(page_to_pfn(page), pgprot); } @@ -1800,7 +1875,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) +static inline pte_t folio_mk_pte(const struct folio *folio, pgprot_t pgprot) { return pfn_pte(folio_pfn(folio), pgprot); } @@ -1816,7 +1891,7 @@ static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) +static inline pmd_t folio_mk_pmd(const struct folio *folio, pgprot_t pgprot) { return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); } @@ -1832,7 +1907,7 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot) +static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot) { return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot)); } @@ -1900,7 +1975,7 @@ static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, { VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); - if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) + if (!mm_flags_test(MMF_HAS_PINNED, vma->vm_mm)) return false; return folio_maybe_dma_pinned(folio); @@ -1966,14 +2041,14 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) static inline void set_page_zone(struct page *page, enum zone_type zone) { - page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); - page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; + page->flags.f &= ~(ZONES_MASK << ZONES_PGSHIFT); + page->flags.f |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } static inline void set_page_node(struct page *page, unsigned long node) { - page->flags &= ~(NODES_MASK << NODES_PGSHIFT); - page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + page->flags.f &= ~(NODES_MASK << NODES_PGSHIFT); + page->flags.f |= (node & NODES_MASK) << NODES_PGSHIFT; } static inline void set_page_links(struct page *page, enum zone_type zone, @@ -1992,30 +2067,46 @@ static inline void set_page_links(struct page *page, enum zone_type zone, * * Return: A positive power of two. */ -static inline long folio_nr_pages(const struct folio *folio) +static inline unsigned long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; return folio_large_nr_pages(folio); } -/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +/* + * We don't expect any folios that exceed buddy sizes (and consequently + * memory sections). + */ +#define MAX_FOLIO_ORDER MAX_PAGE_ORDER +#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +/* + * Only pages within a single memory section are guaranteed to be + * contiguous. 
By limiting folios to a single memory section, all folio + * pages are guaranteed to be contiguous. + */ +#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT #else -#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +/* + * There is no real limit on the folio size. We limit them to the maximum we + * currently expect (e.g., hugetlb, dax). + */ +#define MAX_FOLIO_ORDER PUD_ORDER #endif +#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ -static inline long compound_nr(struct page *page) +static inline unsigned long compound_nr(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 1; return folio_large_nr_pages(folio); } @@ -2351,6 +2442,8 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); +struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t pud); void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); @@ -2529,7 +2622,7 @@ void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, - struct task_struct *task, bool bypass_rlim); + const struct task_struct *task, bool bypass_rlim); struct kvec; struct page *get_dump_page(unsigned long addr, int *locked); @@ -2654,7 +2747,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm) unsigned long _rss = get_mm_rss(mm); if (data_race(mm->hiwater_rss) < _rss) - (mm)->hiwater_rss = _rss; + data_race(mm->hiwater_rss = _rss); } static inline void update_hiwater_vm(struct mm_struct *mm) @@ -2846,16 +2939,22 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +enum pt_flags { + PT_reserved = PG_reserved, + /* High bits are used for zone/node/section */ +}; + static inline struct ptdesc *virt_to_ptdesc(const void *x) { return page_ptdesc(virt_to_page(x)); } -static inline void *ptdesc_to_virt(const struct ptdesc *pt) -{ - return page_to_virt(ptdesc_page(pt)); -} - +/** + * ptdesc_address - Virtual address of page table. + * @pt: Page table descriptor. + * + * Return: The first byte of the page table described by @pt. 
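 *
 * A minimal usage sketch (editorial example, not part of this patch;
 * pgtable_va is assumed to be the kernel virtual address of a page
 * table page):
 *
 *	struct ptdesc *pt = virt_to_ptdesc(pgtable_va);
 *
 *	WARN_ON(ptdesc_address(pt) != pgtable_va);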
+ */ static inline void *ptdesc_address(const struct ptdesc *pt) { return folio_address(ptdesc_folio(pt)); @@ -2863,7 +2962,7 @@ static inline void *ptdesc_address(const struct ptdesc *pt) static inline bool pagetable_is_reserved(struct ptdesc *pt) { - return folio_test_reserved(ptdesc_folio(pt)); + return test_bit(PT_reserved, &pt->pt_flags.f); } /** @@ -2973,21 +3072,26 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ +static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) +{ + return compound_nr(ptdesc_page(ptdesc)); +} + static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); - __folio_set_pgtable(folio); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __SetPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); ptlock_free(ptdesc); - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); + __ClearPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc) @@ -3292,7 +3396,7 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) /* mmap.c */ -extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); +extern int __vm_enough_memory(const struct mm_struct *mm, long pages, int cap_sys_admin); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, @@ -3432,7 +3536,7 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return mtree_load(&mm->mm_mt, addr); } -static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +static inline unsigned long stack_guard_start_gap(const struct vm_area_struct *vma) { if (vma->vm_flags & VM_GROWSDOWN) return stack_guard_gap; @@ -3444,7 +3548,7 @@ static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) return 0; } -static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +static inline unsigned long vm_start_gap(const struct vm_area_struct *vma) { unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; @@ -3455,7 +3559,7 @@ static inline unsigned long vm_start_gap(struct vm_area_struct *vma) return vm_start; } -static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +static inline unsigned long vm_end_gap(const struct vm_area_struct *vma) { unsigned long vm_end = vma->vm_end; @@ -3467,7 +3571,7 @@ static inline unsigned long vm_end_gap(struct vm_area_struct *vma) return vm_end; } -static inline unsigned long vma_pages(struct vm_area_struct *vma) +static inline unsigned long vma_pages(const struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } @@ -3484,7 +3588,7 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } -static inline bool range_in_vma(struct vm_area_struct *vma, +static inline bool range_in_vma(const struct vm_area_struct *vma, 
unsigned long start, unsigned long end) { return (vma && vma->vm_start <= start && end <= vma->vm_end); @@ -3600,7 +3704,7 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * Indicates whether GUP can follow a PROT_NONE mapped page, or whether * a (NUMA hinting) fault is required. */ -static inline bool gup_can_follow_protnone(struct vm_area_struct *vma, +static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma, unsigned int flags) { /* @@ -3730,7 +3834,7 @@ static inline bool debug_guardpage_enabled(void) return static_branch_unlikely(&_debug_guardpage_enabled); } -static inline bool page_is_guard(struct page *page) +static inline bool page_is_guard(const struct page *page) { if (!debug_guardpage_enabled()) return false; @@ -3761,7 +3865,7 @@ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } -static inline bool page_is_guard(struct page *page) { return false; } +static inline bool page_is_guard(const struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, @@ -3784,7 +3888,7 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) } #endif /* __HAVE_ARCH_GATE_AREA */ -extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); +bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm); void drop_slab(void); @@ -3843,7 +3947,7 @@ void vmemmap_free(unsigned long start, unsigned long end, #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ if (altmap) @@ -3857,7 +3961,7 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap, altmap->alloc -= nr_pfns; } #else -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { return 0; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 89b518ff097e..f6a2b2d20016 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -25,7 +25,7 @@ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise * ram or swap backed folio. */ -static inline int folio_is_file_lru(struct folio *folio) +static inline int folio_is_file_lru(const struct folio *folio) { return !folio_test_swapbacked(folio); } @@ -84,7 +84,7 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio) * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list folio_lru_list(struct folio *folio) +static __always_inline enum lru_list folio_lru_list(const struct folio *folio) { enum lru_list lru; @@ -141,9 +141,9 @@ static inline int lru_tier_from_refs(int refs, bool workingset) return workingset ? 
MAX_NR_TIERS - 1 : order_base_2(refs); } -static inline int folio_lru_refs(struct folio *folio) +static inline int folio_lru_refs(const struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); if (!(flags & BIT(PG_referenced))) return 0; @@ -154,14 +154,14 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1; } -static inline int folio_lru_gen(struct folio *folio) +static inline int folio_lru_gen(const struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } -static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +static inline bool lru_gen_is_active(const struct lruvec *lruvec, int gen) { unsigned long max_seq = lruvec->lrugen.max_seq; @@ -217,12 +217,13 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } -static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio, +static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec, + const struct folio *folio, bool reclaiming) { int gen; int type = folio_is_file_lru(folio); - struct lru_gen_folio *lrugen = &lruvec->lrugen; + const struct lru_gen_folio *lrugen = &lruvec->lrugen; /* * +-----------------------------------+-----------------------------------+ @@ -268,7 +269,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + set_mask_bits(&folio->flags.f, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -293,7 +294,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, /* for folio_migrate_flags() */ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? 
BIT(PG_active) : 0; - flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); + flags = set_mask_bits(&folio->flags.f, LRU_GEN_MASK, flags); gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; lru_gen_update_size(lruvec, folio, gen, -1);
@@ -302,11 +303,11 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return true; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const struct folio *old) { - unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK; + unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK; - set_mask_bits(&new->flags, LRU_REFS_MASK, refs); + set_mask_bits(&new->flags.f, LRU_REFS_MASK, refs); } #else /* !CONFIG_LRU_GEN */
@@ -330,7 +331,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return false; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const struct folio *old) { }
@@ -508,7 +509,7 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) atomic_dec(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +static inline bool mm_tlb_flush_pending(const struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that
@@ -521,7 +522,7 @@ return atomic_read(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +static inline bool mm_tlb_flush_nested(const struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
@@ -605,7 +606,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, return false; } -static inline bool vma_has_recency(struct vm_area_struct *vma) +static inline bool vma_has_recency(const struct vm_area_struct *vma) { if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false;
@@ -616,4 +617,40 @@ static inline bool vma_has_recency(struct vm_area_struct *vma) return true; }
+/**
+ * num_pages_contiguous() - determine the number of contiguous pages
+ * that represent contiguous PFNs
+ * @pages: an array of page pointers
+ * @nr_pages: length of the array, at least 1
+ *
+ * Determine the number of contiguous pages that represent contiguous PFNs
+ * in @pages, starting from the first page.
+ *
+ * In some kernel configs contiguous PFNs will not have contiguous struct
+ * pages. In these configurations num_pages_contiguous() will return a
+ * number smaller than the ideal number. The caller should continue to check
+ * for pfn contiguity after each call to num_pages_contiguous().
+ *
+ * Returns the number of contiguous pages.
+ */
+static inline size_t num_pages_contiguous(struct page **pages, size_t nr_pages)
+{
+	struct page *cur_page = pages[0];
+	unsigned long section = memdesc_section(cur_page->flags);
+	size_t i;
+
+	for (i = 1; i < nr_pages; i++) {
+		if (++cur_page != pages[i])
+			break;
+		/*
+		 * In unproblematic kernel configs, page_to_section() == 0 and
+		 * the whole check will get optimized out.
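		 * (Editorial note: under CONFIG_SPARSEMEM without
		 * CONFIG_SPARSEMEM_VMEMMAP, contiguous struct page pointers
		 * do not guarantee contiguous PFNs once a memory section
		 * boundary is crossed, which is why the section is compared
		 * in addition to the pointer increment above.)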
+ */ + if (memdesc_section(cur_page->flags) != section) + break; + } + + return i; +} + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f625c35128b..90e5790c318f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -20,6 +20,7 @@ #include <linux/seqlock.h> #include <linux/percpu_counter.h> #include <linux/types.h> +#include <linux/bitmap.h> #include <asm/mmu.h> @@ -33,6 +34,10 @@ struct address_space; struct futex_private_hash; struct mem_cgroup; +typedef struct { + unsigned long f; +} memdesc_flags_t; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -71,7 +76,7 @@ struct mem_cgroup; #endif struct page { - unsigned long flags; /* Atomic flags, some possibly + memdesc_flags_t flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. @@ -89,21 +94,10 @@ struct page { union { struct list_head lru; - /* Or, for the Unevictable "LRU list" slot */ - struct { - /* Always even, to negate PageTail */ - void *__filler; - /* Count page's or folio's mlocks */ - unsigned int mlock_count; - }; - /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; - struct { - struct llist_node pcp_llist; - unsigned int order; - }; + struct llist_node pcp_llist; }; struct address_space *mapping; union { @@ -114,7 +108,8 @@ struct page { * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if swapcache flag set. - * Indicates order in the buddy system if PageBuddy. + * Indicates order in the buddy system if PageBuddy + * or on pcp_llist. */ unsigned long private; }; @@ -382,11 +377,13 @@ struct folio { union { struct { /* public: */ - unsigned long flags; + memdesc_flags_t flags; union { struct list_head lru; /* private: avoid cluttering the output */ + /* For the Unevictable "LRU list" slot */ struct { + /* Avoid compound_head */ void *__filler; /* public: */ unsigned int mlock_count; @@ -525,7 +522,7 @@ FOLIO_MATCH(compound_head, _head_3); /** * struct ptdesc - Memory descriptor for page tables. - * @__page_flags: Same as page flags. Powerpc only. + * @pt_flags: enum pt_flags plus zone/node/section. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 gmap shadow pages * (which are not linked into the user page tables) and x86 @@ -547,7 +544,7 @@ FOLIO_MATCH(compound_head, _head_3); * understanding of the issues. 
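 *
 * Editorial sketch (not part of this patch): because struct ptdesc
 * overlays struct page (see the TABLE_MATCH assertions below),
 * conversion between the two is just a checked cast:
 *
 *	struct ptdesc *pt = page_ptdesc(page);
 *	struct page *p = ptdesc_page(pt);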
*/ struct ptdesc { - unsigned long __page_flags; + memdesc_flags_t pt_flags; union { struct rcu_head pt_rcu_head;
@@ -585,7 +582,7 @@ struct ptdesc { #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) -TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(flags, pt_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping);
@@ -627,7 +624,7 @@ static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc) atomic_dec(&ptdesc->pt_share_count); } -static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) +static inline int ptdesc_pmd_pts_count(const struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); }
@@ -660,7 +657,7 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } -static inline void *folio_get_private(struct folio *folio) +static inline void *folio_get_private(const struct folio *folio) { return folio->private; }
@@ -785,13 +782,14 @@ struct pfnmap_track_ctx { */ struct vm_area_desc { /* Immutable state. */ - struct mm_struct *mm; + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; - struct file *file; + struct file *vm_file; vm_flags_t vm_flags; pgprot_t page_prot;
@@ -932,6 +930,15 @@ struct mm_cid { }; #endif
+/*
+ * Opaque type representing current mm_struct flag state. Must be accessed via
+ * mm_flags_xxx() helper functions.
+ */
+#define NUM_MM_FLAG_BITS (64)
+typedef struct {
+	DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
+} __private mm_flags_t;
+
struct kioctx_table; struct iommu_mm_data; struct mm_struct {
@@ -1031,10 +1038,10 @@ struct mm_struct { * counters */ /* - * With some kernel config, the current mmap_lock's offset - * inside 'mm_struct' is at 0x120, which is very optimal, as + * Typically the current mmap_lock's offset is 56 bytes from + * the last cacheline boundary, which is optimal, as * its two hot fields 'count' and 'owner' sit in 2 different - * cachelines, and when mmap_lock is highly contended, both + * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. *
@@ -1119,7 +1126,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - unsigned long flags; /* Must use atomic bitops to access */ + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ #ifdef CONFIG_AIO spinlock_t ioctx_lock;
@@ -1229,6 +1236,40 @@ struct mm_struct { unsigned long cpu_bitmap[]; };
+/* Set the first system word of mm flags, non-atomically. */
+static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value)
+{
+	unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags);
+
+	bitmap_copy(bitmap, &value, BITS_PER_LONG);
+}
+
+/* Obtain a read-only view of the bitmap. */
+static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm)
+{
+	return (const unsigned long *)ACCESS_PRIVATE(&mm->flags, __mm_flags);
+}
+
+/* Read the first system word of mm flags, non-atomically.
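 * Editorial sketch (not part of this patch): the word view is a
 * transitional helper for code that still treats the flags as a single
 * unsigned long, e.g.:
 *
 *	unsigned long f = __mm_flags_get_word(mm);
 *	bool dumpable = f & MMF_DUMPABLE_MASK;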
*/ +static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) +{ + const unsigned long *bitmap = __mm_flags_get_bitmap(mm); + + return bitmap_read(bitmap, 0, BITS_PER_LONG); +} + +/* + * Update the first system word of mm flags ONLY, applying the specified mask to + * it, then setting all flags specified by bits. + */ +static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, + unsigned long mask, unsigned long bits) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); + + set_mask_bits(bitmap, mask, bits); +} + #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; @@ -1729,7 +1770,7 @@ enum { * the modes are SUID_DUMP_* defined in linux/sched/coredump.h */ #define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +#define MMF_DUMPABLE_MASK (BIT(MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 @@ -1744,13 +1785,13 @@ enum { #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) + ((BIT(MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + (BIT(MMF_DUMP_ANON_PRIVATE) | BIT(MMF_DUMP_ANON_SHARED) | \ + BIT(MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +# define MMF_DUMP_MASK_DEFAULT_ELF BIT(MMF_DUMP_ELF_HEADERS) #else # define MMF_DUMP_MASK_DEFAULT_ELF 0 #endif @@ -1758,19 +1799,16 @@ enum { #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HUGE_ZERO_FOLIO 18 /* mm has ever used the global huge zero folio */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_THP_EXCEPT_ADVISED 23 /* no THP except when advised (e.g., VM_HUGEPAGE) */ +#define MMF_DISABLE_THP_COMPLETELY 24 /* no THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (BIT(MMF_DISABLE_THP_COMPLETELY) | \ + BIT(MMF_DISABLE_THP_EXCEPT_ADVISED)) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -1783,27 +1821,33 @@ enum { #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_HAS_MDWE 28 -#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) - +#define MMF_HAS_MDWE_MASK BIT(MMF_HAS_MDWE) #define MMF_HAS_MDWE_NO_INHERIT 29 #define MMF_VM_MERGE_ANY 30 -#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) +#define MMF_VM_MERGE_ANY_MASK BIT(MMF_VM_MERGE_ANY) #define MMF_TOPDOWN 31 /* mm searches top down by default */ -#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) +#define 
MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ +#define MMF_INIT_LEGACY_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) -static inline unsigned long mmf_init_flags(unsigned long flags) +/* Legacy flags must fit within 32 bits. */ +static_assert((u64)MMF_INIT_LEGACY_MASK <= (u64)UINT_MAX); + +/* + * Initialise legacy flags according to masks, propagating selected flags on + * fork. Further flag manipulation can be performed by the caller. + */ +static inline unsigned long mmf_init_legacy_flags(unsigned long flags) { if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) flags &= ~((1UL << MMF_HAS_MDWE) | (1UL << MMF_HAS_MDWE_NO_INHERIT)); - return flags & MMF_INIT_MASK; + return flags & MMF_INIT_LEGACY_MASK; } #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mman.h b/include/linux/mman.h index de9e8e6229a4..0ba8a7e8b90a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -201,7 +201,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ - if (!test_bit(MMF_HAS_MDWE, &current->mm->flags)) + if (!mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny. */
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 11a078de9150..2c9fffa58714 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -148,91 +148,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) } /* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. - * False locked result is possible if mm_lock_seq overflows or if vma gets - * reused and attached to a different mm before we lock it. - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got - * detached. - * - * WARNING! The vma passed to this function cannot be used if the function - * fails to lock it because in certain cases RCU lock is dropped and then - * reacquired. Once RCU lock is dropped the vma can be concurently freed. - */ -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - int oldcnt; - - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. - */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) - return NULL; - - /* - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. - * Acquire fence is required here to avoid reordering against later - * vm_lock_seq check and checks inside lock_vma_under_rcu(). - */ - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) { - /* return EAGAIN if vma got detached from under us */ - return oldcnt ?
NULL : ERR_PTR(-EAGAIN); } - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - - /* - * If vma got attached to another mm from under us, that mm is not - * stable and can be freed in the narrow window after vma->vm_refcnt - * is dropped and before rcuwait_wake_up(mm) is called. Grab it before - * releasing vma->vm_refcnt. - */ - if (unlikely(vma->vm_mm != mm)) { - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ - struct mm_struct *other_mm = vma->vm_mm; - - /* - * __mmdrop() is a heavy operation and we don't need RCU - * protection here. Release RCU lock during these operations. - * We reinstate the RCU read lock as the caller expects it to - * be held when this function returns even on error. - */ - rcu_read_unlock(); - mmgrab(other_mm); - vma_refcount_put(vma); - mmdrop(other_mm); - rcu_read_lock(); - return NULL; - } - - /* - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. - * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). - */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { - vma_refcount_put(vma); - return NULL; - } - - return vma; -} - -/* * Use only while holding mmap read lock which guarantees that locking will not * fail (nobody can concurrently write-lock the vma). vma_start_read() should * not be used in such cases because it might fail due to mm_lock_seq overflow.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c5da9141983..7fb7331c5725 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -234,7 +234,21 @@ enum node_stat_item { #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ - PGPROMOTE_CANDIDATE, /* candidate pages to promote */
+	/**
+	 * Candidate pages for promotion based on hint fault latency. This
+	 * counter is used to control the promotion rate and adjust the hot
+	 * threshold.
+	 */
+	PGPROMOTE_CANDIDATE,
+	/**
+	 * Not rate-limited (NRL) candidate pages, i.e. pages that can be
+	 * promoted without considering the hot threshold because the
+	 * fast-tier node has enough free pages. These promotions bypass the
+	 * regular hotness checks and do NOT influence the promotion
+	 * rate-limiter or threshold-adjustment logic.
+	 * This is for statistics/monitoring purposes.
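	 *
	 * Editorial sketch (not part of this patch) of reading both
	 * counters, e.g. from a monitoring or debugging path, where pgdat
	 * is the target node's pglist_data:
	 *
	 *	unsigned long cand = node_page_state(pgdat, PGPROMOTE_CANDIDATE);
	 *	unsigned long nrl = node_page_state(pgdat, PGPROMOTE_CANDIDATE_NRL);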
+ */ + PGPROMOTE_CANDIDATE_NRL, #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, @@ -245,6 +259,7 @@ enum node_stat_item { NR_HUGETLB, #endif NR_BALLOON_PAGES, + NR_KERNEL_FILE_PAGES, NR_VM_NODE_STAT_ITEMS }; @@ -1089,7 +1104,7 @@ static inline unsigned long promo_wmark_pages(const struct zone *z) return wmark_pages(z, WMARK_PROMO); } -static inline unsigned long zone_managed_pages(struct zone *zone) +static inline unsigned long zone_managed_pages(const struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } @@ -1113,12 +1128,12 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } -static inline bool zone_is_initialized(struct zone *zone) +static inline bool zone_is_initialized(const struct zone *zone) { return zone->initialized; } -static inline bool zone_is_empty(struct zone *zone) +static inline bool zone_is_empty(const struct zone *zone) { return zone->spanned_pages == 0; } @@ -1169,26 +1184,31 @@ static inline bool zone_is_empty(struct zone *zone) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) +static inline enum zone_type memdesc_zonenum(memdesc_flags_t flags) +{ + ASSERT_EXCLUSIVE_BITS(flags.f, ZONES_MASK << ZONES_PGSHIFT); + return (flags.f >> ZONES_PGSHIFT) & ZONES_MASK; +} + static inline enum zone_type page_zonenum(const struct page *page) { - ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; + return memdesc_zonenum(page->flags); } static inline enum zone_type folio_zonenum(const struct folio *folio) { - return page_zonenum(&folio->page); + return memdesc_zonenum(folio->flags); } #ifdef CONFIG_ZONE_DEVICE -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { - return page_zonenum(page) == ZONE_DEVICE; + return memdesc_zonenum(mdf) == ZONE_DEVICE; } static inline struct dev_pagemap *page_pgmap(const struct page *page) { - VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + VM_WARN_ON_ONCE_PAGE(!memdesc_is_zone_device(page->flags), page); return page_folio(page)->pgmap; } @@ -1203,9 +1223,9 @@ static inline struct dev_pagemap *page_pgmap(const struct page *page) static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { - if (is_zone_device_page(a) != is_zone_device_page(b)) + if (memdesc_is_zone_device(a->flags) != memdesc_is_zone_device(b->flags)) return false; - if (!is_zone_device_page(a)) + if (!memdesc_is_zone_device(a->flags)) return true; return page_pgmap(a) == page_pgmap(b); } @@ -1213,7 +1233,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { return false; } @@ -1228,9 +1248,14 @@ static inline struct dev_pagemap *page_pgmap(const struct page *page) } #endif +static inline bool is_zone_device_page(const struct page *page) +{ + return memdesc_is_zone_device(page->flags); +} + static inline bool folio_is_zone_device(const struct folio *folio) { - return is_zone_device_page(&folio->page); + return memdesc_is_zone_device(folio->flags); } static inline bool is_zone_movable_page(const struct page *page) @@ -1248,7 +1273,7 @@ static inline 
bool folio_is_zone_movable(const struct folio *folio) * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ -static inline bool zone_intersects(struct zone *zone, +static inline bool zone_intersects(const struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) @@ -1415,7 +1440,7 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_highest_zoneidx; - int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + atomic_t kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; @@ -1556,12 +1581,12 @@ static inline int local_memory_node(int node_id) { return node_id; }; #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return false; } @@ -1573,19 +1598,19 @@ static inline bool zone_is_zone_device(struct zone *zone) * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). */ -static inline bool managed_zone(struct zone *zone) +static inline bool managed_zone(const struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ -static inline bool populated_zone(struct zone *zone) +static inline bool populated_zone(const struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return zone->node; } @@ -1595,7 +1620,7 @@ static inline void zone_set_nid(struct zone *zone, int nid) zone->node = nid; } #else -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return 0; } @@ -1622,7 +1647,7 @@ static inline int is_highmem_idx(enum zone_type idx) * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ -static inline int is_highmem(struct zone *zone) +static inline int is_highmem(const struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } @@ -1688,12 +1713,12 @@ static inline struct zone *zonelist_zone(struct zoneref *zoneref) return zoneref->zone; } -static inline int zonelist_zone_idx(struct zoneref *zoneref) +static inline int zonelist_zone_idx(const struct zoneref *zoneref) { return zoneref->zone_idx; } -static inline int zonelist_node_idx(struct zoneref *zoneref) +static inline int zonelist_node_idx(const struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } @@ -1996,7 +2021,7 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) return (struct page *)map; } -static inline int present_section(struct mem_section *section) +static inline int present_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } @@ -2006,12 +2031,12 @@ static inline int present_section_nr(unsigned long nr) return present_section(__nr_to_section(nr)); } -static inline int valid_section(struct mem_section *section) +static inline int valid_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } -static inline int early_section(struct mem_section *section) +static 
inline int early_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); }
@@ -2021,27 +2046,27 @@ static inline int valid_section_nr(unsigned long nr) { return valid_section(__nr_to_section(nr)); } -static inline int online_section(struct mem_section *section) +static inline int online_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { return 0; } #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT -static inline int preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT));
@@ -2051,7 +2076,7 @@ void sparse_vmemmap_init_nid_early(int nid); void sparse_vmemmap_init_nid_late(int nid); #else -static inline int preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return 0; }
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3a25122d83e2..6907aedc4f74 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod) __module_param_call("", name, &param_ops_##type, &var, perm, \ -1, KERNEL_PARAM_FL_UNSAFE)
+/**
+ * __core_param_cb - similar to core_param, but with set/get ops instead of a type.
+ * @name: the name of the cmdline and sysfs parameter (often the same as the variable)
+ * @ops: the set & get operations for this parameter.
+ * @arg: the argument passed to @ops (typically a pointer to the variable)
+ * @perm: visibility in sysfs
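 *
 * A usage sketch (editorial, not part of this patch; my_param, my_val and
 * my_ops are hypothetical):
 *
 *	static int my_val;
 *	static const struct kernel_param_ops my_ops = {
 *		.set = param_set_int,
 *		.get = param_get_int,
 *	};
 *	__core_param_cb(my_param, &my_ops, &my_val, 0444);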
+ *
+ * Ideally this should be called 'core_param_cb', but the name has already
+ * been used for the module core parameter, hence the '__' prefix.
+ */
+#define __core_param_cb(name, ops, arg, perm) \ + __module_param_call("", name, ops, arg, perm, -1, 0) + #endif /* !MODULE */ /**
diff --git a/include/linux/mount.h b/include/linux/mount.h index 5f9c053b0897..acfe7ef86a1b 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -33,7 +33,6 @@ enum mount_flags { MNT_NOSYMFOLLOW = 0x80, MNT_SHRINKABLE = 0x100, - MNT_WRITE_HOLD = 0x200, MNT_INTERNAL = 0x4000, @@ -52,7 +51,7 @@ enum mount_flags { | MNT_READONLY | MNT_NOSYMFOLLOW, MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_LOCKED };
@@ -77,7 +76,7 @@ extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); -extern bool __mnt_is_readonly(struct vfsmount *mnt); +extern bool __mnt_is_readonly(const struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path);
@@ -104,8 +103,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct path *collect_paths(const struct path *, struct path *, unsigned); -extern void drop_collected_paths(struct path *, struct path *); +extern const struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(const struct path *, const struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts);
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 288ef765a44e..75b0b2abc880 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/unaligned.h> +#include <asm/barrier.h> struct device_node; struct module;
diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index 4e694b1aabbd..e8201d1b7cf9 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -71,14 +71,10 @@ /* NAND_DEVn_CFG0 bits */ #define DISABLE_STATUS_AFTER_WRITE BIT(4) -#define CW_PER_PAGE 6 #define CW_PER_PAGE_MASK GENMASK(8, 6) -#define UD_SIZE_BYTES 9 #define UD_SIZE_BYTES_MASK GENMASK(18, 9) #define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) -#define SPARE_SIZE_BYTES 23 #define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) -#define NUM_ADDR_CYCLES 27 #define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) #define STATUS_BFR_READ BIT(30) #define SET_RD_MODE_AFTER_STATUS BIT(31)
@@ -86,26 +82,20 @@ /* NAND_DEVn_CFG0 bits */ #define DEV0_CFG1_ECC_DISABLE BIT(0) #define WIDE_FLASH BIT(1) -#define NAND_RECOVERY_CYCLES 2 #define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) #define CS_ACTIVE_BSY BIT(5) -#define BAD_BLOCK_BYTE_NUM 6 #define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) #define BAD_BLOCK_IN_SPARE_AREA BIT(16) -#define WR_RD_BSY_GAP 17 #define WR_RD_BSY_GAP_MASK GENMASK(22, 17) #define ENABLE_BCH_ECC BIT(27) /* NAND_DEV0_ECC_CFG bits */ #define ECC_CFG_ECC_DISABLE BIT(0) #define ECC_SW_RESET BIT(1) -#define ECC_MODE 4
#define ECC_MODE_MASK GENMASK(5, 4) #define ECC_MODE_4BIT 0 #define ECC_MODE_8BIT 1 -#define ECC_PARITY_SIZE_BYTES_BCH 8 #define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) -#define ECC_NUM_DATA_BYTES 16 #define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) #define ECC_FORCE_CLK_OPEN BIT(30) @@ -120,7 +110,6 @@ #define SEQ_READ_START_VLD BIT(4) /* NAND_EBI2_ECC_BUF_CFG bits */ -#define NUM_STEPS 0 #define NUM_STEPS_MASK GENMASK(9, 0) /* NAND_ERASED_CW_DETECT_CFG bits */ @@ -141,11 +130,8 @@ #define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) /* NAND_READ_LOCATION_n bits */ -#define READ_LOCATION_OFFSET 0 #define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) -#define READ_LOCATION_SIZE 16 #define READ_LOCATION_SIZE_MASK GENMASK(25, 16) -#define READ_LOCATION_LAST 31 #define READ_LOCATION_LAST_MASK BIT(31) /* Version Mask */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 07486168d104..09c8c93e4dba 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1136,4 +1136,9 @@ static inline bool nanddev_bbt_is_initialized(struct nand_device *nand) int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo); int nanddev_mtd_max_bad_blocks(struct mtd_info *mtd, loff_t offs, size_t len); +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); + #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e84522e31301..d30bdc3fcfd7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1519,11 +1519,6 @@ int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); void rawnand_sw_bch_cleanup(struct nand_chip *chip); -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int threshold); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 27a45bdab7ec..927c10d78769 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -355,6 +355,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer fmsh_spinand_manufacturer; extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; diff --git a/include/linux/namei.h b/include/linux/namei.h index a7800ef04e76..fed86221c69c 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -61,10 +61,10 @@ struct dentry *kern_path_parent(const char *name, struct path *parent); extern struct dentry *start_creating_path(int, const char *, struct path *, unsigned int); extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int); -extern void end_creating_path(struct path *, struct dentry *); +extern void end_creating_path(const struct path *, struct dentry *); extern struct dentry *start_removing_path(const char *, struct path *); extern struct dentry *start_removing_user_path_at(int , const char __user *, struct path *); -static inline void end_removing_path(struct path *path , 
struct dentry *dentry) +static inline void end_removing_path(const struct path *path , struct dentry *dentry) { end_creating_path(path, dentry); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 98c96d649bf9..72ee7d210a74 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -21,7 +21,7 @@ #include <linux/rolling_buffer.h> enum netfs_sreq_ref_trace; -typedef struct mempool_s mempool_t; +typedef struct mempool mempool_t; struct folio_queue; /** diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 9aed39abc94b..afe1d8f09d89 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -122,8 +122,6 @@ struct nfs_pageio_descriptor { /* arbitrarily selected limit to number of mirrors */ #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 -#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) - extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, struct page *page, unsigned int pgbase, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac4bff6e9913..d56583572c98 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -862,7 +862,7 @@ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; int acl_flags; - struct page * acl_scratch; + struct folio * acl_scratch; }; struct nfs_setattrres { @@ -1596,7 +1596,7 @@ struct nfs42_listxattrsargs { struct nfs42_listxattrsres { struct nfs4_sequence_res seq_res; - struct page *scratch; + struct folio *scratch; void *xattr_buf; size_t xattr_len; u64 cookie; diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..3d91043254e6 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -63,8 +63,9 @@ struct nfsd_localio_operations { struct nfsd_file __rcu **pnf, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); - struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); + void (*nfsd_file_dio_alignment)(struct nfsd_file *, + u32 *, u32 *, u32 *); } ____cacheline_aligned; extern void nfsd_localio_ops_init(void); diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21..866e3323f1fd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 615a560d9edb..f3b13da78aac 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -103,7 +103,7 @@ struct nvmem_cell_info { * * Note: A default "nvmem<id>" name will be assigned to the device if * no name is specified in its configuration. In such case "<id>" is - * generated with ida_simple_get() and provided id field is ignored. + * generated with ida_alloc() and provided id field is ignored. 
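 * (Editorial example: with name left NULL, successive registrations
 * appear as "nvmem0", "nvmem1", and so on, using ida_alloc()-provided
 * ids.)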
* * Note: Specifying name and setting id to -1 implies a unique device * whose name is provided as-is (kept unaltered). diff --git a/include/linux/oom.h b/include/linux/oom.h index 1e0fc6931ce9..7b02bc1d0a7e 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -91,7 +91,7 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) */ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) { - if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + if (unlikely(mm_flags_test(MMF_UNSTABLE, mm))) return VM_FAULT_SIGBUS; return 0; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8d3fa3a91ce4..0091ad1986bf 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -217,7 +217,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page * cold cacheline in some cases. */ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && - test_bit(PG_head, &page->flags)) { + test_bit(PG_head, &page->flags.f)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed with at least @@ -316,7 +316,7 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ -#define folio_page(folio, n) nth_page(&(folio)->page, n) +#define folio_page(folio, n) (&(folio)->page + (n)) static __always_inline int PageTail(const struct page *page) { @@ -325,14 +325,14 @@ static __always_inline int PageTail(const struct page *page) static __always_inline int PageCompound(const struct page *page) { - return test_bit(PG_head, &page->flags) || + return test_bit(PG_head, &page->flags.f) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM @@ -349,8 +349,8 @@ static const unsigned long *const_folio_flags(const struct folio *folio, const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } static unsigned long *folio_flags(struct folio *folio, unsigned n) @@ -358,8 +358,8 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } /* @@ -449,37 +449,37 @@ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ -{ return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags.f); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ -{ set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page 
*page) \ -{ clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ -{ __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ -{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ -{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ -{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -618,6 +618,7 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) #else PAGEFLAG_FALSE(HighMem, highmem) #endif +#define PhysHighMem(__p) (PageHighMem(phys_to_page(__p))) /* Does kmap_local_folio() only allow access to one page of the folio? */ #ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP @@ -846,7 +847,7 @@ static __always_inline bool folio_test_head(const struct folio *folio) static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); - return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); + return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) @@ -1170,28 +1171,28 @@ static __always_inline int PageAnonExclusive(const struct page *page) */ if (PageHuge(page)) page = compound_head(page); - return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } #ifdef CONFIG_MMU @@ -1241,7 +1242,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ static inline int folio_has_private(const struct folio *folio) { - return !!(folio->flags & PAGE_FLAGS_PRIVATE); + return !!(folio->flags.f & PAGE_FLAGS_PRIVATE); } #undef PF_ANY diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 6a44be0f39f4..e046278a01fa 100644 --- 
a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -13,12 +13,11 @@ #include <linux/types.h> -#define PB_migratetype_bits 3 /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_migrate, - PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, - /* 3 bits required for migrate types */ + PB_migrate_0, + PB_migrate_1, + PB_migrate_2, PB_compact_skip,/* If set the block is skipped by compaction */ #ifdef CONFIG_MEMORY_ISOLATION @@ -37,11 +36,10 @@ enum pageblock_bits { #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) -#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) +#define MIGRATETYPE_MASK (BIT(PB_migrate_0)|BIT(PB_migrate_1)|BIT(PB_migrate_2)) #ifdef CONFIG_MEMORY_ISOLATION -#define MIGRATETYPE_AND_ISO_MASK \ - (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate)) +#define MIGRATETYPE_AND_ISO_MASK (MIGRATETYPE_MASK | BIT(PB_migrate_isolate)) #else #define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK #endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 12a12dae727d..09b581c1d878 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -140,7 +140,7 @@ static inline int inode_drain_writes(struct inode *inode) return filemap_write_and_wait(inode->i_mapping); } -static inline bool mapping_empty(struct address_space *mapping) +static inline bool mapping_empty(const struct address_space *mapping) { return xa_empty(&mapping->i_pages); } @@ -166,7 +166,7 @@ static inline bool mapping_empty(struct address_space *mapping) * refcount and the referenced bit, which will be elevated or set in * the process of adding new cache pages to an inode. */ -static inline bool mapping_shrinkable(struct address_space *mapping) +static inline bool mapping_shrinkable(const struct address_space *mapping) { void *head; @@ -211,6 +211,8 @@ enum mapping_flags { folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, + AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't + account usage to user cgroups */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -265,7 +267,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) clear_bit(AS_UNEVICTABLE, &mapping->flags); } -static inline bool mapping_unevictable(struct address_space *mapping) +static inline bool mapping_unevictable(const struct address_space *mapping) { return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } @@ -275,7 +277,7 @@ static inline void mapping_set_exiting(struct address_space *mapping) set_bit(AS_EXITING, &mapping->flags); } -static inline int mapping_exiting(struct address_space *mapping) +static inline int mapping_exiting(const struct address_space *mapping) { return test_bit(AS_EXITING, &mapping->flags); } @@ -285,7 +287,7 @@ static inline void mapping_set_no_writeback_tags(struct address_space *mapping) set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } -static inline int mapping_use_writeback_tags(struct address_space *mapping) +static inline int mapping_use_writeback_tags(const struct address_space *mapping) { return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } @@ -331,7 +333,7 @@ static inline void mapping_set_inaccessible(struct address_space *mapping) set_bit(AS_INACCESSIBLE, &mapping->flags); } -static inline bool mapping_inaccessible(struct address_space *mapping) +static inline bool mapping_inaccessible(const struct address_space *mapping) { 
return test_bit(AS_INACCESSIBLE, &mapping->flags); } @@ -341,18 +343,18 @@ static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_ set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping) { return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline gfp_t mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ -static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, +static inline gfp_t mapping_gfp_constraint(const struct address_space *mapping, gfp_t gfp_mask) { return mapping_gfp_mask(mapping) & gfp_mask; @@ -475,11 +477,17 @@ mapping_min_folio_order(const struct address_space *mapping) } static inline unsigned long -mapping_min_folio_nrpages(struct address_space *mapping) +mapping_min_folio_nrpages(const struct address_space *mapping) { return 1UL << mapping_min_folio_order(mapping); } +static inline unsigned long +mapping_min_folio_nrbytes(const struct address_space *mapping) +{ + return mapping_min_folio_nrpages(mapping) << PAGE_SHIFT; +} + /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. @@ -489,7 +497,7 @@ mapping_min_folio_nrpages(struct address_space *mapping) * new folio to the page cache and need to know what index to give it, * call this function. */ -static inline pgoff_t mapping_align_index(struct address_space *mapping, +static inline pgoff_t mapping_align_index(const struct address_space *mapping, pgoff_t index) { return round_down(index, mapping_min_folio_nrpages(mapping)); @@ -499,7 +507,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping, * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. */ -static inline bool mapping_large_folio_support(struct address_space *mapping) +static inline bool mapping_large_folio_support(const struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, @@ -514,7 +522,7 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping) return PAGE_SIZE << mapping_max_folio_order(mapping); } -static inline int filemap_nr_thps(struct address_space *mapping) +static inline int filemap_nr_thps(const struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS return atomic_read(&mapping->nr_thps); @@ -543,7 +551,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) #endif } -struct address_space *folio_mapping(struct folio *); +struct address_space *folio_mapping(const struct folio *folio); /** * folio_flush_mapping - Find the file mapping this folio belongs to. @@ -928,7 +936,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, * * Return: The index of the folio which follows this folio in the file. */ -static inline pgoff_t folio_next_index(struct folio *folio) +static inline pgoff_t folio_next_index(const struct folio *folio) { return folio->index + folio_nr_pages(folio); } @@ -957,7 +965,7 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) * e.g., shmem did not move this folio to the swap cache. 
* Return: true or false. */ -static inline bool folio_contains(struct folio *folio, pgoff_t index) +static inline bool folio_contains(const struct folio *folio, pgoff_t index) { VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); return index - folio->index < folio_nr_pages(folio); @@ -1034,13 +1042,13 @@ static inline loff_t page_offset(struct page *page) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). */ -static inline pgoff_t folio_pgoff(struct folio *folio) +static inline pgoff_t folio_pgoff(const struct folio *folio) { return folio->index; } -static inline pgoff_t linear_page_index(struct vm_area_struct *vma, - unsigned long address) +static inline pgoff_t linear_page_index(const struct vm_area_struct *vma, + const unsigned long address) { pgoff_t pgoff; pgoff = (address - vma->vm_start) >> PAGE_SHIFT; @@ -1221,6 +1229,8 @@ void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); +void folio_end_writeback_no_dropbehind(struct folio *folio); +void folio_end_dropbehind(struct folio *folio); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); @@ -1460,7 +1470,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_pos(struct readahead_control *rac) +static inline loff_t readahead_pos(const struct readahead_control *rac) { return (loff_t)rac->_index * PAGE_SIZE; } @@ -1469,7 +1479,7 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline size_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(const struct readahead_control *rac) { return rac->_nr_pages * PAGE_SIZE; } @@ -1478,7 +1488,7 @@ static inline size_t readahead_length(struct readahead_control *rac) * readahead_index - The index of the first page in this readahead request. * @rac: The readahead request. */ -static inline pgoff_t readahead_index(struct readahead_control *rac) +static inline pgoff_t readahead_index(const struct readahead_control *rac) { return rac->_index; } @@ -1487,7 +1497,7 @@ static inline pgoff_t readahead_index(struct readahead_control *rac) * readahead_count - The number of pages in this readahead request. * @rac: The readahead request. */ -static inline unsigned int readahead_count(struct readahead_control *rac) +static inline unsigned int readahead_count(const struct readahead_control *rac) { return rac->_nr_pages; } @@ -1496,12 +1506,12 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. 
*/ -static inline size_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(const struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } -static inline unsigned long dir_pages(struct inode *inode) +static inline unsigned long dir_pages(const struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1515,8 +1525,8 @@ static inline unsigned long dir_pages(struct inode *inode) * Return: the number of bytes in the folio up to EOF, * or -EFAULT if the folio was truncated. */ -static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, - struct inode *inode) +static inline ssize_t folio_mkwrite_check_truncate(const struct folio *folio, + const struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; @@ -1547,7 +1557,8 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, * Return: The number of filesystem blocks covered by this folio. */ static inline -unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +unsigned int i_blocks_per_folio(const struct inode *inode, + const struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5d3a0cccc6bf..63be5a451627 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -51,12 +51,12 @@ static inline void folio_batch_reinit(struct folio_batch *fbatch) fbatch->i = 0; } -static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +static inline unsigned int folio_batch_count(const struct folio_batch *fbatch) { return fbatch->nr; } -static inline unsigned int folio_batch_space(struct folio_batch *fbatch) +static inline unsigned int folio_batch_space(const struct folio_batch *fbatch) { return PAGEVEC_SIZE - fbatch->nr; } diff --git a/include/linux/panic.h b/include/linux/panic.h index 7be742628c25..6f972a66c13e 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -43,6 +43,12 @@ void abort(void); extern atomic_t panic_cpu; #define PANIC_CPU_INVALID -1 +bool panic_try_start(void); +void panic_reset(void); +bool panic_in_progress(void); +bool panic_on_this_cpu(void); +bool panic_on_other_cpu(void); + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. 
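The five panic-state helpers declared in the panic.h hunk above replace open-coded peeking at the panic_cpu atomic. A minimal usage sketch, assuming panic_try_start() returns true only for the CPU that wins the race to start a panic; the arch_handle_fatal_trap() function and its policy are hypothetical, only the helper declarations come from this diff:

#include <linux/panic.h>
#include <linux/processor.h>

static void arch_handle_fatal_trap(void)
{
	if (!panic_try_start()) {
		/*
		 * Lost the race: some CPU is already panicking. If it is
		 * another CPU, spin quietly rather than interleave output
		 * with the CPU that is dumping state; otherwise
		 * panic_on_this_cpu() holds and this is a recursive fault
		 * on the panicking CPU itself.
		 */
		if (panic_on_other_cpu())
			for (;;)
				cpu_relax();
		return;
	}
	panic("fatal trap");
}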
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 075c20b161d9..951f81a38f3a 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -21,7 +21,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset); int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose); -bool pci_has_p2pmem(struct pci_dev *pdev); struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients); void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size); @@ -45,10 +44,6 @@ static inline int pci_p2pdma_distance_many(struct pci_dev *provider, { return -1; } -static inline bool pci_has_p2pmem(struct pci_dev *pdev) -{ - return false; -} static inline struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..d1fdf81fbe1e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -119,7 +119,8 @@ enum { #define PCI_CB_BRIDGE_MEM_1_WINDOW (PCI_BRIDGE_RESOURCES + 3) /* Total number of bridge resources for P2P and CardBus */ -#define PCI_BRIDGE_RESOURCE_NUM 4 +#define PCI_P2P_BRIDGE_RESOURCE_NUM 3 +#define PCI_BRIDGE_RESOURCE_NUM 4 /* Resources assigned to buses behind the bridge */ PCI_BRIDGE_RESOURCES, @@ -1417,7 +1418,7 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -void pci_release_resource(struct pci_dev *dev, int resno); +int pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { bytes = roundup_pow_of_two(bytes); @@ -2764,7 +2765,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) || defined(CONFIG_S390) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 701974639ff2..f82a28040594 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -89,6 +89,7 @@ static inline void riscv_pmu_legacy_skip_init(void) {}; struct riscv_pmu *riscv_pmu_alloc(void); #ifdef CONFIG_RISCV_PMU_SBI int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig); #endif #endif /* CONFIG_RISCV_PMU */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 8a7f4f802c57..38a82d65e58e 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -107,7 +107,8 @@ static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, if (static_key_enabled(&mem_profiling_compressed)) { pgalloc_tag_idx idx; - idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx = (page->flags.f >> alloc_tag_ref_offs) & + alloc_tag_ref_mask; idx_to_ref(idx, ref); handle->page = page; } else { @@ -149,11 +150,11 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code idx = (unsigned long)ref_to_idx(ref); idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; do { - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); flags = old_flags; flags &= ~(alloc_tag_ref_mask << 
alloc_tag_ref_offs); flags |= idx; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } else { if (WARN_ON(!handle.ref || !ref)) return; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 25a7257052ff..32e8457ad535 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1975,6 +1975,32 @@ static inline bool arch_has_pfn_modify_check(void) /* Page-Table Modification Mask */ typedef unsigned int pgtbl_mod_mask; +enum pgtable_level { + PGTABLE_LEVEL_PTE = 0, + PGTABLE_LEVEL_PMD, + PGTABLE_LEVEL_PUD, + PGTABLE_LEVEL_P4D, + PGTABLE_LEVEL_PGD, +}; + +static inline const char *pgtable_level_to_str(enum pgtable_level level) +{ + switch (level) { + case PGTABLE_LEVEL_PTE: + return "pte"; + case PGTABLE_LEVEL_PMD: + return "pmd"; + case PGTABLE_LEVEL_PUD: + return "pud"; + case PGTABLE_LEVEL_P4D: + return "p4d"; + case PGTABLE_LEVEL_PGD: + return "pgd"; + default: + return "unknown"; + } +} + #endif /* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 73de70362b98..63ce16191eb9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -48,6 +48,7 @@ int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_init_state(struct device *dev); int pinctrl_pm_select_sleep_state(struct device *dev); int pinctrl_pm_select_idle_state(struct device *dev); #else @@ -55,6 +56,10 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) { return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; @@ -143,6 +148,11 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} + static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; diff --git a/include/linux/platform_data/keyboard-spear.h b/include/linux/platform_data/keyboard-spear.h deleted file mode 100644 index 5e3ff653900c..000000000000 --- a/include/linux/platform_data/keyboard-spear.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (C) 2010 ST Microelectronics - * Rajeev Kumar <rajeevkumar.linux@gmail.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#ifndef __PLAT_KEYBOARD_H -#define __PLAT_KEYBOARD_H - -#include <linux/bitops.h> -#include <linux/input.h> -#include <linux/input/matrix_keypad.h> -#include <linux/types.h> - -#define DECLARE_9x9_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_ESC), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(0, 6, KEY_6), \ - KEY(0, 7, KEY_7), \ - KEY(0, 8, KEY_8), \ - KEY(1, 0, KEY_9), \ - KEY(1, 1, KEY_MINUS), \ - KEY(1, 2, KEY_EQUAL), \ - KEY(1, 3, KEY_BACKSPACE), \ - KEY(1, 4, KEY_TAB), \ - KEY(1, 5, KEY_Q), \ - KEY(1, 6, KEY_W), \ - KEY(1, 7, KEY_E), \ - KEY(1, 8, KEY_R), \ - KEY(2, 0, KEY_T), \ - KEY(2, 1, KEY_Y), \ - KEY(2, 2, KEY_U), \ - KEY(2, 3, KEY_I), \ - KEY(2, 4, KEY_O), \ - KEY(2, 5, KEY_P), \ - KEY(2, 6, KEY_LEFTBRACE), \ - KEY(2, 7, KEY_RIGHTBRACE), \ - KEY(2, 8, KEY_ENTER), \ - KEY(3, 0, KEY_LEFTCTRL), \ - KEY(3, 1, KEY_A), \ - KEY(3, 2, KEY_S), \ - KEY(3, 3, KEY_D), \ - KEY(3, 4, KEY_F), \ - KEY(3, 5, KEY_G), \ - KEY(3, 6, KEY_H), \ - KEY(3, 7, KEY_J), \ - KEY(3, 8, KEY_K), \ - KEY(4, 0, KEY_L), \ - KEY(4, 1, KEY_SEMICOLON), \ - KEY(4, 2, KEY_APOSTROPHE), \ - KEY(4, 3, KEY_GRAVE), \ - KEY(4, 4, KEY_LEFTSHIFT), \ - KEY(4, 5, KEY_BACKSLASH), \ - KEY(4, 6, KEY_Z), \ - KEY(4, 7, KEY_X), \ - KEY(4, 8, KEY_C), \ - KEY(5, 0, KEY_V), \ - KEY(5, 1, KEY_B), \ - KEY(5, 2, KEY_N), \ - KEY(5, 3, KEY_M), \ - KEY(5, 4, KEY_COMMA), \ - KEY(5, 5, KEY_DOT), \ - KEY(5, 6, KEY_SLASH), \ - KEY(5, 7, KEY_RIGHTSHIFT), \ - KEY(5, 8, KEY_KPASTERISK), \ - KEY(6, 0, KEY_LEFTALT), \ - KEY(6, 1, KEY_SPACE), \ - KEY(6, 2, KEY_CAPSLOCK), \ - KEY(6, 3, KEY_F1), \ - KEY(6, 4, KEY_F2), \ - KEY(6, 5, KEY_F3), \ - KEY(6, 6, KEY_F4), \ - KEY(6, 7, KEY_F5), \ - KEY(6, 8, KEY_F6), \ - KEY(7, 0, KEY_F7), \ - KEY(7, 1, KEY_F8), \ - KEY(7, 2, KEY_F9), \ - KEY(7, 3, KEY_F10), \ - KEY(7, 4, KEY_NUMLOCK), \ - KEY(7, 5, KEY_SCROLLLOCK), \ - KEY(7, 6, KEY_KP7), \ - KEY(7, 7, KEY_KP8), \ - KEY(7, 8, KEY_KP9), \ - KEY(8, 0, KEY_KPMINUS), \ - KEY(8, 1, KEY_KP4), \ - KEY(8, 2, KEY_KP5), \ - KEY(8, 3, KEY_KP6), \ - KEY(8, 4, KEY_KPPLUS), \ - KEY(8, 5, KEY_KP1), \ - KEY(8, 6, KEY_KP2), \ - KEY(8, 7, KEY_KP3), \ - KEY(8, 8, KEY_KP0), \ -} - -#define DECLARE_6x6_KEYMAP(_name) \ -int _name[] = { \ - KEY(0, 0, KEY_RESERVED), \ - KEY(0, 1, KEY_1), \ - KEY(0, 2, KEY_2), \ - KEY(0, 3, KEY_3), \ - KEY(0, 4, KEY_4), \ - KEY(0, 5, KEY_5), \ - KEY(1, 0, KEY_Q), \ - KEY(1, 1, KEY_W), \ - KEY(1, 2, KEY_E), \ - KEY(1, 3, KEY_R), \ - KEY(1, 4, KEY_T), \ - KEY(1, 5, KEY_Y), \ - KEY(2, 0, KEY_D), \ - KEY(2, 1, KEY_F), \ - KEY(2, 2, KEY_G), \ - KEY(2, 3, KEY_H), \ - KEY(2, 4, KEY_J), \ - KEY(2, 5, KEY_K), \ - KEY(3, 0, KEY_B), \ - KEY(3, 1, KEY_N), \ - KEY(3, 2, KEY_M), \ - KEY(3, 3, KEY_COMMA), \ - KEY(3, 4, KEY_DOT), \ - KEY(3, 5, KEY_SLASH), \ - KEY(4, 0, KEY_F6), \ - KEY(4, 1, KEY_F7), \ - KEY(4, 2, KEY_F8), \ - KEY(4, 3, KEY_F9), \ - KEY(4, 4, KEY_F10), \ - KEY(4, 5, KEY_NUMLOCK), \ - KEY(5, 0, KEY_KP2), \ - KEY(5, 1, KEY_KP3), \ - KEY(5, 2, KEY_KP0), \ - KEY(5, 3, KEY_KPDOT), \ - KEY(5, 4, KEY_RO), \ - KEY(5, 5, KEY_ZENKAKUHANKAKU), \ -} - -#define KEYPAD_9x9 0 -#define KEYPAD_6x6 1 -#define KEYPAD_2x2 2 - -/** - * struct kbd_platform_data - spear keyboard platform data - * keymap: pointer to keymap data (table and size) - * rep: enables key autorepeat - * mode: choose keyboard support(9x9, 6x6, 2x2) - * suspended_rate: rate at which keyboard would operate in suspended mode - * - * This structure is supposed to be used by platform code to supply - * keymaps to 
drivers that implement keyboards. - */ -struct kbd_platform_data { - const struct matrix_keymap_data *keymap; - bool rep; - unsigned int mode; - unsigned int suspended_rate; -}; - -#endif /* __PLAT_KEYBOARD_H */ diff --git a/include/linux/platform_data/keypad-pxa27x.h b/include/linux/platform_data/keypad-pxa27x.h deleted file mode 100644 index a376442b9935..000000000000 --- a/include/linux/platform_data/keypad-pxa27x.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_PXA27x_KEYPAD_H -#define __ASM_ARCH_PXA27x_KEYPAD_H - -#include <linux/input.h> -#include <linux/input/matrix_keypad.h> - -#define MAX_MATRIX_KEY_ROWS (8) -#define MAX_MATRIX_KEY_COLS (8) -#define MATRIX_ROW_SHIFT (3) -#define MAX_DIRECT_KEY_NUM (8) - -/* pxa3xx keypad platform specific parameters - * - * NOTE: - * 1. direct_key_num indicates the number of keys in the direct keypad - * _plus_ the number of rotary-encoder sensor inputs, this can be - * left as 0 if only rotary encoders are enabled, the driver will - * automatically calculate this - * - * 2. direct_key_map is the key code map for the direct keys, if rotary - * encoder(s) are enabled, direct key 0/1(2/3) will be ignored - * - * 3. rotary can be either interpreted as a relative input event (e.g. - * REL_WHEEL/REL_HWHEEL) or specific keys (e.g. UP/DOWN/LEFT/RIGHT) - * - * 4. matrix key and direct key will use the same debounce_interval by - * default, which should be sufficient in most cases - * - * pxa168 keypad platform specific parameter - * - * NOTE: - * clear_wakeup_event callback is a workaround required to clear the - * keypad interrupt. The keypad wake must be cleared in addition to - * reading the MI/DI bits in the KPC register. - */ -struct pxa27x_keypad_platform_data { - - /* code map for the matrix keys */ - const struct matrix_keymap_data *matrix_keymap_data; - unsigned int matrix_key_rows; - unsigned int matrix_key_cols; - - /* direct keys */ - int direct_key_num; - unsigned int direct_key_map[MAX_DIRECT_KEY_NUM]; - /* the key output may be low active */ - int direct_key_low_active; - /* give board a chance to choose the start direct key */ - unsigned int direct_key_mask; - - /* rotary encoders 0 */ - int enable_rotary0; - int rotary0_rel_code; - int rotary0_up_key; - int rotary0_down_key; - - /* rotary encoders 1 */ - int enable_rotary1; - int rotary1_rel_code; - int rotary1_up_key; - int rotary1_down_key; - - /* key debounce interval */ - unsigned int debounce_interval; - - /* clear wakeup event requirement for pxa168 */ - void (*clear_wakeup_event)(void); -}; - -extern void pxa_set_keypad_info(struct pxa27x_keypad_platform_data *info); - -#endif /* __ASM_ARCH_PXA27x_KEYPAD_H */ diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h deleted file mode 100644 index 25390fc3e795..000000000000 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * - * S3C2410 - NAND device controller platform_device info -*/ - -#ifndef __MTD_NAND_S3C2410_H -#define __MTD_NAND_S3C2410_H - -#include <linux/mtd/rawnand.h> - -/** - * struct s3c2410_nand_set - define a set of one or more nand chips - * @flash_bbt: Openmoko u-boot can create a Bad Block Table - * Setting this flag will allow the kernel to - * look for it at boot time and also skip the NAND - * scan. 
- * @options: Default value to set into 'struct nand_chip' options. - * @nr_chips: Number of chips in this set - * @nr_partitions: Number of partitions pointed to by @partitions - * @name: Name of set (optional) - * @nr_map: Map for low-layer logical to physical chip numbers (option) - * @partitions: The mtd partition list - * - * define a set of one or more nand chips registered with an unique mtd. Also - * allows to pass flag to the underlying NAND layer. 'disable_ecc' will trigger - * a warning at boot time. - */ -struct s3c2410_nand_set { - unsigned int flash_bbt:1; - - unsigned int options; - int nr_chips; - int nr_partitions; - char *name; - int *nr_map; - struct mtd_partition *partitions; - struct device_node *of_node; -}; - -struct s3c2410_platform_nand { - /* timing information for controller, all times in nanoseconds */ - - int tacls; /* time for active CLE/ALE to nWE/nOE */ - int twrph0; /* active time for nWE/nOE */ - int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ - - unsigned int ignore_unset_ecc:1; - - enum nand_ecc_engine_type engine_type; - - int nr_sets; - struct s3c2410_nand_set *sets; - - void (*select_chip)(struct s3c2410_nand_set *, - int chip); -}; - -/** - * s3c_nand_set_platdata() - register NAND platform data. - * @nand: The NAND platform data to register with s3c_device_nand. - * - * This function copies the given NAND platform data, @nand and registers - * it with the s3c_device_nand. This allows @nand to be __initdata. -*/ -extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); - -#endif /*__MTD_NAND_S3C2410_H */ diff --git a/include/linux/platform_data/touchscreen-s3c2410.h b/include/linux/platform_data/touchscreen-s3c2410.h deleted file mode 100644 index bf8d3b9d7c6a..000000000000 --- a/include/linux/platform_data/touchscreen-s3c2410.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2005 Arnaud Patard <arnaud.patard@rtp-net.org> -*/ - -#ifndef __TOUCHSCREEN_S3C2410_H -#define __TOUCHSCREEN_S3C2410_H - -struct s3c2410_ts_mach_info { - int delay; - int presc; - int oversampling_shift; - void (*cfg_gpio)(struct platform_device *dev); -}; - -extern void s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *); -extern void s3c64xx_ts_set_platdata(struct s3c2410_ts_mach_info *); - -/* defined by architecture to configure gpio */ -extern void s3c24xx_ts_cfg_gpio(struct platform_device *dev); - -#endif /*__TOUCHSCREEN_S3C2410_H */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d88d6b6ccf5b..a3f44f6c2da1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -21,6 +21,7 @@ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +#define RPM_TRANSPARENT 0x10 /* Succeed if runtime PM is disabled */ /* * Use this for defining a set of PM operations to be used in all situations @@ -350,13 +351,12 @@ static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. 
- * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM idle and suspend callbacks. */ @@ -370,14 +370,15 @@ static inline int pm_runtime_idle(struct device *dev) * @dev: Target device. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -396,14 +397,15 @@ static inline int pm_runtime_suspend(struct device *dev) * engaging its "idle check" callback. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -433,13 +435,12 @@ static inline int pm_runtime_resume(struct device *dev) * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -464,15 +465,16 @@ static inline int pm_request_resume(struct device *dev) * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -511,6 +513,19 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +static inline int pm_runtime_get_active(struct device *dev, int rpmflags) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. 
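A usage note before the refactor below: the pm_runtime_get_active() helper added above bundles the get/resume/error-cleanup dance behind one call. A sketch with a hypothetical driver's foo_start(); pm_runtime_get_active(), RPM_TRANSPARENT and pm_runtime_put() are names from this file:

static int foo_start(struct device *dev)
{
	int ret;

	/*
	 * Resume and bump the usage counter; with RPM_TRANSPARENT the call
	 * also succeeds when runtime PM is disabled for @dev. On failure the
	 * usage counter has already been dropped via pm_runtime_put_noidle().
	 */
	ret = pm_runtime_get_active(dev, RPM_TRANSPARENT);
	if (ret)
		return ret;

	/* ... touch the hardware ... */

	pm_runtime_put(dev);
	return 0;
}

The DEFINE_GUARD_COND(pm_runtime_active, _try, ...) variants added further down wrap the same call for scope-based use with ACQUIRE()/ACQUIRE_ERR().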
@@ -521,15 +536,7 @@ static inline int pm_runtime_get_sync(struct device *dev) */ static inline int pm_runtime_resume_and_get(struct device *dev) { - int ret; - - ret = __pm_runtime_resume(dev, RPM_GET_PUT); - if (ret < 0) { - pm_runtime_put_noidle(dev); - return ret; - } - - return 0; + return pm_runtime_get_active(dev, 0); } /** @@ -540,23 +547,22 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * equal to 0, queue up a work item for @dev like in pm_request_idle(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } -DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) - /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. @@ -565,15 +571,16 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -590,15 +597,16 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { @@ -606,6 +614,29 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_noresume, struct device *, + pm_runtime_get_noresume(_T), pm_runtime_put_noidle(_T)); + +DEFINE_GUARD(pm_runtime_active, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put(_T)); +DEFINE_GUARD(pm_runtime_active_auto, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T)); +/* + * Use the following guards with ACQUIRE()/ACQUIRE_ERR(). 
+ * + * The difference between the "_try" and "_try_enabled" variants is that the + * former do not produce an error when runtime PM is disabled for the given + * device. + */ +DEFINE_GUARD_COND(pm_runtime_active, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, + pm_runtime_resume_and_get(_T)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, + pm_runtime_resume_and_get(_T)) + /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. @@ -619,14 +650,15 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -646,15 +678,15 @@ static inline int pm_runtime_put_sync(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. - * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -677,15 +709,16 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. 
*/ diff --git a/include/linux/printk.h b/include/linux/printk.h index 5d22b803f51e..45c663124c9b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) #endif -bool this_cpu_in_panic(void); - #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0f5f94137f6d..e0dbcb4b4fd9 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -107,6 +107,7 @@ enum sev_cmd { SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, SEV_CMD_SNP_COMMIT = 0x0CB, SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_SNP_FEATURE_INFO = 0x0CE, SEV_CMD_MAX, }; @@ -747,10 +748,13 @@ struct sev_data_snp_guest_request { struct sev_data_snp_init_ex { u32 init_rmp:1; u32 list_paddr_en:1; - u32 rsvd:30; + u32 rapl_dis:1; + u32 ciphertext_hiding_en:1; + u32 rsvd:28; u32 rsvd1; u64 list_paddr; - u8 rsvd2[48]; + u16 max_snp_asid; + u8 rsvd2[46]; } __packed; /** @@ -799,10 +803,13 @@ struct sev_data_snp_shutdown_ex { * @probe: True if this is being called as part of CCP module probe, which * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed * unless psp_init_on_probe module param is set + * @max_snp_asid: When non-zero, enable ciphertext hiding and specify the + * maximum ASID that can be used for an SEV-SNP guest. */ struct sev_platform_init_args { int error; bool probe; + unsigned int max_snp_asid; }; /** @@ -814,6 +821,36 @@ struct sev_data_snp_commit { u32 len; } __packed; +/** + * struct sev_data_snp_feature_info - SEV_SNP_FEATURE_INFO structure + * + * @length: length of the command buffer read by the PSP + * @ecx_in: subfunction index + * @feature_info_paddr: System Physical Address of the FEATURE_INFO structure + */ +struct sev_data_snp_feature_info { + u32 length; + u32 ecx_in; + u64 feature_info_paddr; +} __packed; + +/** + * struct snp_feature_info - FEATURE_INFO structure + * + * @eax: output of SNP_FEATURE_INFO command + * @ebx: output of SNP_FEATURE_INFO command + * @ecx: output of SNP_FEATURE_INFO command + * @edx: output of SNP_FEATURE_INFO command + */ +struct snp_feature_info { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +} __packed; + +#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -957,6 +994,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); +bool sev_is_snp_ciphertext_hiding_supported(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -993,6 +1031,8 @@ static inline void snp_free_firmware_page(void *addr) { } static inline void sev_platform_shutdown(void) { } +static inline bool sev_is_snp_ciphertext_hiding_supported(void) { return false; } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 1b11926ddd47..2abba7552605 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -43,6 +43,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) #define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) /** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list.
+ */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + +/** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list * diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f67f96711f0d..c5b30054cd01 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void rcu_irq_work_resched(void) { } @@ -713,6 +713,24 @@ do { \ (c) || rcu_read_lock_sched_held(), \ __rcu) +/** + * rcu_dereference_all_check() - rcu_dereference_all with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is similar to rcu_dereference_check(), but allows protection + * by all forms of vanilla RCU readers, including preemption disabled, + * bh-disabled, and interrupt-disabled regions of code. Note that "vanilla + * RCU" excludes SRCU and the various Tasks RCU flavors. Please note + * that this macro should not be backported to any Linux-kernel version + * preceding v5.0 due to changes in synchronize_rcu() semantics prior + * to that version. + */ +#define rcu_dereference_all_check(p, c) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_any_held(), \ + __rcu) + /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. @@ -768,6 +786,14 @@ do { \ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** + * rcu_dereference_all() - fetch RCU-all-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. 
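A reader-side sketch of what the new rcu_dereference_all() tolerates; the foo structure and gp pointer are illustrative only, not from this diff. Because the check folds in rcu_read_lock_any_held(), any vanilla RCU read-side context (rcu_read_lock(), preemption disabled, bh or irqs disabled) satisfies it:

#include <linux/rcupdate.h>

struct foo { int val; };
static struct foo __rcu *gp; /* illustrative global */

static int read_foo(void)
{
	struct foo *p;
	int val = 0;

	preempt_disable(); /* a vanilla RCU reader, but not rcu_read_lock() */
	p = rcu_dereference_all(gp);
	if (p)
		val = p->val;
	preempt_enable();
	return val;
}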
+ */ +#define rcu_dereference_all(p) rcu_dereference_all_check(p, 0) + +/** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off * diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 6c85b28ea30b..05a221ce79a6 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -122,7 +122,7 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, return hash & (tbl->size - 1); } -static inline unsigned int rht_key_get_hash(struct rhashtable *ht, +static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { @@ -152,7 +152,7 @@ static inline unsigned int rht_key_get_hash(struct rhashtable *ht, return hash; } -static inline unsigned int rht_key_hashfn( +static __always_inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { @@ -161,7 +161,7 @@ static inline unsigned int rht_key_hashfn( return rht_bucket_index(tbl, hash); } -static inline unsigned int rht_head_hashfn( +static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { @@ -272,13 +272,13 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert( rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + rcu_dereference_all_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + rcu_dereference_all_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) @@ -373,7 +373,7 @@ static inline struct rhash_head *__rht_ptr( static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { - return __rht_ptr(rcu_dereference(*bkt), bkt); + return __rht_ptr(rcu_dereference_all(*bkt), bkt); } static inline struct rhash_head *rht_ptr( @@ -497,7 +497,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain @@ -513,7 +513,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head @@ -560,7 +560,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, * list returned by rhltable_lookup. 
*/ #define rhl_for_each_rcu(pos, list) \ - for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + for (pos = list; pos; pos = rcu_dereference_all(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type @@ -574,7 +574,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) @@ -586,7 +586,7 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, } /* Internal function, do not use. */ -static inline struct rhash_head *__rhashtable_lookup( +static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -639,7 +639,7 @@ restart: * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup( +static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -662,7 +662,7 @@ static inline void *rhashtable_lookup( * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup_fast( +static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -689,7 +689,7 @@ static inline void *rhashtable_lookup_fast( * * Returns the list of entries that match the given key. */ -static inline struct rhlist_head *rhltable_lookup( +static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { @@ -702,7 +702,7 @@ static inline struct rhlist_head *rhltable_lookup( * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ -static inline void *__rhashtable_insert_fast( +static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -825,7 +825,7 @@ out_unlock: * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_insert_fast( +static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -854,7 +854,7 @@ static inline int rhashtable_insert_fast( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert_key( +static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { @@ -877,7 +877,7 @@ static inline int rhltable_insert_key( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert( +static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -902,7 +902,7 @@ static inline int rhltable_insert( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. 
*/ -static inline int rhashtable_lookup_insert_fast( +static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -929,7 +929,7 @@ static inline int rhashtable_lookup_insert_fast( * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_fast( +static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -956,7 +956,7 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Returns zero on success. */ -static inline int rhashtable_lookup_insert_key( +static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -982,7 +982,7 @@ static inline int rhashtable_lookup_insert_key( * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_key( +static __always_inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -992,7 +992,7 @@ static inline void *rhashtable_lookup_get_insert_key( } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast_one( +static __always_inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) @@ -1074,7 +1074,7 @@ unlocked: } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static __always_inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -1115,7 +1115,7 @@ static inline int __rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhashtable_remove_fast( +static __always_inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -1137,7 +1137,7 @@ static inline int rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhltable_remove( +static __always_inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -1145,7 +1145,7 @@ static inline int rhltable_remove( } /* Internal function, please use rhashtable_replace_fast() instead */ -static inline int __rhashtable_replace_fast( +static __always_inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) @@ -1208,7 +1208,7 @@ unlocked: * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. 
*/ -static inline int rhashtable_replace_fast( +static __always_inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6cd020eea37a..daa92a58585d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -394,18 +394,8 @@ typedef int __bitwise rmap_t; /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) -/* - * Internally, we're using an enum to specify the granularity. We make the - * compiler emit specialized code for each granularity. - */ -enum rmap_level { - RMAP_LEVEL_PTE = 0, - RMAP_LEVEL_PMD, - RMAP_LEVEL_PUD, -}; - -static inline void __folio_rmap_sanity_checks(const struct folio *folio, - const struct page *page, int nr_pages, enum rmap_level level) +static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum pgtable_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -427,18 +417,18 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So - * when RMAP_LEVEL_PMD is set, we assume that we are creating + * when PGTABLE_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PUD: /* * Assume that we are creating a single "entire" mapping of the * folio. 
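The default: BUILD_BUG() arms that this rmap conversion introduces (in place of the old runtime VM_WARN_ON_ONCE()) work because every caller passes a compile-time-constant level into an __always_inline function, so each call site specializes and the dead arms vanish. Schematically, with a hypothetical rmap_op() helper:

#include <linux/build_bug.h>

static __always_inline void rmap_op(enum pgtable_level level)
{
	switch (level) {
	case PGTABLE_LEVEL_PTE:
		/* per-PTE path */
		break;
	case PGTABLE_LEVEL_PMD:
	case PGTABLE_LEVEL_PUD:
		/* "entire" huge-mapping path */
		break;
	default:
		/*
		 * level is always a constant here, so this arm must be
		 * eliminated at compile time; if it survives, the build
		 * fails instead of warning at runtime.
		 */
		BUILD_BUG();
	}
}

static void rmap_op_pte(void)
{
	rmap_op(PGTABLE_LEVEL_PTE); /* compiles down to the PTE arm only */
}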
@@ -447,7 +437,7 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); break; default: - VM_WARN_ON_ONCE(true); + BUILD_BUG(); } /* @@ -567,14 +557,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio) static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - enum rmap_level level) + enum pgtable_level level) { const int orig_nr_pages = nr_pages; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (!folio_test_large(folio)) { atomic_inc(&folio->_mapcount); break; @@ -587,11 +577,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, } folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } } @@ -609,13 +601,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, static inline void folio_dup_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE); } /** @@ -632,7 +624,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif @@ -640,7 +632,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, enum rmap_level level) + struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -665,7 +657,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * copying if the folio maybe pinned. 
*/ switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) @@ -687,8 +679,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; @@ -697,6 +689,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } return 0; } @@ -730,7 +724,7 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, - src_vma, RMAP_LEVEL_PTE); + src_vma, PGTABLE_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, @@ -738,7 +732,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, - RMAP_LEVEL_PTE); + PGTABLE_LEVEL_PTE); } /** @@ -770,7 +764,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, - src_vma, RMAP_LEVEL_PMD); + src_vma, PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -778,7 +772,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, enum pgtable_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); @@ -873,7 +867,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { - return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); + return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE); } /** @@ -904,7 +898,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, - RMAP_LEVEL_PMD); + PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -928,6 +922,11 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Result flags */ + +/* The page is mapped across page table boundary */ +#define PVMW_PGTABLE_CROSSED (1 << 16) + struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index fa9f1021541e..ede4c6bf6f22 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) return READ_ONCE(lock->owner) != NULL; } +#ifdef CONFIG_RT_MUTEXES +#define RT_MUTEX_HAS_WAITERS 1UL + +static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) +{ + unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + + return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); +} 
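/*
 * Illustration (demo-only, not kernel API) of the owner encoding that
 * rt_mutex_owner() above decodes: task_struct pointers are at least
 * word-aligned, so bit 0 is free to carry RT_MUTEX_HAS_WAITERS, and a
 * simple mask recovers the owner pointer.
 */
static inline unsigned long demo_encode_owner(struct task_struct *owner,
					      bool has_waiters)
{
	return (unsigned long)owner | (has_waiters ? RT_MUTEX_HAS_WAITERS : 0UL);
}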
+#endif extern void rt_mutex_base_init(struct rt_mutex_base *rtb); /** diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 3b4c36705a9b..3c5689356004 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1160,6 +1160,8 @@ struct rtsx_cr_option { bool ocp_en; u8 sd_400mA_ocp_thd; u8 sd_800mA_ocp_thd; + u8 sd_cd_reverse_en; + u8 sd_wp_reverse_en; }; /* diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 6f8a4965f9b9..29f6ceb98d74 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -158,6 +158,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { + VM_WARN_ON_ONCE(!page_range_contiguous(page, ALIGN(len + offset, PAGE_SIZE) / PAGE_SIZE)); sg_assign_page(sg, page); sg->offset = offset; sg->length = len; @@ -600,7 +601,7 @@ void __sg_page_iter_start(struct sg_page_iter *piter, */ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) { - return nth_page(sg_page(piter->sg), piter->sg_pgoffset); + return sg_page(piter->sg) + piter->sg_pgoffset; } /** diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 6eb65ceed213..b7fafe999073 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,6 +8,20 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ +static inline unsigned long __mm_flags_get_dumpable(struct mm_struct *mm) +{ + /* + * By convention, dumpable bits are contained in first 32 bits of the + * bitmap, so we can simply access this first unsigned long directly. + */ + return __mm_flags_get_word(mm); +} + +static inline void __mm_flags_set_mask_dumpable(struct mm_struct *mm, int value) +{ + __mm_flags_set_mask_bits_word(mm, MMF_DUMPABLE_MASK, value); +} + extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things @@ -22,7 +36,9 @@ static inline int __get_dumpable(unsigned long mm_flags) static inline int get_dumpable(struct mm_struct *mm) { - return __get_dumpable(mm->flags); + unsigned long flags = __mm_flags_get_dumpable(mm); + + return __get_dumpable(flags); } #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2201da0afecc..0232d983b715 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,7 +178,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack); + const struct rlimit *rlim_stack); unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -211,7 +211,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack) {} + const struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 34d6a0e108c3..525aa2a632b2 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * pins the final release of task.io_context. Also protects ->cpuset and * ->cgroup.subsys[]. And ->vfork_done. 
And ->sysvshm.shm_clist. * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. + * Nests inside of read_lock(&tasklist_lock). It must not be nested with + * write_lock_irq(&tasklist_lock), neither inside nor outside. */ static inline void task_lock(struct task_struct *p) { diff --git a/include/linux/security.h b/include/linux/security.h index bd33f194c94a..92ac3f27b973 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -391,7 +391,7 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, - struct qstr *name, + const struct qstr *name, const struct cred *old, struct cred *new); int security_path_notify(const struct path *path, u64 mask, @@ -872,7 +872,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, } static inline int security_dentry_create_files_as(struct dentry *dentry, - int mode, struct qstr *name, + int mode, const struct qstr *name, const struct cred *old, struct cred *new) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 84b4648ead7e..666430b47899 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -788,6 +788,19 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo spin_unlock_irqrestore(&up->lock, flags); } +DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); +DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); + +DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), + uart_port_unlock_irq(_T)); + +DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_port_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags); +DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, + uart_port_trylock_irqsave(_T->lock, &_T->flags)); + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 6dfd05ef5c2d..03ba4dab2ef7 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -96,7 +96,7 @@ struct shdma_ops { int (*desc_setup)(struct shdma_chan *, struct shdma_desc *, dma_addr_t, dma_addr_t, size_t *); int (*set_slave)(struct shdma_chan *, int, dma_addr_t, bool); - void (*setup_xfer)(struct shdma_chan *, int); + int (*setup_xfer)(struct shdma_chan *, int); void (*start_xfer)(struct shdma_chan *, struct shdma_desc *); struct shdma_desc *(*embedded_desc)(void *, int); bool (*chan_irq)(struct shdma_chan *, int); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 6d0f9c599ff7..0e47465ef0fd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -99,9 +99,9 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -bool shmem_mapping(struct address_space *mapping); +bool shmem_mapping(const struct address_space *mapping); #else -static inline bool shmem_mapping(struct address_space *mapping) +static inline bool shmem_mapping(const struct address_space *mapping) { return false; } diff --git a/include/linux/slab.h b/include/linux/slab.h 
index d5a8ab98035c..cf443f064a66 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -335,6 +335,37 @@ struct kmem_cache_args { * %NULL means no constructor. */ void (*ctor)(void *); + /** + * @sheaf_capacity: Enable sheaves of given capacity for the cache. + * + * With a non-zero value, allocations from the cache go through caching + * arrays called sheaves. Each cpu has a main sheaf that's always + * present, and a spare sheaf that may be not present. When both become + * empty, there's an attempt to replace an empty sheaf with a full sheaf + * from the per-node barn. + * + * When no full sheaf is available, and gfp flags allow blocking, a + * sheaf is allocated and filled from slab(s) using bulk allocation. + * Otherwise the allocation falls back to the normal operation + * allocating a single object from a slab. + * + * Analogically when freeing and both percpu sheaves are full, the barn + * may replace it with an empty sheaf, unless it's over capacity. In + * that case a sheaf is bulk freed to slab pages. + * + * The sheaves do not enforce NUMA placement of objects, so allocations + * via kmem_cache_alloc_node() with a node specified other than + * NUMA_NO_NODE will bypass them. + * + * Bulk allocation and free operations also try to use the cpu sheaves + * and barn, but fallback to using slab pages directly. + * + * When slub_debug is enabled for the cache, the sheaf_capacity argument + * is ignored. + * + * %0 means no sheaves will be created. + */ + unsigned int sheaf_capacity; }; struct kmem_cache *__kmem_cache_create_args(const char *name, @@ -465,11 +496,16 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc_noprof(const void *objp, size_t new_size, - gfp_t flags) __realloc_size(2); -#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) +void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size, + unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE) +#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__)) +#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n) +#define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE) void kfree(const void *objp); +void kfree_nolock(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); @@ -798,6 +834,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp, + struct slab_sheaf *sheaf) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_from_sheaf(...) 
\ + alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__)) + +unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf); + /* * These macros allow declaring a kmem_buckets * parameter alongside size, which * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call @@ -910,6 +962,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); +#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__)) + #define kmem_buckets_alloc(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) @@ -1041,18 +1096,20 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); -#define kvmalloc_node_noprof(size, flags, node) \ - __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) -#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) - -#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) -#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, + gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \ + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node) +#define kvmalloc_node_align(...) \ + alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__)) +#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n) +#define kvmalloc(...) kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) + #define kmem_buckets_valloc(_b, _size, _flags) \ - alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) @@ -1062,7 +1119,7 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc_node_noprof(bytes, flags, node); + return kvmalloc_node_align_noprof(bytes, 1, flags, node); } #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) @@ -1073,9 +1130,12 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) - __realloc_size(2); -#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define kvrealloc_node_align(...) \ + alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__)) +#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n) +#define kvrealloc(...) 
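/*
 * A usage sketch for the sheaf API documented above, assuming a cache
 * created with a non-zero sheaf_capacity. The cache name, object type,
 * sizes, and the exact prefill failure convention are illustrative
 * assumptions, not taken from this patch.
 */
struct demo_obj { unsigned long id; };

static int demo_sheaf_use(void)
{
	struct kmem_cache_args args = {
		.sheaf_capacity = 32,	/* enable percpu sheaves */
	};
	struct slab_sheaf *sheaf;
	struct kmem_cache *s;
	struct demo_obj *obj;

	s = kmem_cache_create("demo_objs", sizeof(struct demo_obj), &args, 0);
	if (!s)
		return -ENOMEM;

	/* Reserve a batch of 8 objects, then allocate from it cheaply. */
	sheaf = kmem_cache_prefill_sheaf(s, GFP_KERNEL, 8);
	if (IS_ERR_OR_NULL(sheaf)) {	/* exact failure convention assumed */
		kmem_cache_destroy(s);
		return -ENOMEM;
	}

	obj = kmem_cache_alloc_from_sheaf(s, GFP_KERNEL, sheaf);
	if (obj)
		kmem_cache_free(s, obj);

	/* Return unused capacity; may restock the barn or free the sheaf. */
	kmem_cache_return_sheaf(s, GFP_KERNEL, sheaf);
	kmem_cache_destroy(s);
	return 0;
}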
kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE) extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f179700fecaf..ada65b58bc4c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -275,12 +275,27 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * { struct srcu_ctr __percpu *retval; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); retval = __srcu_read_lock_fast(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_lock_fast() for more information. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) + __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + return retval; +} + /** * srcu_down_read_fast - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -295,6 +310,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); return __srcu_read_lock_fast(ssp); } @@ -389,6 +405,18 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); +} + +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_unlock_fast() for more information. + */ +static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, + struct srcu_ctr __percpu *scp) __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); } /** @@ -405,6 +433,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast()."); } /** @@ -486,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, + _T->scp = srcu_read_lock_fast_notrace(_T->lock), + srcu_read_unlock_fast_notrace(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bf44d8d1e69e..42098e0fa0b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -232,23 +232,40 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). 
* * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. + * critical sections either because they disables interrupts, because + * they are a single instruction, or because they are read-modify-write + * atomic operations, depending on the whims of the architecture. + * This matters because the SRCU-fast grace-period mechanism uses either + * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU, + * *not* SRCU, in order to eliminate the need for the read-side smp_mb() + * invocations that are used by srcu_read_lock() and srcu_read_unlock(). + * The __srcu_read_unlock_fast() function also relies on this same RCU + * (again, *not* SRCU) trick to eliminate the need for smp_mb(). + * + * The key point behind this RCU trick is that if any part of a given + * RCU reader precedes the beginning of a given RCU grace period, then + * the entirety of that RCU reader and everything preceding it happens + * before the end of that same RCU grace period. Similarly, if any part + * of a given RCU reader follows the end of a given RCU grace period, + * then the entirety of that RCU reader and everything following it + * happens after the beginning of that same RCU grace period. Therefore, + * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z + * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU + * read-side critical section from the viewpoint of the SRCU grace period. + * This is all the ordering that is required, hence no calls to smp_mb(). * * This means that __srcu_read_lock_fast() is not all that fast * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_locks.counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ return scp; } @@ -259,23 +276,17 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * different CPU than that which was incremented by the corresponding * srcu_read_lock_fast(), but it must be within the same task. * - * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. - * - * This means that __srcu_read_unlock_fast() is not all that fast - * on architectures that support NMIs but do not supply NMI-safe - * implementations of this_cpu_inc(). + * Please see the __srcu_read_lock_fast() function's header comment for + * information on implicit RCU readers and NMI safety. 
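/*
 * Usage sketch for the notrace fast-SRCU reader pair added in srcu.h
 * above, e.g. from tracing code that must not recurse into lockdep;
 * the srcu_struct and the protected work are illustrative.
 */
DEFINE_SRCU(demo_srcu);

static void demo_trace_probe(void)
{
	struct srcu_ctr __percpu *scp;

	scp = srcu_read_lock_fast_notrace(&demo_srcu);
	/* read-side access to demo_srcu-protected state, lockdep-free */
	srcu_read_unlock_fast_notrace(&demo_srcu, scp);

	/* equivalently, scope-based: guard(srcu_fast_notrace)(&demo_srcu); */
}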
*/ -static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +static inline void notrace +__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index f6aeed07fe04..891f6173c951 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -23,43 +23,30 @@ extern unsigned int nlm_debug; #define dprintk(fmt, ...) \ dfprintk(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_cont(fmt, ...) \ - dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) #define dprintk_rcu(fmt, ...) \ dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_rcu_cont(fmt, ...) \ - dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) -# define dfprintk(fac, fmt, ...) \ -do { \ - ifdebug(fac) \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ -} while (0) +# if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE) +# define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) +# else +# define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__) +# endif -# define dfprintk_cont(fac, fmt, ...) \ +# define dfprintk(fac, fmt, ...) \ do { \ ifdebug(fac) \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ } while (0) # define dfprintk_rcu(fac, fmt, ...) \ do { \ ifdebug(fac) { \ rcu_read_lock(); \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ - rcu_read_unlock(); \ - } \ -} while (0) - -# define dfprintk_rcu_cont(fac, fmt, ...) \ -do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ } \ } while (0) @@ -68,7 +55,6 @@ do { \ #else # define ifdebug(fac) if (0) # define dfprintk(fac, fmt, ...) do {} while (0) -# define dfprintk_cont(fac, fmt, ...) do {} while (0) # define dfprintk_rcu(fac, fmt, ...) 
do {} while (0) # define RPC_IFDEBUG(x) #endif diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 40cbe81360ed..5506d20857c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -196,7 +196,7 @@ struct svc_rqst { struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; struct xdr_stream rq_res_stream; - struct page *rq_scratch_page; + struct folio *rq_scratch_folio; struct xdr_buf rq_res; unsigned long rq_maxpages; /* num of entries in rq_pages */ struct page * *rq_pages; @@ -503,7 +503,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; xdr_init_decode(xdr, buf, argv->iov_base, NULL); - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); + xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio); } /** diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..da2a2531e110 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -104,6 +104,9 @@ enum { * it has access to. It is NOT counted * in ->sv_tmpcnt. */ + XPT_RPCB_UNREG, /* transport that needs unregistering + * with rpcbind (TCP, UDP) on destroy + */ }; /* @@ -165,7 +168,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..152597750f55 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -288,16 +288,16 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) } /** - * xdr_set_scratch_page - Attach a scratch buffer for decoding data + * xdr_set_scratch_folio - Attach a scratch buffer for decoding data * @xdr: pointer to xdr_stream struct - * @page: an anonymous page + * @page: an anonymous folio * * See xdr_set_scratch_buffer(). */ static inline void -xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) +xdr_set_scratch_folio(struct xdr_stream *xdr, struct folio *folio) { - xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); + xdr_set_scratch_buffer(xdr, folio_address(folio), folio_size(folio)); } /** @@ -721,7 +721,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) * @len: size of buffer pointed to by @ptr * * Return values: - * On success, returns size of object stored in @ptr + * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t @@ -732,7 +732,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) if (unlikely(!p)) return -EBADMSG; xdr_decode_opaque_fixed(p, ptr, len); - return len; + return 0; } /** diff --git a/include/linux/swap.h b/include/linux/swap.h index 7012a0f758d8..e818fbade1e2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -236,40 +236,6 @@ enum { #define SWAP_CONT_MAX 0x7f /* Max count */ /* - * We use this to track usage of a cluster. A cluster is a block of swap disk - * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All - * free clusters are organized into a list. We fetch an entry from the list to - * get a free cluster. 
- * - * The flags field determines if a cluster is free. This is - * protected by cluster lock. - */ -struct swap_cluster_info { - spinlock_t lock; /* - * Protect swap_cluster_info fields - * other than list, and swap_info_struct->swap_map - * elements corresponding to the swap cluster. - */ - u16 count; - u8 flags; - u8 order; - struct list_head list; -}; - -/* All on-list cluster must have a non-zero flag. */ -enum swap_cluster_flags { - CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ - CLUSTER_FLAG_FREE, - CLUSTER_FLAG_NONFULL, - CLUSTER_FLAG_FRAG, - /* Clusters with flags above are allocatable */ - CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, - CLUSTER_FLAG_FULL, - CLUSTER_FLAG_DISCARD, - CLUSTER_FLAG_MAX, -}; - -/* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the * cluster to which it belongs being marked free. Therefore 0 is safe to use as @@ -310,7 +276,6 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ - atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ atomic_long_t inuse_pages; /* number of those currently in use */ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ @@ -321,11 +286,8 @@ struct swap_info_struct { struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like - * swap_map, lowest_bit, highest_bit, - * inuse_pages, cluster_next, - * cluster_nr, lowest_alloc, - * highest_alloc, free/discard cluster - * list. other fields are only changed + * swap_map, inuse_pages and all cluster + * lists. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. 
If @@ -517,10 +479,7 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; -extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); -extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_folio_sector(struct folio *folio); @@ -530,11 +489,6 @@ static inline void put_swap_device(struct swap_info_struct *si) } #else /* CONFIG_SWAP */ -static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) -{ - return NULL; -} - static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) { return NULL; diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h deleted file mode 100644 index 5cf6f6f82aa7..000000000000 --- a/include/linux/tca6416_keypad.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tca6416 keypad platform support - * - * Copyright (C) 2010 Texas Instruments - * - * Author: Sriramakrishnan <srk@ti.com> - */ - -#ifndef _TCA6416_KEYS_H -#define _TCA6416_KEYS_H - -#include <linux/types.h> - -struct tca6416_button { - /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ - int active_low; - int type; /* input event type (EV_KEY, EV_SW) */ -}; - -struct tca6416_keys_platform_data { - struct tca6416_button *buttons; - int nbuttons; - unsigned int rep:1; /* enable input subsystem auto repeat */ - uint16_t pinmask; - uint16_t invert; - int use_polling; /* use polling if Interrupt is not connected*/ -}; -#endif diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 75247486616b..0ba112175bb3 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -213,7 +213,7 @@ enum tb_link_width { * queried first * @service_ids: Used to generate IDs for the services * @in_hopids: Input HopIDs for DMA tunneling - * @out_hopids; Output HopIDs for DMA tunneling + * @out_hopids: Output HopIDs for DMA tunneling * @local_property_block: Local block of properties * @local_property_block_gen: Generation of @local_property_block * @local_property_block_len: Length of the @local_property_block in dwords @@ -356,7 +356,7 @@ int tb_xdomain_request(struct tb_xdomain *xd, const void *request, unsigned int timeout_msec); /** - * tb_protocol_handler - Protocol specific handler + * struct tb_protocol_handler - Protocol specific handler * @uuid: XDomain messages with this UUID are dispatched to this handler * @callback: Callback called with the XDomain message. 
Returning %1 * here tells the XDomain core that the message was handled @@ -437,7 +437,7 @@ static inline struct tb_service *tb_to_service(struct device *dev) } /** - * tb_service_driver - Thunderbolt service driver + * struct tb_service_driver - Thunderbolt service driver * @driver: Driver structure * @probe: Called when the driver is probed * @remove: Called when the driver is removed (optional) @@ -519,6 +519,7 @@ struct tb_nhi { * @head: Head of the ring (write next descriptor here) * @tail: Tail of the ring (complete next descriptor here) * @descriptors: Allocated descriptors for this ring + * @descriptors_dma: DMA address of descriptors for this ring * @queue: Queue holding frames to be transferred over this ring * @in_flight: Queue holding frames that are currently in flight * @work: Interrupt work structure @@ -571,12 +572,12 @@ typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); /** * enum ring_desc_flags - Flags for DMA ring descriptor - * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) - * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) - * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) - * %RING_DESC_POSTED: Always set this - * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun - * %RING_DESC_INTERRUPT: Request an interrupt on completion + * @RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * @RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * @RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * @RING_DESC_POSTED: Always set this + * @RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * @RING_DESC_INTERRUPT: Request an interrupt on completion */ enum ring_desc_flags { RING_DESC_ISOCH = 0x1, @@ -636,7 +637,7 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * If ring_stop() is called after the packet has been enqueued * @frame->callback will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop() has been called, %0 otherwise. */ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) { @@ -657,7 +658,7 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * If ring_stop() is called after the packet has been enqueued @frame->callback * will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop has been called, %0 otherwise. */ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) { @@ -675,6 +676,8 @@ void tb_ring_poll_complete(struct tb_ring *ring); * * Use this function when you are mapping DMA for buffers that are * passed to the ring for sending/receiving. + * + * Return: Pointer to device used for DMA mapping. 
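/*
 * Sketch of the mapping pattern tb_ring_dma_device() (just below)
 * exists for: map frame buffers with the ring's DMA device before
 * enqueueing them. Buffer lifetime and unmapping are elided, and the
 * helper name is illustrative.
 */
static int demo_map_and_queue(struct tb_ring *ring, struct ring_frame *frame,
			      void *buf, size_t size)
{
	struct device *dma_dev = tb_ring_dma_device(ring);

	frame->buffer_phy = dma_map_single(dma_dev, buf, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(dma_dev, frame->buffer_phy))
		return -ENOMEM;

	return tb_ring_rx(ring, frame);	/* %-ESHUTDOWN once the ring stops */
}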
*/ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) { diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index a93ed5ac3226..557780fe1c77 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -21,10 +21,10 @@ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) struct trace_seq { - char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; size_t readpos; int full; + char buffer[TRACE_SEQ_BUFFER_SIZE]; }; static inline void diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 332ddb93603e..660c254f1efe 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -270,4 +270,18 @@ static inline void tty_port_tty_vhangup(struct tty_port *port) __tty_port_tty_hangup(port, false, false); } +#ifdef CONFIG_TTY +void tty_kref_put(struct tty_struct *tty); +__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true); +__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock)); +static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport) +{ + class_tty_port_tty_t _t = { + .lock = tty_port_tty_get(tport), + }; + return _t; +} +#define scoped_tty() ((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope))) +#endif + #endif diff --git a/include/linux/usb.h b/include/linux/usb.h index 9d662c6abb4d..e85105939af8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -636,6 +636,8 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) + * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_usage: number of offload activities happening on this usb device. * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. @@ -724,6 +726,8 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + unsigned offload_at_suspend:1; + int offload_usage; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -841,6 +845,20 @@ static inline void usb_mark_last_busy(struct usb_device *udev) { } #endif +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +int usb_offload_get(struct usb_device *udev); +int usb_offload_put(struct usb_device *udev); +bool usb_offload_check(struct usb_device *udev); +#else + +static inline int usb_offload_get(struct usb_device *udev) +{ return 0; } +static inline int usb_offload_put(struct usb_device *udev) +{ return 0; } +static inline bool usb_offload_check(struct usb_device *udev) +{ return false; } +#endif + extern int usb_disable_lpm(struct usb_device *udev); extern void usb_enable_lpm(struct usb_device *udev); /* Same as above, but these functions lock/unlock the bandwidth_mutex. 
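/*
 * Usage sketch for the conditional tty_port_tty guard added in
 * tty_port.h above: the body runs only when the port currently has a
 * tty, and the kref taken by tty_port_tty_get() is dropped
 * automatically on scope exit. The function name is illustrative.
 */
static void demo_hangup(struct tty_port *port)
{
	scoped_guard(tty_port_tty, port)
		tty_hangup(scoped_tty());
}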
*/ @@ -2039,6 +2057,12 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) return usb_endpoint_maxp(&ep->desc); } +u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, + const struct usb_host_endpoint *ep); + +bool usb_endpoint_is_hs_isoc_double(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f28c5512fcb..3aaf19e77558 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include <linux/cleanup.h> #include <linux/configfs.h> #include <linux/device.h> #include <linux/errno.h> @@ -32,6 +33,7 @@ struct usb_ep; /** * struct usb_request - describes one i/o request + * @ep: The associated endpoint set by usb_ep_alloc_request(). * @buf: Buffer used for data. Always provide this; some controllers * only use PIO, or don't use DMA for some endpoints. * @dma: DMA address corresponding to 'buf'. If you don't set this @@ -98,6 +100,7 @@ struct usb_ep; */ struct usb_request { + struct usb_ep *ep; void *buf; unsigned length; dma_addr_t dma; @@ -291,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep) /*-------------------------------------------------------------------------*/ +/** + * free_usb_request - frees a usb_request object and its buffer + * @req: the request being freed + * + * This helper function frees both the request's buffer and the request object + * itself by calling usb_ep_free_request(). Its signature is designed to be used + * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request + * pointers. 
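/*
 * Scope-based cleanup sketch for the free_usb_request() helper
 * introduced here; assumes usb_ep_alloc_request() has set @req->ep as
 * documented above. The endpoint, length, and one-shot use are
 * illustrative.
 */
static int demo_one_shot(struct usb_ep *ep)
{
	struct usb_request *req __free(free_usb_request) =
		usb_ep_alloc_request(ep, GFP_KERNEL);

	if (!req)
		return -ENOMEM;

	req->length = 64;
	req->buf = kmalloc(req->length, GFP_KERNEL);
	if (!req->buf)
		return -ENOMEM;	/* req itself is freed on scope exit */

	/* use req synchronously here; buf and req are freed on return */
	return 0;
}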
+ */ +static inline void free_usb_request(struct usb_request *req) +{ + if (!req) + return; + + kfree(req->buf); + usb_ep_free_request(req->ep, req); +} + +DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T)) + +/*-------------------------------------------------------------------------*/ + struct usb_dcd_config_params { __u8 bU1devExitLat; /* U1 Device exit Latency */ #define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */ diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 2489a7857d8e..aa9ebb7e2fe0 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,6 +3,7 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX +#include <linux/err.h> #include <linux/property.h> #include <linux/usb/typec.h> @@ -24,16 +25,13 @@ struct typec_switch_desc { void *drvdata; }; +#if IS_ENABLED(CONFIG_TYPEC) + struct typec_switch *fwnode_typec_switch_get(struct fwnode_handle *fwnode); void typec_switch_put(struct typec_switch *sw); int typec_switch_set(struct typec_switch *sw, enum typec_orientation orientation); -static inline struct typec_switch *typec_switch_get(struct device *dev) -{ - return fwnode_typec_switch_get(dev_fwnode(dev)); -} - struct typec_switch_dev * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -42,6 +40,44 @@ void typec_switch_unregister(struct typec_switch_dev *sw); void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data); void *typec_switch_get_drvdata(struct typec_switch_dev *sw); +#else + +static inline struct typec_switch * +fwnode_typec_switch_get(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline void typec_switch_put(struct typec_switch *sw) {} + +static inline int typec_switch_set(struct typec_switch *sw, + enum typec_orientation orientation) +{ + return 0; +} + +static inline struct typec_switch_dev * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void typec_switch_unregister(struct typec_switch_dev *sw) {} + +static inline void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data) {} +static inline void *typec_switch_get_drvdata(struct typec_switch_dev *sw) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +#endif /* CONFIG_TYPEC */ + +static inline struct typec_switch *typec_switch_get(struct device *dev) +{ + return fwnode_typec_switch_get(dev_fwnode(dev)); +} + struct typec_mux_state { struct typec_altmode *alt; unsigned long mode; diff --git a/include/linux/usb/usbio.h b/include/linux/usb/usbio.h new file mode 100644 index 000000000000..6c4e7c246d58 --- /dev/null +++ b/include/linux/usb/usbio.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Intel Corporation. 
+ * + */ + +#ifndef _LINUX_USBIO_H_ +#define _LINUX_USBIO_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/byteorder/generic.h> +#include <linux/list.h> +#include <linux/types.h> + +/*********************** + * USBIO Clients Names * + ***********************/ +#define USBIO_GPIO_CLIENT "usbio-gpio" +#define USBIO_I2C_CLIENT "usbio-i2c" + +/**************** + * USBIO quirks * + ****************/ +#define USBIO_QUIRK_BULK_MAXP_63 BIT(0) /* Force bulk endpoint maxp to 63 */ +#define USBIO_QUIRK_I2C_NO_INIT_ACK BIT(8) /* Do not ask for ack on I2C init */ +#define USBIO_QUIRK_I2C_MAX_RW_LEN_52 BIT(9) /* Set i2c-adapter max r/w len to 52 */ +#define USBIO_QUIRK_I2C_USE_CHUNK_LEN BIT(10) /* Send chunk-len for split xfers */ +#define USBIO_QUIRK_I2C_ALLOW_400KHZ BIT(11) /* Override desc, allowing 400 KHz */ + +/************************** + * USBIO Type Definitions * + **************************/ + +/* USBIO Packet Type */ +#define USBIO_PKTTYPE_CTRL 1 +#define USBIO_PKTTYPE_DBG 2 +#define USBIO_PKTTYPE_GPIO 3 +#define USBIO_PKTTYPE_I2C 4 + +/* USBIO Packet Header */ +struct usbio_packet_header { + u8 type; + u8 cmd; + u8 flags; +} __packed; + +/* USBIO Control Transfer Packet */ +struct usbio_ctrl_packet { + struct usbio_packet_header header; + u8 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO Bulk Transfer Packet */ +struct usbio_bulk_packet { + struct usbio_packet_header header; + __le16 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO GPIO commands */ +enum usbio_gpio_cmd { + USBIO_GPIOCMD_DEINIT, + USBIO_GPIOCMD_INIT, + USBIO_GPIOCMD_READ, + USBIO_GPIOCMD_WRITE, + USBIO_GPIOCMD_END +}; + +/* USBIO GPIO config */ +enum usbio_gpio_pincfg { + USBIO_GPIO_PINCFG_DEFAULT, + USBIO_GPIO_PINCFG_PULLUP, + USBIO_GPIO_PINCFG_PULLDOWN, + USBIO_GPIO_PINCFG_PUSHPULL +}; + +#define USBIO_GPIO_PINCFG_SHIFT 2 +#define USBIO_GPIO_PINCFG_MASK (0x3 << USBIO_GPIO_PINCFG_SHIFT) +#define USBIO_GPIO_SET_PINCFG(pincfg) \ + (((pincfg) << USBIO_GPIO_PINCFG_SHIFT) & USBIO_GPIO_PINCFG_MASK) + +enum usbio_gpio_pinmode { + USBIO_GPIO_PINMOD_INVAL, + USBIO_GPIO_PINMOD_INPUT, + USBIO_GPIO_PINMOD_OUTPUT, + USBIO_GPIO_PINMOD_MAXVAL +}; + +#define USBIO_GPIO_PINMOD_MASK 0x3 +#define USBIO_GPIO_SET_PINMOD(pin) (pin & USBIO_GPIO_PINMOD_MASK) + +/************************* + * USBIO GPIO Controller * + *************************/ + +#define USBIO_MAX_GPIOBANKS 5 +#define USBIO_GPIOSPERBANK 32 + +struct usbio_gpio_bank_desc { + u8 id; + u8 pins; + __le32 bmap; +} __packed; + +struct usbio_gpio_init { + u8 bankid; + u8 config; + u8 pincount; + u8 pin; +} __packed; + +struct usbio_gpio_rw { + u8 bankid; + u8 pincount; + u8 pin; + __le32 value; +} __packed; + +/* USBIO I2C commands */ +enum usbio_i2c_cmd { + USBIO_I2CCMD_UNINIT, + USBIO_I2CCMD_INIT, + USBIO_I2CCMD_READ, + USBIO_I2CCMD_WRITE, + USBIO_I2CCMD_END +}; + +/************************ + * USBIO I2C Controller * + ************************/ + +#define USBIO_MAX_I2CBUSES 5 + +#define USBIO_I2C_BUS_ADDR_CAP_10B BIT(3) /* 10bit address support */ +#define USBIO_I2C_BUS_MODE_CAP_MASK 0x3 +#define USBIO_I2C_BUS_MODE_CAP_SM 0 /* Standard Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FM 1 /* Fast Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FMP 2 /* Fast Mode+ */ +#define USBIO_I2C_BUS_MODE_CAP_HSM 3 /* High-Speed Mode */ + +struct usbio_i2c_bus_desc { + u8 id; + u8 caps; +} __packed; + +struct usbio_i2c_uninit { + u8 busid; + __le16 config; +} __packed; + +struct usbio_i2c_init { + u8 busid; + __le16 config; + __le32 speed; +} __packed; + +struct usbio_i2c_rw { + u8 
busid; + __le16 config; + __le16 size; + u8 data[] __counted_by(size); +} __packed; + +int usbio_control_msg(struct auxiliary_device *adev, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_bulk_msg(struct auxiliary_device *adev, u8 type, u8 cmd, bool last, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_acquire(struct auxiliary_device *adev); +void usbio_release(struct auxiliary_device *adev); +void usbio_get_txrxbuf_len(struct auxiliary_device *adev, u16 *txbuf_len, u16 *rxbuf_len); +unsigned long usbio_get_quirks(struct auxiliary_device *adev); +void usbio_acpi_bind(struct auxiliary_device *adev, const struct acpi_device_id *hids); + +#endif diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index 45288c392f6e..005257085dcb 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -11,6 +11,7 @@ #include <linux/scatterlist.h> #include <linux/usb.h> +#include <linux/usb/hcd.h> #define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ @@ -83,6 +84,14 @@ xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, struct usb_host_endpoint *host_ep); struct sg_table * xhci_sideband_get_event_buffer(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +bool xhci_sideband_check(struct usb_hcd *hcd); +#else +static inline bool xhci_sideband_check(struct usb_hcd *hcd) +{ return false; } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ + int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2e7a30fe6b92..4cf21d6e9cfd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/virtio.h> #include <linux/vhost_iotlb.h> #include <linux/virtio_net.h> #include <linux/virtio_blk.h> @@ -70,11 +71,12 @@ struct vdpa_mgmt_dev; /** * struct vdpa_device - representation of a vDPA device * @dev: underlying device - * @dma_dev: the actual device that is performing DMA + * @vmap: the metadata passed to upper layer to be used for mapping * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. * @config: the configuration ops for this device. + * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests @@ -87,9 +89,10 @@ struct vdpa_mgmt_dev; */ struct vdpa_device { struct device dev; - struct device *dma_dev; + union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; + const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ unsigned int index; bool features_valid; @@ -352,11 +355,11 @@ struct vdpa_map_file { * @vdev: vdpa device * @asid: address space identifier * Returns integer: success (0) or error (< 0) - * @get_vq_dma_dev: Get the dma device for a specific + * @get_vq_map: Get the map metadata for a specific * virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index - * Returns pointer to structure device or error (NULL) + * Returns map token union error (NULL) * @bind_mm: Bind the device to a specific address space * so the vDPA framework can use VA when this * callback is implemented. 
(optional) @@ -436,7 +439,7 @@ struct vdpa_config_ops { int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); - struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); + union virtio_map (*get_vq_map)(struct vdpa_device *vdev, u16 idx); int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); void (*unbind_mm)(struct vdpa_device *vdev); @@ -446,6 +449,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va); @@ -457,6 +461,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @member: the name of struct vdpa_device within the @dev_struct * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: the number of virtqueue groups supported by this device * @nas: the number of address spaces * @name: name of the vdpa device @@ -464,10 +469,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ - name, use_va) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, map, \ + ngroups, nas, name, use_va) \ container_of((__vdpa_alloc_device( \ - parent, config, ngroups, nas, \ + parent, config, map, ngroups, nas, \ (sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), name, use_va)), \ @@ -520,9 +525,9 @@ static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) dev_set_drvdata(&vdev->dev, data); } -static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) +static inline union virtio_map vdpa_get_map(struct vdpa_device *vdev) { - return vdev->dma_dev; + return vdev->vmap; } static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index db31fc6f4f1f..96c66126c074 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,15 @@ struct virtqueue { void *priv; }; +struct vduse_iova_domain; + +union virtio_map { + /* Device that performs DMA */ + struct device *dma_dev; + /* VDUSE specific mapping data */ + struct vduse_iova_domain *iova_domain; +}; + int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, @@ -161,9 +170,11 @@ struct virtio_device { struct virtio_device_id id; const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; + const struct virtio_map_ops *map; struct list_head vqs; VIRTIO_DECLARE_FEATURES(features); void *priv; + union virtio_map vmap; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; @@ -262,18 +273,41 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, dma_addr_t *dma_handle, + gfp_t gfp); + +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, void *vaddr, + dma_addr_t 
dma_handle); + +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs); + +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7427b79d6f3d..16001e9f9b39 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -139,6 +139,78 @@ struct virtio_config_ops { int (*enable_vq_after_reset)(struct virtqueue *vq); }; +/** + * struct virtio_map_ops - operations for mapping buffer for a virtio device + * Note: For transport that has its own mapping logic it must + * implements all of the operations + * @map_page: map a buffer to the device + * map: metadata for performing mapping + * page: the page that will be mapped by the device + * offset: the offset in the page for a buffer + * size: the buffer size + * dir: mapping direction + * attrs: mapping attributes + * Returns: the mapped address + * @unmap_page: unmap a buffer from the device + * map: device specific mapping map + * map_handle: the mapped address + * size: the buffer size + * dir: mapping direction + * attrs: unmapping attributes + * @sync_single_for_cpu: sync a single buffer from device to cpu + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @sync_single_for_device: sync a single buffer from cpu to device + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @alloc: alloc a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * map_handle: the mapping address to sync + * gfp: allocation flag (GFP_XXX) + * Returns: virtual address of the allocated buffer + * @free: free a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * vaddr: virtual address of the buffer + * map_handle: the mapping address to sync + * attrs: unmapping attributes + * @need_sync: if the buffer needs synchronization + * map: metadata for performing mapping + * map_handle: the mapped address + * 
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 9b33df741b63..c97a12c1cda3 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -3,6 +3,7 @@
 #define _LINUX_VIRTIO_RING_H
 
 #include <asm/barrier.h>
+#include <linux/virtio.h>
 #include <linux/irqreturn.h>
 #include <uapi/linux/virtio_ring.h>
 
@@ -79,9 +80,9 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
 
 /*
 * Creates a virtqueue and allocates the descriptor ring with per
- * virtqueue DMA device.
+ * virtqueue mapping operations.
 */
-struct virtqueue *vring_create_virtqueue_dma(unsigned int index,
+struct virtqueue *vring_create_virtqueue_map(unsigned int index,
 					     unsigned int num,
 					     unsigned int vring_align,
 					     struct virtio_device *vdev,
@@ -91,7 +92,7 @@ struct virtqueue *vring_create_virtqueue_dma(unsigned int index,
 					     bool (*notify)(struct virtqueue *vq),
 					     void (*callback)(struct virtqueue *vq),
 					     const char *name,
-					     struct device *dma_dev);
+					     union virtio_map map);
 
 /*
 * Creates a virtqueue with a standard layout but a caller-allocated
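A usage sketch for the renamed creator; the middle boolean arguments follow
the existing vring_create_virtqueue() convention, and the demo_ names and
pdev are hypothetical:

	union virtio_map map = { .dma_dev = &pdev->dev };

	vq = vring_create_virtqueue_map(index, num, SMP_CACHE_BYTES, vdev,
					true /* weak_barriers */,
					true /* may_reduce_num */,
					false /* context */,
					demo_notify, demo_callback,
					"demo-vq", map);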
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9e15a088ba38..92f80b4d69a6 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -2,6 +2,8 @@
 #ifndef VM_EVENT_ITEM_H_INCLUDED
 #define VM_EVENT_ITEM_H_INCLUDED
 
+#include <linux/thread_info.h>
+
 #ifdef CONFIG_ZONE_DMA
 #define DMA_ZONE(xx) xx##_DMA,
 #else
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 2759dac6be44..eb54b7b3202f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -197,9 +197,15 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1
 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2);
 #define vcalloc(...)		alloc_hooks(vcalloc_noprof(__VA_ARGS__))
 
-void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags)
-		__realloc_size(2);
-#define vrealloc(...)		alloc_hooks(vrealloc_noprof(__VA_ARGS__))
+void *__must_check vrealloc_node_align_noprof(const void *p, size_t size,
+		unsigned long align, gfp_t flags, int nid) __realloc_size(2);
+#define vrealloc_node_noprof(_p, _s, _f, _nid)	\
+	vrealloc_node_align_noprof(_p, _s, 1, _f, _nid)
+#define vrealloc_noprof(_p, _s, _f)	\
+	vrealloc_node_align_noprof(_p, _s, 1, _f, NUMA_NO_NODE)
+#define vrealloc_node_align(...)	alloc_hooks(vrealloc_node_align_noprof(__VA_ARGS__))
+#define vrealloc_node(...)		alloc_hooks(vrealloc_node_noprof(__VA_ARGS__))
+#define vrealloc(...)			alloc_hooks(vrealloc_noprof(__VA_ARGS__))
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
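The new helpers keep vrealloc()'s semantics while letting callers request a
NUMA node or an explicit alignment; a brief sketch with illustrative values:

	/* plain resize, as before */
	buf = vrealloc(buf, new_size, GFP_KERNEL);

	/* resize with a preferred NUMA node */
	buf = vrealloc_node(buf, new_size, GFP_KERNEL, nid);

	/* resize with an explicit alignment, no node preference */
	buf = vrealloc_node_align(buf, new_size, PAGE_SIZE, GFP_KERNEL,
				  NUMA_NO_NODE);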
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 09855d819418..f648044466d5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -965,6 +965,18 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 	__ret;									\
 })
 
+#define __wait_event_state_exclusive(wq, condition, state)			\
+	___wait_event(wq, condition, state, 1, 0, schedule())
+
+#define wait_event_state_exclusive(wq, condition, state)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_state_exclusive(wq, condition, state);	\
+	__ret;									\
+})
+
 #define __wait_event_killable_timeout(wq_head, condition, timeout)		\
 	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
 		      TASK_KILLABLE, 0, timeout,				\
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 15a4bc4ab819..22dd4adc5667 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -362,12 +362,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb);
 struct folio *writeback_iter(struct address_space *mapping,
 			     struct writeback_control *wbc, struct folio *folio,
 			     int *error);
-typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
-			   void *data);
-
-int write_cache_pages(struct address_space *mapping,
-		      struct writeback_control *wbc, writepage_t writepage,
-		      void *data);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
deleted file mode 100644
index 369ef068fad8..000000000000
--- a/include/linux/zpool.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * zpool memory storage api
- *
- * Copyright (C) 2014 Dan Streetman
- *
- * This is a common frontend for the zswap compressed memory storage
- * implementations.
- */
-
-#ifndef _ZPOOL_H_
-#define _ZPOOL_H_
-
-struct zpool;
-
-bool zpool_has_pool(char *type);
-
-struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp);
-
-const char *zpool_get_type(struct zpool *pool);
-
-void zpool_destroy_pool(struct zpool *pool);
-
-int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
-		 unsigned long *handle, const int nid);
-
-void zpool_free(struct zpool *pool, unsigned long handle);
-
-void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle,
-			   void *local_copy);
-
-void zpool_obj_read_end(struct zpool *zpool, unsigned long handle,
-			void *handle_mem);
-
-void zpool_obj_write(struct zpool *zpool, unsigned long handle,
-		     void *handle_mem, size_t mem_len);
-
-u64 zpool_get_total_pages(struct zpool *pool);
-
-
-/**
- * struct zpool_driver - driver implementation for zpool
- * @type: name of the driver.
- * @list: entry in the list of zpool drivers.
- * @create: create a new pool.
- * @destroy: destroy a pool.
- * @malloc: allocate mem from a pool.
- * @free: free mem from a pool.
- * @sleep_mapped: whether zpool driver can sleep during map.
- * @map: map a handle.
- * @unmap: unmap a handle.
- * @total_size: get total size of a pool.
- *
- * This is created by a zpool implementation and registered
- * with zpool.
- */
-struct zpool_driver {
-	char *type;
-	struct module *owner;
-	atomic_t refcount;
-	struct list_head list;
-
-	void *(*create)(const char *name, gfp_t gfp);
-	void (*destroy)(void *pool);
-
-	int (*malloc)(void *pool, size_t size, gfp_t gfp,
-		      unsigned long *handle, const int nid);
-	void (*free)(void *pool, unsigned long handle);
-
-	void *(*obj_read_begin)(void *pool, unsigned long handle,
-				void *local_copy);
-	void (*obj_read_end)(void *pool, unsigned long handle,
-			     void *handle_mem);
-	void (*obj_write)(void *pool, unsigned long handle,
-			  void *handle_mem, size_t mem_len);
-
-	u64 (*total_pages)(void *pool);
-};
-
-void zpool_register_driver(struct zpool_driver *driver);
-
-int zpool_unregister_driver(struct zpool_driver *driver);
-
-bool zpool_can_sleep_mapped(struct zpool *pool);
-
-#endif
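Returning to the wait.h hunk above: the new macro is the exclusive-waiter
variant of the wait_event_state() family, so a wake_up() wakes at most one
such sleeper. A hedged sketch; demo_wq and demo_ready are hypothetical:

	/* Sleep until demo_ready is set; as an exclusive waiter, only one
	 * sleeper is woken per wake_up() on demo_wq. */
	ret = wait_event_state_exclusive(demo_wq, demo_ready,
					 TASK_INTERRUPTIBLE);
	if (ret)
		return ret;	/* e.g. -ERESTARTSYS on a signal */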

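And for the writeback.h hunk: with write_cache_pages() and writepage_t gone,
remaining users walk dirty folios directly with writeback_iter(), roughly as
below; demo_write_folio stands in for a filesystem's writeout routine:

	struct folio *folio = NULL;
	int error = 0;

	while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
		/* write out one folio; the iterator takes over the tagging
		 * and wbc bookkeeping write_cache_pages() used to do */
		error = demo_write_folio(folio, wbc);
	}
	return error;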