Diffstat (limited to 'arch')
355 files changed, 4113 insertions, 3569 deletions
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 3e61073f4b30..b9cd364e814e 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -196,40 +196,44 @@ static const struct tty_operations srmcons_ops = { static int __init srmcons_init(void) { + struct tty_driver *driver; + int err; + timer_setup(&srmcons_singleton.timer, srmcons_receive_chars, 0); - if (srm_is_registered_console) { - struct tty_driver *driver; - int err; - - driver = tty_alloc_driver(MAX_SRM_CONSOLE_DEVICES, 0); - if (IS_ERR(driver)) - return PTR_ERR(driver); - - tty_port_init(&srmcons_singleton.port); - - driver->driver_name = "srm"; - driver->name = "srm"; - driver->major = 0; /* dynamic */ - driver->minor_start = 0; - driver->type = TTY_DRIVER_TYPE_SYSTEM; - driver->subtype = SYSTEM_TYPE_SYSCONS; - driver->init_termios = tty_std_termios; - tty_set_operations(driver, &srmcons_ops); - tty_port_link_device(&srmcons_singleton.port, driver, 0); - err = tty_register_driver(driver); - if (err) { - tty_driver_kref_put(driver); - tty_port_destroy(&srmcons_singleton.port); - return err; - } - srmcons_driver = driver; - } - return -ENODEV; + if (!srm_is_registered_console) + return -ENODEV; + + driver = tty_alloc_driver(MAX_SRM_CONSOLE_DEVICES, 0); + if (IS_ERR(driver)) + return PTR_ERR(driver); + + tty_port_init(&srmcons_singleton.port); + + driver->driver_name = "srm"; + driver->name = "srm"; + driver->major = 0; /* dynamic */ + driver->minor_start = 0; + driver->type = TTY_DRIVER_TYPE_SYSTEM; + driver->subtype = SYSTEM_TYPE_SYSCONS; + driver->init_termios = tty_std_termios; + tty_set_operations(driver, &srmcons_ops); + tty_port_link_device(&srmcons_singleton.port, driver, 0); + err = tty_register_driver(driver); + if (err) + goto err_free_drv; + + srmcons_driver = driver; + + return 0; +err_free_drv: + tty_driver_kref_put(driver); + tty_port_destroy(&srmcons_singleton.port); + + return err; } device_initcall(srmcons_init); - /* * The console driver */ diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 61c2198b1359..2d491b8cdab9 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -273,14 +273,6 @@ srm_paging_stop (void) } #endif -void __init -mem_init(void) -{ - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - memblock_free_all(); -} - static const pgprot_t protection_map[16] = { [VM_NONE] = _PAGE_P(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR), diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 6a71b23f1383..a73cc94f806e 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -150,41 +150,18 @@ void __init setup_arch_memory(void) */ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; - high_memory = (void *)(min_high_pfn << PAGE_SHIFT); - arch_pfn_offset = min(min_low_pfn, min_high_pfn); kmap_init(); - -#else /* CONFIG_HIGHMEM */ - /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */ - max_mapnr = max_low_pfn - min_low_pfn; - #endif /* CONFIG_HIGHMEM */ free_area_init(max_zone_pfn); } -static void __init highmem_init(void) +void __init arch_mm_preinit(void) { #ifdef CONFIG_HIGHMEM - unsigned long tmp; - memblock_phys_free(high_mem_start, high_mem_sz); - for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); #endif -} - -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Calculates and displays memory available/used - */ -void __init mem_init(void) -{ - memblock_free_all(); - highmem_init(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PUD 
* sizeof(pud_t)) > PAGE_SIZE); diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index b07004d53267..fd8897a0e52c 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -32,7 +32,7 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) return (void __iomem *)(u32)paddr; return ioremap_prot(paddr, size, - pgprot_val(pgprot_noncached(PAGE_KERNEL))); + pgprot_noncached(PAGE_KERNEL)); } EXPORT_SYMBOL(ioremap); @@ -44,10 +44,8 @@ EXPORT_SYMBOL(ioremap); * might need finer access control (R/W/X) */ void __iomem *ioremap_prot(phys_addr_t paddr, size_t size, - unsigned long flags) + pgprot_t prot) { - pgprot_t prot = __pgprot(flags); - /* force uncached */ return generic_ioremap_prot(paddr, size, pgprot_noncached(prot)); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 835b5f100e92..25ed6f1a7c7a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -121,7 +121,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD) + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_CAN_USE_KEEP_IN_OVERLAY) select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL @@ -133,6 +133,7 @@ config ARM select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ + select HAVE_RUST if CPU_LITTLE_ENDIAN && CPU_32v7 select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 00ca7886b18e..4808d3ed98e4 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -150,6 +150,7 @@ endif KBUILD_CPPFLAGS +=$(cpp-y) KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi CHECKFLAGS += -D__arm__ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index be91e376df79..6b986ef6042f 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -19,14 +19,13 @@ extern struct page *empty_zero_page; #define ZERO_PAGE(vaddr) (empty_zero_page) #endif -#ifndef CONFIG_MMU - #include <asm-generic/pgtable-nopud.h> + +#ifndef CONFIG_MMU #include <asm/pgtable-nommu.h> #else -#include <asm-generic/pgtable-nopud.h> #include <asm/page.h> #include <asm/pgtable-hwdef.h> diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index d60f6e83a9f7..0341973e30e1 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -19,7 +19,7 @@ #endif #ifdef CONFIG_MMU -#define ARM_MMU_KEEP(x) x +#define ARM_MMU_KEEP(x) KEEP(x) #define ARM_MMU_DISCARD(x) #else #define ARM_MMU_KEEP(x) @@ -34,6 +34,12 @@ #define NOCROSSREFS #endif +#ifdef CONFIG_LD_CAN_USE_KEEP_IN_OVERLAY +#define OVERLAY_KEEP(x) KEEP(x) +#else +#define OVERLAY_KEEP(x) x +#endif + /* Set start/end symbol names to the LMA for the section */ #define ARM_LMA(sym, section) \ sym##_start = LOADADDR(section); \ @@ -125,13 +131,13 @@ __vectors_lma = .; \ OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ .vectors { \ - *(.vectors) \ + OVERLAY_KEEP(*(.vectors)) \ } \ .vectors.bhb.loop8 { \ - *(.vectors.bhb.loop8) \ + OVERLAY_KEEP(*(.vectors.bhb.loop8)) \ } \ 
.vectors.bhb.bpiall { \ - *(.vectors.bhb.bpiall) \ + OVERLAY_KEEP(*(.vectors.bhb.bpiall)) \ } \ } \ ARM_LMA(__vectors, .vectors); \ diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index da2ee8d6ef1a..354ce16d83cb 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -285,11 +285,9 @@ bool in_module_plt(unsigned long loc) struct module *mod; bool ret; - preempt_disable(); + guard(rcu)(); mod = __module_text_address(loc); ret = mod && (loc - (u32)mod->arch.core.plt_ent < mod->arch.core.plt_count * PLT_ENT_SIZE || loc - (u32)mod->arch.init.plt_ent < mod->arch.init.plt_count * PLT_ENT_SIZE); - preempt_enable(); - return ret; } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3431c0553f45..50999886a8b5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -551,7 +551,8 @@ void show_ipi_list(struct seq_file *p, int prec) if (!ipi_desc[i]) continue; - seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, + prec >= 4 ? " " : ""); for_each_online_cpu(cpu) seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 5eddb75a7174..f2e8d4fac068 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(KEEP(*(__ex_table))) + ARM_MMU_KEEP(*(__ex_table)) __stop___ex_table = .; } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index de373c6c2ae8..d592a203f9c6 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(KEEP(*(__ex_table))) + ARM_MMU_KEEP(*(__ex_table)) __stop___ex_table = .; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 5345d218899a..54bdca025c9f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,56 +237,17 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - -/* - * mem_init() marks the free areas in the mem_map and tells us how much - * memory is free. This is done after various parts of the system have - * claimed their memory after the kernel image. 
- */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { #ifdef CONFIG_ARM_LPAE swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); #endif - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); - - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); + memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); #endif - free_highpages(); - /* * Check boundaries twice: Some fundamental inconsistencies can * be detected at build time already. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 62dc903ecc7f..a182295e6f08 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -38,9 +38,11 @@ config ARM64 select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_HW_PTE_YOUNG @@ -157,7 +159,6 @@ config ARM64 select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP - select GENERIC_PTDUMP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index c607e0bf5e0b..d1cc0571798b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -132,6 +132,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -218,6 +219,7 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e390c432f546..39577f1d079a 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, + AARCH64_INSN_LDST_LOAD_ACQ, AARCH64_INSN_LDST_LOAD_EX, AARCH64_INSN_LDST_LOAD_ACQ_EX, + AARCH64_INSN_LDST_STORE_REL, AARCH64_INSN_LDST_STORE_EX, AARCH64_INSN_LDST_STORE_REL_EX, AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, @@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) -__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) -__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00) +__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00) 
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000) __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) @@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, int offset, enum aarch64_insn_variant variant, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register base, enum aarch64_insn_register state, diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 76ebbdc6ffdd..9b96840fb979 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -270,9 +270,9 @@ int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); #define _PAGE_IOREMAP PROT_DEVICE_nGnRE #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), PROT_NORMAL_NC) + ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_np(addr, size) \ - ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) + ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) /* * io{read,write}{16,32,64}be() macros @@ -293,7 +293,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(addr))) return (void __iomem *)__phys_to_virt(addr); - return ioremap_prot(addr, size, PROT_NORMAL); + return ioremap_prot(addr, size, __pgprot(PROT_NORMAL)); } /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 84f05f781a70..d3b538be1500 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -633,11 +633,6 @@ static inline pud_t pud_mkhuge(pud_t pud) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP -#define pud_special(pte) pte_special(pud_pte(pud)) -#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) -#endif - #define pmd_pgprot pmd_pgprot static inline pgprot_t pmd_pgprot(pmd_t pmd) { diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 6cf4aae05219..b2931d1ae0fb 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -7,7 +7,7 @@ #include <linux/ptdump.h> -#ifdef CONFIG_PTDUMP_CORE +#ifdef CONFIG_PTDUMP #include <linux/mm_types.h> #include <linux/seq_file.h> @@ -70,6 +70,6 @@ static inline void ptdump_debugfs_register(struct ptdump_info *info, #else static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { } -#endif /* CONFIG_PTDUMP_CORE */ +#endif /* CONFIG_PTDUMP */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h new file mode 100644 index 000000000000..5b80785324b6 --- /dev/null +++ b/arch/arm64/include/asm/rqspinlock.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RQSPINLOCK_H +#define _ASM_RQSPINLOCK_H + +#include <asm/barrier.h> + +/* + * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom + * version based on [0]. In rqspinlock code, our conditional expression involves + * checking the value _and_ additionally a timeout. 
However, on arm64, the + * WFE-based implementation may never spin again if no stores occur to the + * locked byte in the lock word. As such, we may be stuck forever if + * event-stream based unblocking is not available on the platform for WFE spin + * loops (arch_timer_evtstrm_available). + * + * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this + * copy-paste. + * + * While we rely on the implementation to amortize the cost of sampling + * cond_expr for us, it will not happen when event stream support is + * unavailable, time_expr check is amortized. This is not the common case, and + * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns + * comparison, hence just let it be. In case of event-stream, the loop is woken + * up at microsecond granularity. + * + * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com + */ + +#ifndef smp_cond_load_acquire_timewait + +#define smp_cond_time_check_count 200 + +#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \ + time_limit_ns) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + unsigned int __count = 0; \ + for (;;) { \ + VAL = READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + if (__count++ < smp_cond_time_check_count) \ + continue; \ + if ((time_expr_ns) >= (time_limit_ns)) \ + break; \ + __count = 0; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + if ((time_expr_ns) >= (time_limit_ns)) \ + break; \ + } \ + (typeof(*ptr))VAL; \ +}) + +#define smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, time_limit_ns) \ +({ \ + __unqual_scalar_typeof(*ptr) _val; \ + int __wfe = arch_timer_evtstrm_available(); \ + \ + if (likely(__wfe)) { \ + _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + } else { \ + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \ + time_expr_ns, \ + time_limit_ns); \ + smp_acquire__after_ctrl_dep(); \ + } \ + (typeof(*ptr))_val; \ +}) + +#endif + +#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) + +#include <asm-generic/rqspinlock.h> + +#endif /* _ASM_RQSPINLOCK_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 8104aee4f9a0..eba1a98657f1 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -322,13 +322,6 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) return true; } -static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) -{ - __flush_tlb_page_nosync(mm, uaddr); -} - /* * If mprotect/munmap/etc occurs during TLB batched flushing, we need to * synchronise all the TLBI issued with a DSB to avoid the race mentioned in @@ -450,7 +443,7 @@ static inline bool __flush_tlb_range_limit_excess(unsigned long start, return false; } -static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, +static inline void __flush_tlb_range_nosync(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) @@ -462,12 +455,12 @@ static inline void __flush_tlb_range_nosync(struct 
vm_area_struct *vma, pages = (end - start) >> PAGE_SHIFT; if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { - flush_tlb_mm(vma->vm_mm); + flush_tlb_mm(mm); return; } dsb(ishst); - asid = ASID(vma->vm_mm); + asid = ASID(mm); if (last_level) __flush_tlb_range_op(vale1is, start, pages, stride, asid, @@ -476,7 +469,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true, lpa2_is_enabled()); - mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void __flush_tlb_range(struct vm_area_struct *vma, @@ -484,7 +477,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long stride, bool last_level, int tlb_level) { - __flush_tlb_range_nosync(vma, start, end, stride, + __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, last_level, tlb_level); dsb(ish); } @@ -535,6 +528,12 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ish); isb(); } + +static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, unsigned long start, unsigned long end) +{ + __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3); +} #endif #endif diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index d780d1bd2eac..82cf1f879c61 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -109,10 +109,9 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); - unsigned long dst, src, size; + unsigned long dst, size; dst = regs->regs[dstreg]; - src = regs->regs[srcreg]; size = regs->regs[sizereg]; /* @@ -129,6 +128,7 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon } } else { /* CPY* instruction */ + unsigned long src = regs->regs[srcreg]; if (!(option_a ^ wrong_option)) { /* Format is from Option B */ if (regs->pstate & PSR_N_BIT) { diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e6f66491fbe9..b9a66fc146c9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -379,7 +379,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return ioremap_prot(phys, size, pgprot_val(prot)); + return ioremap_prot(phys, size, prot); } /* diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c index deff21bfa680..b68e1d328d4c 100644 --- a/arch/arm64/kernel/compat_alignment.c +++ b/arch/arm64/kernel/compat_alignment.c @@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs) return 1; } + if (!handler) + return 1; type = handler(addr, instr, regs); if (type == TYPE_ERROR || type == TYPE_FAULT) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index d7c0d023dfe5..5a890714ee2e 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -320,14 +320,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * dealing with an out-of-range condition, we can assume it * is due to a module being loaded far away from the kernel. 
* - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * NOTE: __module_text_address() must be called within a RCU read + * section, but we can rely on ftrace_lock to ensure that 'mod' * retains its validity throughout the remainder of this code. */ if (!mod) { - preempt_disable(); + guard(rcu)(); mod = __module_text_address(pc); - preempt_enable(); } if (WARN_ON(!mod)) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index d5d11fd11549..b198dde79e59 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -901,6 +901,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_HISI_HIP09), {}, }; static const struct midr_range spectre_bhb_k11_list[] = { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 887ac0b05961..78ddf6bdecad 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -130,7 +130,8 @@ static int __setup_additional_pages(enum vdso_abi abi, mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC|gp_flags| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_SEALED_SYSMAP, vdso_info[abi].cm); if (IS_ERR(ret)) goto up_fail; @@ -256,7 +257,8 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) */ ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC, + VM_MAYREAD | VM_MAYEXEC | + VM_SEALED_SYSMAP, &aarch32_vdso_maps[AA32_MAP_VECTORS]); return PTR_ERR_OR_ZERO(ret); @@ -279,7 +281,8 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) */ ret = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | - VM_MAYWRITE | VM_MAYEXEC, + VM_MAYWRITE | VM_MAYEXEC | + VM_SEALED_SYSMAP, &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); if (IS_ERR(ret)) goto out; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index ead632ad01b4..096e45acadb2 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -71,8 +71,8 @@ config PTDUMP_STAGE2_DEBUGFS depends on KVM depends on DEBUG_KERNEL depends on DEBUG_FS - depends on GENERIC_PTDUMP - select PTDUMP_CORE + depends on ARCH_HAS_PTDUMP + select PTDUMP default n help Say Y here if you want to show the stage-2 kernel pagetables diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index b008a9b46a7f..9bef696e2230 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -540,6 +540,35 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, offset >> shift); } +u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + + switch (type) { + case AARCH64_INSN_LDST_LOAD_ACQ: + insn = aarch64_insn_get_load_acq_value(); + break; + case AARCH64_INSN_LDST_STORE_REL: + insn = aarch64_insn_get_store_rel_value(); + break; + default: + pr_err("%s: unknown load-acquire/store-release encoding %d\n", + __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + reg); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); +} + u32 aarch64_insn_gen_load_store_ex(enum 
aarch64_insn_register reg, enum aarch64_insn_register base, enum aarch64_insn_register state, diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index fc92170a8f37..c26489cf96cd 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -5,7 +5,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o fixmap.o obj-$(CONFIG_ARM64_CONTPTE) += contpte.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index 55107d27d3f8..bcac4f55f9c1 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -335,7 +335,7 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma, * eliding the trailing DSB applies here. */ addr = ALIGN_DOWN(addr, CONT_PTE_SIZE); - __flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE, + __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE, PAGE_SIZE, true, 3); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ccdef53872a0..b99bf3980fc6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -98,21 +98,19 @@ static void __init arch_reserve_crashkernel(void) { unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) @@ -309,8 +307,6 @@ void __init arm64_memblock_init(void) } early_init_fdt_scan_reserved_mem(); - - high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } void __init bootmem_init(void) @@ -359,12 +355,7 @@ void __init bootmem_init(void) memblock_dump_all(); } -/* - * mem_init() marks the free areas in the mem_map and tells us how much memory - * is free. This is done after various parts of the system have claimed their - * memory after the kernel image. - */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { unsigned int flags = SWIOTLB_VERBOSE; bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); @@ -388,9 +379,6 @@ void __init mem_init(void) swiotlb_init(swiotlb, flags); swiotlb_update_mem_attributes(); - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - /* * Check boundaries twice: Some fundamental inconsistencies can be * detected at build time already. 
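A note on the new arch/arm64/include/asm/rqspinlock.h above: it carries two waiting strategies, a WFE-based wait for when the event stream can wake the CPU periodically, and a fallback spin loop that samples the clock only once every smp_cond_time_check_count (200) iterations so the timeout check stays off the hot path. Below is a minimal userspace sketch of that amortized-deadline spin; CHECK_COUNT, wait_flag() and the 10 ms deadline are illustrative stand-ins, not kernel API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define CHECK_COUNT 200		/* mirrors smp_cond_time_check_count */

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/*
 * Spin until *flag becomes nonzero or the deadline passes. The clock is
 * sampled only once every CHECK_COUNT iterations, so the common case pays
 * for a load and a counter increment, not a timer read.
 */
static bool wait_flag(atomic_int *flag, uint64_t deadline_ns)
{
	unsigned int count = 0;

	for (;;) {
		if (atomic_load_explicit(flag, memory_order_acquire))
			return true;
		/* a real implementation would cpu_relax() here */
		if (count++ < CHECK_COUNT)
			continue;
		if (now_ns() >= deadline_ns)
			return false;
		count = 0;
	}
}

int main(void)
{
	atomic_int flag = 0;

	/* nobody ever sets the flag, so this times out after ~10 ms */
	bool ok = wait_flag(&flag, now_ns() + 10 * 1000 * 1000);

	printf("%s\n", ok ? "flag observed" : "timed out");
	return 0;
}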
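aarch64_insn_gen_load_acq_store_rel() in the arch/arm64/lib/insn.c hunk above emits LDAR and STLR, the AArch64 load-acquire/store-release instructions, and the BPF JIT hunks below (arch/arm64/net/bpf_jit.h and bpf_jit_comp.c) build BPF_LOAD_ACQ and BPF_STORE_REL on top of it. Unlike the LDXR/STXR exclusives already covered by load_ex/store_ex, these order memory without any exclusivity. Here is a userspace sketch of the contract in C11 atomics, which compilers typically lower to exactly these instructions on arm64; the producer/consumer naming is illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int payload;		/* plain data, published via the flag */
static atomic_int ready;	/* guards the payload */

static void *producer(void *arg)
{
	(void)arg;
	payload = 42;					/* 1: write the data */
	atomic_store_explicit(&ready, 1,		/* 2: STLR on arm64 */
			      memory_order_release);
	return NULL;
}

static void *consumer(void *arg)
{
	(void)arg;
	while (!atomic_load_explicit(&ready,		/* LDAR on arm64 */
				     memory_order_acquire))
		;					/* spin */
	/* acquire pairs with release: the payload write is visible here */
	printf("payload = %d\n", payload);
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&p, NULL, producer, NULL);
	pthread_create(&c, NULL, consumer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}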
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 6cc0b7e7eb03..10e246f11271 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -15,10 +15,9 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t pgprot) { unsigned long last_addr = phys_addr + size - 1; - pgprot_t pgprot = __pgprot(prot); /* Don't allow outside PHYS_MASK */ if (last_addr & ~PHYS_MASK) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b98f89420713..ea6695d53fb9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1361,7 +1361,8 @@ int arch_add_memory(int nid, u64 start, u64 size, __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); else { - max_pfn = PFN_UP(start + size); + /* Address of hotplugged memory can be smaller */ + max_pfn = max(max_pfn, PFN_UP(start + size)); max_low_pfn = max_pfn; } diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index b22ab2f97a30..a3b0e693a125 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -119,6 +119,26 @@ aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \ AARCH64_INSN_LDST_STORE_REL_EX) +/* Load-acquire & store-release */ +#define A64_LDAR(Rt, Rn, size) \ + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_LOAD_ACQ) +#define A64_STLR(Rt, Rn, size) \ + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_STORE_REL) + +/* Rt = [Rn] (load acquire) */ +#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8) +#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16) +#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32) +#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64) + +/* [Rn] = Rt (store release) */ +#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8) +#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16) +#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32) +#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64) + /* * LSE atomics * diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8446848edddb..70d7c89d3ac9 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -272,7 +272,7 @@ static inline void emit_a64_add_i(const bool is64, const int dst, const int src, { if (is_addsub_imm(imm)) { emit(A64_ADD_I(is64, dst, src, imm), ctx); - } else if (is_addsub_imm(-imm)) { + } else if (is_addsub_imm(-(u32)imm)) { emit(A64_SUB_I(is64, dst, src, -imm), ctx); } else { emit_a64_mov_i(is64, tmp, imm, ctx); @@ -647,6 +647,81 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) return 0; } +static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const s32 imm = insn->imm; + const s16 off = insn->off; + const u8 code = insn->code; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; + const u8 dst = bpf2a64[insn->dst_reg]; + const u8 src = bpf2a64[insn->src_reg]; + const u8 tmp = bpf2a64[TMP_REG_1]; + u8 reg; + + switch (imm) { + case BPF_LOAD_ACQ: + reg = src; + break; + case BPF_STORE_REL: + reg = dst; + break; + default: + pr_err_once("unknown atomic load/store op code %02x\n", imm); + return -EINVAL; + } + + if (off) { + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); + reg = tmp; + } + if (arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } + + switch (imm) { + case BPF_LOAD_ACQ: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_LDARB(dst, reg), ctx); + break; + case BPF_H: + 
emit(A64_LDARH(dst, reg), ctx); + break; + case BPF_W: + emit(A64_LDAR32(dst, reg), ctx); + break; + case BPF_DW: + emit(A64_LDAR64(dst, reg), ctx); + break; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_STLRB(src, reg), ctx); + break; + case BPF_H: + emit(A64_STLRH(src, reg), ctx); + break; + case BPF_W: + emit(A64_STLR32(src, reg), ctx); + break; + case BPF_DW: + emit(A64_STLR64(src, reg), ctx); + break; + } + break; + default: + pr_err_once("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + #ifdef CONFIG_ARM64_LSE_ATOMICS static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { @@ -1159,7 +1234,7 @@ emit_bswap_uxt: case BPF_ALU64 | BPF_SUB | BPF_K: if (is_addsub_imm(imm)) { emit(A64_SUB_I(is64, dst, dst, imm), ctx); - } else if (is_addsub_imm(-imm)) { + } else if (is_addsub_imm(-(u32)imm)) { emit(A64_ADD_I(is64, dst, dst, -imm), ctx); } else { emit_a64_mov_i(is64, tmp, imm, ctx); @@ -1330,7 +1405,7 @@ emit_cond_jmp: case BPF_JMP32 | BPF_JSLE | BPF_K: if (is_addsub_imm(imm)) { emit(A64_CMP_I(is64, dst, imm), ctx); - } else if (is_addsub_imm(-imm)) { + } else if (is_addsub_imm(-(u32)imm)) { emit(A64_CMN_I(is64, dst, -imm), ctx); } else { emit_a64_mov_i(is64, tmp, imm, ctx); @@ -1641,11 +1716,17 @@ emit_cond_jmp: return ret; break; + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: - if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + if (bpf_atomic_is_load_store(insn)) + ret = emit_atomic_ld_st(insn, ctx); + else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) ret = emit_lse_atomic(insn, ctx); else ret = emit_ll_sc_atomic(insn, ctx); @@ -2669,7 +2750,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) switch (insn->code) { case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + if (!bpf_atomic_is_load_store(insn) && + !cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) return false; } return true; diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h index ed53f0b47388..536d3bf32ff1 100644 --- a/arch/csky/include/asm/io.h +++ b/arch/csky/include/asm/io.h @@ -36,7 +36,7 @@ */ #define ioremap_wc(addr, size) \ ioremap_prot((addr), (size), \ - (_PAGE_IOREMAP & ~_CACHE_MASK) | _CACHE_UNCACHED) + __pgprot((_PAGE_IOREMAP & ~_CACHE_MASK) | _CACHE_UNCACHED)) #include <asm-generic/io.h> diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index bf8400c28b5a..11055c574968 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -61,11 +61,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) extern void pagetable_init(void); extern void mmu_init(unsigned long min_pfn, unsigned long max_pfn); diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index fe715b707fd0..e0d6ca86ea8c 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -12,6 +12,45 @@ #include <asm/mmu_context.h> #include <asm/pgalloc.h> +#ifdef CONFIG_BLK_DEV_INITRD 
+static void __init setup_initrd(void) +{ + unsigned long size; + + if (initrd_start >= initrd_end) { + pr_err("initrd not found or empty"); + goto disable; + } + + if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + pr_err("initrd extends beyond end of memory"); + goto disable; + } + + size = initrd_end - initrd_start; + + if (memblock_is_region_reserved(__pa(initrd_start), size)) { + pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", + __pa(initrd_start), size); + goto disable; + } + + memblock_reserve(__pa(initrd_start), size); + + pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), size); + + initrd_below_start_ok = 1; + + return; + +disable: + initrd_start = initrd_end = 0; + + pr_err(" - disabling initrd\n"); +} +#endif + static void __init csky_memblock_init(void) { unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); @@ -40,6 +79,10 @@ static void __init csky_memblock_init(void) max_low_pfn = min_low_pfn + sseg_size; } +#ifdef CONFIG_BLK_DEV_INITRD + setup_initrd(); +#endif + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; mmu_init(min_low_pfn, max_low_pfn); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index bde7cabd23df..573da66b2543 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -42,73 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -#ifdef CONFIG_BLK_DEV_INITRD -static void __init setup_initrd(void) -{ - unsigned long size; - - if (initrd_start >= initrd_end) { - pr_err("initrd not found or empty"); - goto disable; - } - - if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { - pr_err("initrd extends beyond end of memory"); - goto disable; - } - - size = initrd_end - initrd_start; - - if (memblock_is_region_reserved(__pa(initrd_start), size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", - __pa(initrd_start), size); - goto disable; - } - - memblock_reserve(__pa(initrd_start), size); - - pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", - (void *)(initrd_start), size); - - initrd_below_start_ok = 1; - - return; - -disable: - initrd_start = initrd_end = 0; - - pr_err(" - disabling initrd\n"); -} -#endif - -void __init mem_init(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET); -#else - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); -#endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - -#ifdef CONFIG_BLK_DEV_INITRD - setup_initrd(); -#endif - - memblock_free_all(); - -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - /* FIXME not sure about */ - if (!memblock_is_reserved(tmp << PAGE_SHIFT)) - free_highmem_page(page); - } -#endif -} - void free_initmem(void) { free_initmem_default(-1); diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 1ee5f5f157ca..937a11ef4c33 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -87,10 +87,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, max_kernel_seg = pmdindex; } -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor((page_ptdesc(pte))); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 3458f39ca2ac..34eb9d424b96 100644 --- a/arch/hexagon/mm/init.c 
+++ b/arch/hexagon/mm/init.c @@ -43,32 +43,6 @@ DEFINE_SPINLOCK(kmap_gen_lock); /* checkpatch says don't init this to 0. */ unsigned long long kmap_generation; -/* - * mem_init - initializes memory - * - * Frees up bootmem - * Fixes up more stuff for HIGHMEM - * Calculates and displays memory available/used - */ -void __init mem_init(void) -{ - /* No idea where this is actually declared. Seems to evade LXR. */ - memblock_free_all(); - - /* - * To-Do: someone somewhere should wipe out the bootmem map - * after we're done? - */ - - /* - * This can be moved to some more virtual-memory-specific - * initialization hook at some point. Set the init_mm - * descriptors "context" value to point to the initial - * kernel segment table's physical address. - */ - init_mm.context.ptbase = __pa(init_mm.pgd); -} - void sync_icache_dcache(pte_t pte) { unsigned long addr; @@ -104,10 +78,10 @@ static void __init paging_init(void) free_area_init(max_zone_pfn); /* sets up the zonelists and mem_map */ /* - * Start of high memory area. Will probably need something more - * fancy if we... get more fancy. + * Set the init_mm descriptors "context" value to point to the + * initial kernel segment table's physical address. */ - high_memory = (void *)((bootmem_lastpg + 1) << PAGE_SHIFT); + init_mm.context.ptbase = __pa(init_mm.pgd); } #ifndef DMA_RESERVE diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 687502917ae2..067c0b994648 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -30,6 +30,7 @@ config LOONGARCH select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN select ARCH_HAS_VDSO_ARCH_DATA select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION @@ -177,7 +178,7 @@ config LOONGARCH select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ - select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP + select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING select IRQ_LOONGARCH_CPU select LOCK_MM_AND_FIND_VMA @@ -387,8 +388,8 @@ config CMDLINE_BOOTLOADER config CMDLINE_EXTEND bool "Use built-in to extend bootloader kernel arguments" help - The command-line arguments provided during boot will be - appended to the built-in command line. This is useful in + The built-in command line will be appended to the command- + line arguments provided during boot. This is useful in cases where the provided arguments are insufficient and you don't want to or cannot modify them. 
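A conversion running through the arc, arm64 and csky hunks above and the loongarch io.h hunk below: ioremap_prot() now takes pgprot_t instead of a raw unsigned long, dropping the pgprot_val()/__pgprot() round-trips at the call sites. The point of pgprot_t being a distinct type (typically a one-member struct) is compile-time checking. A userspace sketch of the idiom, with map_region() as an illustrative name:

#include <stdio.h>

/* pgprot_t in the kernel is (typically) a one-member struct, so protection
 * bits get their own type; a raw integer no longer converts silently. */
typedef struct { unsigned long pgprot; } pgprot_t;

#define __pgprot(x)	((pgprot_t) { (x) })
#define pgprot_val(x)	((x).pgprot)

static void map_region(unsigned long phys, pgprot_t prot)
{
	printf("mapping %#lx with prot %#lx\n", phys, pgprot_val(prot));
}

int main(void)
{
	map_region(0x1000, __pgprot(0x3));
	/* map_region(0x2000, 0x3); -- now a compile error, not a silently
	 * accepted raw flags value */
	return 0;
}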
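Another recurring conversion in this pull: the preempt_disable()/preempt_enable() pairs around __module_text_address() (arm/kernel/module-plts.c and arm64/kernel/ftrace.c above; loongarch ftrace_dyn.c and unwind_orc.c below) become guard(rcu)() or scoped_guard(rcu), matching __module_text_address()'s requirement of an RCU read section and removing the manual unlock on every exit path; note how unwind_next_frame() loses both preempt_enable() calls. The kernel's guard() machinery in linux/cleanup.h rides on __attribute__((cleanup)). A simplified userspace analogue, with a pthread mutex standing in for RCU and GUARD()/bump() as illustrative names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int counter;

static void mutex_unlocker(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

#define CONCAT_(a, b)	a##b
#define CONCAT(a, b)	CONCAT_(a, b)

/* Take the lock now; drop it automatically when the variable goes out of
 * scope, on every return path. */
#define GUARD(m)							\
	pthread_mutex_t *CONCAT(guard_, __LINE__)			\
		__attribute__((cleanup(mutex_unlocker))) =		\
		(pthread_mutex_lock(m), (m))

static int bump(int how_much)
{
	GUARD(&lock);

	if (how_much < 0)
		return -1;	/* early return: still unlocks */
	counter += how_much;
	return counter;
}

int main(void)
{
	printf("%d\n", bump(3));	/* 3 */
	printf("%d\n", bump(-1));	/* -1, lock still released */
	printf("%d\n", bump(4));	/* 7 */
	return 0;
}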
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3c240afe5aed..90f21dfe22b1 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -24,9 +24,9 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y +CONFIG_CGROUP_DMEM=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y @@ -109,8 +109,7 @@ CONFIG_BINFMT_MISC=m CONFIG_ZPOOL=y CONFIG_ZSWAP=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y -CONFIG_ZBUD=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y # CONFIG_MHP_DEFAULT_ONLINE_TYPE_OFFLINE is not set @@ -666,6 +665,10 @@ CONFIG_RTW88_8723DE=m CONFIG_RTW88_8723DU=m CONFIG_RTW88_8821CE=m CONFIG_RTW88_8821CU=m +CONFIG_RTW88_8821AU=m +CONFIG_RTW88_8812AU=m +CONFIG_RTW88_8814AE=m +CONFIG_RTW88_8814AU=m CONFIG_RTW89=m CONFIG_RTW89_8851BE=m CONFIG_RTW89_8852AE=m @@ -749,6 +752,7 @@ CONFIG_MEDIA_PCI_SUPPORT=y CONFIG_VIDEO_BT848=m CONFIG_DVB_BT8XX=m CONFIG_DRM=y +CONFIG_DRM_LOAD_EDID_FIRMWARE=y CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y CONFIG_DRM_AMDGPU=m @@ -762,6 +766,7 @@ CONFIG_DRM_LOONGSON=y CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y +CONFIG_FIRMWARE_EDID=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m # CONFIG_VGA_CONSOLE is not set @@ -844,6 +849,9 @@ CONFIG_TYPEC_TCPCI=m CONFIG_TYPEC_UCSI=m CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m +CONFIG_EDAC=y +# CONFIG_EDAC_LEGACY_SYSFS is not set +CONFIG_EDAC_LOONGSON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_RTC_DRV_LOONGSON=y diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h index 1b6d09617199..aa622c754414 100644 --- a/arch/loongarch/include/asm/cache.h +++ b/arch/loongarch/include/asm/cache.h @@ -8,6 +8,8 @@ #define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define ARCH_DMA_MINALIGN (16) + #define __read_mostly __section(".data..read_mostly") #endif /* _ASM_CACHE_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index e77a56eaf906..eaff72b38dc8 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -23,9 +23,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); #ifdef CONFIG_ARCH_IOREMAP static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - switch (prot_val & _CACHE_MASK) { + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); case _CACHE_SUC: @@ -38,7 +38,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, } #define ioremap(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC)) + ioremap_prot((offset), (size), PAGE_KERNEL_SUC) #define iounmap(addr) ((void)(addr)) @@ -55,10 +55,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, */ #define ioremap_wc(offset, size) \ ioremap_prot((offset), (size), \ - pgprot_val(wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC)) + wc_enabled ? 
PAGE_KERNEL_WUC : PAGE_KERNEL_SUC) #define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((offset), (size), PAGE_KERNEL) #define mmiowb() wmb() diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index a0ca84da8541..12bd15578c33 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -53,7 +53,7 @@ void spurious_interrupt(void); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); -#define MAX_IO_PICS 2 +#define MAX_IO_PICS 8 #define NR_IRQS (64 + NR_VECTORS * (NR_CPUS + MAX_IO_PICS)) struct acpi_vector_group { diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 7211dff8c969..b58f587f0f0a 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -55,11 +55,8 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) return pte; } -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h index f23adb15f418..fc8b64773794 100644 --- a/arch/loongarch/include/asm/stacktrace.h +++ b/arch/loongarch/include/asm/stacktrace.h @@ -8,6 +8,7 @@ #include <asm/asm.h> #include <asm/ptrace.h> #include <asm/loongarch.h> +#include <asm/unwind_hints.h> #include <linux/stringify.h> enum stack_type { @@ -43,6 +44,7 @@ int get_stack_info(unsigned long stack, struct task_struct *task, struct stack_i static __always_inline void prepare_frametrace(struct pt_regs *regs) { __asm__ __volatile__( + UNWIND_HINT_SAVE /* Save $ra */ STORE_ONE_REG(1) /* Use $ra to save PC */ @@ -80,6 +82,7 @@ static __always_inline void prepare_frametrace(struct pt_regs *regs) STORE_ONE_REG(29) STORE_ONE_REG(30) STORE_ONE_REG(31) + UNWIND_HINT_RESTORE : "=m" (regs->csr_era) : "r" (regs->regs) : "memory"); diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index a01086ad9dde..2c68bc72736c 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -23,6 +23,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#endif /* __ASSEMBLY__ */ +#else /* !__ASSEMBLY__ */ + +#define UNWIND_HINT_SAVE \ + UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) + +#define UNWIND_HINT_RESTORE \ + UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) + +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index 2f1f5b08638f..27144de5c5fe 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -68,6 +68,8 @@ static int __init fdt_cpu_clk_init(void) return -ENODEV; clk = of_clk_get(np, 0); + of_node_put(np); + if (IS_ERR(clk)) return -ENODEV; diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 25c9a4cfd5fa..d5d81d74034c 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -85,14 +85,13 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod * dealing with an out-of-range condition, we can assume it * is due to a module being loaded far away from 
the kernel. * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * NOTE: __module_text_address() must be called within a RCU read + * section, but we can rely on ftrace_lock to ensure that 'mod' * retains its validity throughout the remainder of this code. */ if (!mod) { - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); + scoped_guard(rcu) + mod = __module_text_address(pc); } if (WARN_ON(!mod)) diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c index 445c452d72a7..7be5b4c0c900 100644 --- a/arch/loongarch/kernel/kgdb.c +++ b/arch/loongarch/kernel/kgdb.c @@ -8,6 +8,7 @@ #include <linux/hw_breakpoint.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/objtool.h> #include <linux/processor.h> #include <linux/ptrace.h> #include <linux/sched.h> @@ -224,13 +225,13 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->csr_era = pc; } -void arch_kgdb_breakpoint(void) +noinline void arch_kgdb_breakpoint(void) { __asm__ __volatile__ ( \ ".globl kgdb_breakinst\n\t" \ - "nop\n" \ "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */ } +STACK_FRAME_NON_STANDARD(arch_kgdb_breakpoint); /* * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 84fe7f854820..30a72fd528c0 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -387,12 +387,6 @@ void __init paging_init(void) free_area_init(zones_size); } -void __init mem_init(void) -{ - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 90cb3ca96f08..b99fbb388fe0 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -259,18 +259,17 @@ static void __init arch_reserve_crashkernel(void) int ret; unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static void __init fdt_setup(void) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index b25722876331..d623935a7547 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -399,7 +399,7 @@ bool unwind_next_frame(struct unwind_state *state) return false; /* Don't let modules unload while we're reading their ORC data. 
*/ - preempt_disable(); + guard(rcu)(); if (is_entry_func(state->pc)) goto end; @@ -514,14 +514,12 @@ bool unwind_next_frame(struct unwind_state *state) if (!__kernel_text_address(state->pc)) goto err; - preempt_enable(); return true; err: state->error = true; end: - preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index ca5aa5f46a9f..fdb7f73ad160 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -75,14 +75,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } - -void __init mem_init(void) -{ - max_mapnr = max_low_pfn; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - - memblock_free_all(); -} #endif /* !CONFIG_NUMA */ void __ref free_initmem(void) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index ea357a3edc09..fa1500d4aa3e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -142,6 +142,8 @@ static void build_prologue(struct jit_ctx *ctx) */ if (seen_tail_call(ctx) && seen_call(ctx)) move_reg(ctx, TCC_SAVED, REG_TCC); + else + emit_insn(ctx, nop); ctx->stack_size = stack_adjust; } @@ -905,7 +907,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext move_addr(ctx, t1, func_addr); emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); - move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + + if (insn->src_reg != BPF_PSEUDO_CALL) + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + break; /* tail call */ @@ -930,7 +935,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext { const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; - move_imm(ctx, dst, imm64, is32); + if (bpf_pseudo_func(insn)) + move_addr(ctx, dst, imm64); + else + move_imm(ctx, dst, imm64, is32); return 1; } diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index 68586338ecf8..f9c569f53949 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -27,6 +27,11 @@ struct jit_data { struct jit_ctx ctx; }; +static inline void emit_nop(union loongarch_instruction *insn) +{ + insn->word = INSN_NOP; +} + #define emit_insn(ctx, func, ...) \ do { \ if (ctx->image != NULL) { \ diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 1c26147aff70..ccd2c5e135c6 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -36,8 +36,7 @@ endif # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ - --hash-style=sysv --build-id -T + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared --build-id -T # # Shared build commands. 
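In the loongarch bpf_jit.c hunk above, BPF_LD | BPF_IMM | BPF_DW reassembles a 64-bit constant from two signed 32-bit imm fields as (u64)(insn + 1)->imm << 32 | (u32)insn->imm, and pseudo-function addresses now go through move_addr(), which materializes the full 64-bit value, rather than move_imm()'s value-dependent encodings. The (u32) cast is what keeps a negative low half from sign-extending over the high half, and the arm64 JIT's switch from is_addsub_imm(-imm) to is_addsub_imm(-(u32)imm) earlier in this diff guards against the same family of 32-bit sign pitfalls. A standalone illustration:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* BPF_LD | BPF_IMM | BPF_DW carries a 64-bit constant split across
	 * two instructions, each half in a signed 32-bit 'imm' field. */
	int32_t lo = INT32_MIN;		/* low half with bit 31 set */
	int32_t hi = 1;

	/* Without the cast, 'lo' is sign-extended to 64 bits before the
	 * OR and smears ones over the high word... */
	uint64_t wrong = (uint64_t)hi << 32 | lo;
	/* ...while the (u32) cast zero-extends the low half, as intended: */
	uint64_t right = (uint64_t)hi << 32 | (uint32_t)lo;

	printf("wrong = 0x%016" PRIx64 "\n", wrong);	/* 0xffffffff80000000 */
	printf("right = 0x%016" PRIx64 "\n", right);	/* 0x0000000180000000 */

	/* Similarly, negating INT32_MIN as a signed int is undefined
	 * behaviour; the unsigned negation wraps to a defined value: */
	printf("-(u32)lo = 0x%08" PRIx32 "\n", -(uint32_t)lo);
	return 0;
}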
diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S index c2733e6c3a8d..c4dd2bab8825 100644 --- a/arch/loongarch/vdso/vgetrandom-chacha.S +++ b/arch/loongarch/vdso/vgetrandom-chacha.S @@ -58,9 +58,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) #define copy0 t5 #define copy1 t6 #define copy2 t7 - -/* Reuse i as copy3 */ -#define copy3 i +#define copy3 t8 /* Packs to be used with OP_4REG */ #define line0 state0, state1, state2, state3 @@ -99,6 +97,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) li.w copy0, 0x61707865 li.w copy1, 0x3320646e li.w copy2, 0x79622d32 + li.w copy3, 0x6b206574 ld.w cnt_lo, counter, 0 ld.w cnt_hi, counter, 4 @@ -108,7 +107,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) move state0, copy0 move state1, copy1 move state2, copy2 - li.w state3, 0x6b206574 + move state3, copy3 /* state[4,5,..,11] = key */ ld.w state4, key, 0 @@ -167,12 +166,6 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) addi.w i, i, -1 bnez i, .Lpermute - /* - * copy[3] = "expa", materialize it here because copy[3] shares the - * same register with i which just became dead. - */ - li.w copy3, 0x6b206574 - /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ OP_4REG add.w line0, copy st.w state0, output, 0 diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 80afc3a18724..1e21c758b774 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -17,11 +17,8 @@ extern const char bad_pmd_string[]; -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 8b11d0d545aa..488411af1b3f 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -121,7 +121,5 @@ static inline void init_pointer_tables(void) void __init mem_init(void) { - /* this will put all memory onto the freelists */ - memblock_free_all(); init_pointer_tables(); } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 4520c5741579..31d475cdb1c5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -52,19 +52,6 @@ static void __init highmem_init(void) map_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); } - -static void __meminit highmem_setup(void) -{ - unsigned long pfn; - - for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) { - struct page *page = pfn_to_page(pfn); - - /* FIXME not sure about */ - if (!memblock_is_reserved(pfn << PAGE_SHIFT)) - free_highmem_page(page); - } -} #endif /* CONFIG_HIGHMEM */ /* @@ -104,17 +91,13 @@ void __init setup_memory(void) * * min_low_pfn - the first page (mm/bootmem.c - node_boot_start) * max_low_pfn - * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn) */ /* memory start is from the kernel end (aligned) to higher addr */ min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */ - /* RAM is assumed contiguous */ - max_mapnr = memory_size >> PAGE_SHIFT; max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT; max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT; - pr_info("%s: max_mapnr: %#lx\n", __func__, max_mapnr); pr_info("%s: min_low_pfn: %#lx\n", __func__, min_low_pfn); pr_info("%s: max_low_pfn: %#lx\n", __func__, 
max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); @@ -124,14 +107,6 @@ void __init setup_memory(void) void __init mem_init(void) { - high_memory = (void *)__va(memory_start + lowmem_size - 1); - - /* this will put all memory onto the freelists */ - memblock_free_all(); -#ifdef CONFIG_HIGHMEM - highmem_setup(); -#endif - mem_init_done = 1; } @@ -143,7 +118,7 @@ int page_is_ram(unsigned long pfn) /* * Check for command-line options that affect what MMU_init will do. */ -static void mm_cmdline_setup(void) +static void __init mm_cmdline_setup(void) { unsigned long maxmem = 0; char *p = cmd_line; diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts index 5d33f26fd28c..8455778928b7 100644 --- a/arch/mips/boot/dts/ingenic/gcw0.dts +++ b/arch/mips/boot/dts/ingenic/gcw0.dts @@ -91,7 +91,7 @@ "MIC1N", "Built-in Mic"; simple-audio-card,pin-switches = "Speaker", "Headphones"; - simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_LOW>; + simple-audio-card,hp-det-gpios = <&gpf 21 GPIO_ACTIVE_LOW>; simple-audio-card,aux-devs = <&speaker_amp>, <&headphones_amp>; simple-audio-card,bitclock-master = <&dai_codec>; diff --git a/arch/mips/boot/dts/ingenic/rs90.dts b/arch/mips/boot/dts/ingenic/rs90.dts index e8df70dd42bf..6d2c8aea5f49 100644 --- a/arch/mips/boot/dts/ingenic/rs90.dts +++ b/arch/mips/boot/dts/ingenic/rs90.dts @@ -148,7 +148,7 @@ "Speaker", "OUTR"; simple-audio-card,pin-switches = "Speaker"; - simple-audio-card,hp-det-gpio = <&gpd 16 GPIO_ACTIVE_LOW>; + simple-audio-card,hp-det-gpios = <&gpd 16 GPIO_ACTIVE_LOW>; simple-audio-card,aux-devs = <&>; simple-audio-card,bitclock-master = <&dai_codec>; diff --git a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi index 4a1a43f351d3..dabd5ed778b7 100644 --- a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi +++ b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi @@ -32,6 +32,10 @@ #interrupt-cells = <1>; }; + coherency-manager { + compatible = "mobileye,eyeq6-cm"; + }; + xtal: clock-30000000 { compatible = "fixed-clock"; #clock-cells = <0>; diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts index 18107ca0a06b..7743d014631a 100644 --- a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -5,7 +5,7 @@ /dts-v1/; -/include/ "mt7628a.dtsi" +#include "mt7628a.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> diff --git a/arch/mips/boot/dts/ralink/mt7620a.dtsi b/arch/mips/boot/dts/ralink/mt7620a.dtsi index 1f6e5320f486..d66045948a83 100644 --- a/arch/mips/boot/dts/ralink/mt7620a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7620a.dtsi @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/mediatek,mtmips-sysc.h> + / { #address-cells = <1>; #size-cells = <1>; @@ -25,9 +27,11 @@ #address-cells = <1>; #size-cells = <1>; - sysc@0 { - compatible = "ralink,mt7620a-sysc"; + sysc: syscon@0 { + compatible = "ralink,mt7620-sysc", "syscon"; reg = <0x0 0x100>; + #clock-cells = <1>; + #reset-cells = <1>; }; intc: intc@200 { @@ -50,6 +54,8 @@ compatible = "ralink,mt7620a-uart", "ralink,rt2880-uart", "ns16550a"; reg = <0xc00 0x100>; + clocks = <&sysc MT7620_CLK_UARTLITE>; + interrupt-parent = <&intc>; interrupts = <12>; diff --git a/arch/mips/boot/dts/ralink/mt7620a_eval.dts b/arch/mips/boot/dts/ralink/mt7620a_eval.dts index 8de8f89f31b8..da483ee65b61 100644 --- a/arch/mips/boot/dts/ralink/mt7620a_eval.dts +++ 
b/arch/mips/boot/dts/ralink/mt7620a_eval.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /dts-v1/; -/include/ "mt7620a.dtsi" +#include "mt7620a.dtsi" / { compatible = "ralink,mt7620a-eval-board", "ralink,mt7620a-soc"; diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 45a15e005cc4..0212700c4fb4 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/mediatek,mtmips-sysc.h> / { #address-cells = <1>; @@ -16,11 +17,6 @@ }; }; - resetc: reset-controller { - compatible = "ralink,rt2880-reset"; - #reset-cells = <1>; - }; - cpuintc: interrupt-controller { #address-cells = <0>; #interrupt-cells = <1>; @@ -36,9 +32,11 @@ #address-cells = <1>; #size-cells = <1>; - sysc: system-controller@0 { - compatible = "ralink,mt7620a-sysc", "syscon"; + sysc: syscon@0 { + compatible = "ralink,mt7628-sysc", "syscon"; reg = <0x0 0x60>; + #clock-cells = <1>; + #reset-cells = <1>; }; pinmux: pinmux@60 { @@ -138,7 +136,7 @@ compatible = "mediatek,mt7621-wdt"; reg = <0x100 0x30>; - resets = <&resetc 8>; + resets = <&sysc 8>; reset-names = "wdt"; interrupt-parent = <&intc>; @@ -154,7 +152,7 @@ interrupt-controller; #interrupt-cells = <1>; - resets = <&resetc 9>; + resets = <&sysc 9>; reset-names = "intc"; interrupt-parent = <&cpuintc>; @@ -190,7 +188,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_spi_spi>; - resets = <&resetc 18>; + clocks = <&sysc MT76X8_CLK_SPI1>; + + resets = <&sysc 18>; reset-names = "spi"; #address-cells = <1>; @@ -206,7 +206,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_i2c_i2c>; - resets = <&resetc 16>; + clocks = <&sysc MT76X8_CLK_I2C>; + + resets = <&sysc 16>; reset-names = "i2c"; #address-cells = <1>; @@ -222,7 +224,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_uart0_uart>; - resets = <&resetc 12>; + clocks = <&sysc MT76X8_CLK_UART0>; + + resets = <&sysc 12>; reset-names = "uart0"; interrupt-parent = <&intc>; @@ -238,7 +242,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_uart1_uart>; - resets = <&resetc 19>; + clocks = <&sysc MT76X8_CLK_UART1>; + + resets = <&sysc 19>; reset-names = "uart1"; interrupt-parent = <&intc>; @@ -254,7 +260,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_uart2_uart>; - resets = <&resetc 20>; + clocks = <&sysc MT76X8_CLK_UART2>; + + resets = <&sysc 20>; reset-names = "uart2"; interrupt-parent = <&intc>; @@ -271,7 +279,7 @@ #phy-cells = <0>; ralink,sysctl = <&sysc>; - resets = <&resetc 22 &resetc 25>; + resets = <&sysc 22 &sysc 25>; reset-names = "host", "device"; }; @@ -290,6 +298,8 @@ compatible = "mediatek,mt7628-wmac"; reg = <0x10300000 0x100000>; + clocks = <&sysc MT76X8_CLK_WMAC>; + interrupt-parent = <&cpuintc>; interrupts = <6>; diff --git a/arch/mips/boot/dts/ralink/omega2p.dts b/arch/mips/boot/dts/ralink/omega2p.dts index 5884fd48f59a..51a40ab6df2b 100644 --- a/arch/mips/boot/dts/ralink/omega2p.dts +++ b/arch/mips/boot/dts/ralink/omega2p.dts @@ -1,6 +1,6 @@ /dts-v1/; -/include/ "mt7628a.dtsi" +#include "mt7628a.dtsi" / { compatible = "onion,omega2+", "ralink,mt7688a-soc", "ralink,mt7628a-soc"; diff --git a/arch/mips/boot/dts/ralink/rt2880.dtsi b/arch/mips/boot/dts/ralink/rt2880.dtsi index 8fc1987d9063..1f2ea3434324 100644 --- a/arch/mips/boot/dts/ralink/rt2880.dtsi +++ b/arch/mips/boot/dts/ralink/rt2880.dtsi @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/mediatek,mtmips-sysc.h> + / { #address-cells = <1>; #size-cells = 
<1>; @@ -25,9 +27,11 @@ #address-cells = <1>; #size-cells = <1>; - sysc@0 { - compatible = "ralink,rt2880-sysc"; + sysc: syscon@0 { + compatible = "ralink,rt2880-sysc", "syscon"; reg = <0x0 0x100>; + #clock-cells = <1>; + #reset-cells = <1>; }; intc: intc@200 { @@ -50,6 +54,8 @@ compatible = "ralink,rt2880-uart", "ns16550a"; reg = <0xc00 0x100>; + clocks = <&sysc RT2880_CLK_UARTLITE>; + interrupt-parent = <&intc>; interrupts = <8>; diff --git a/arch/mips/boot/dts/ralink/rt2880_eval.dts b/arch/mips/boot/dts/ralink/rt2880_eval.dts index 759bc1dd5b83..9854a4b120e9 100644 --- a/arch/mips/boot/dts/ralink/rt2880_eval.dts +++ b/arch/mips/boot/dts/ralink/rt2880_eval.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /dts-v1/; -/include/ "rt2880.dtsi" +#include "rt2880.dtsi" / { compatible = "ralink,rt2880-eval-board", "ralink,rt2880-soc"; diff --git a/arch/mips/boot/dts/ralink/rt3050.dtsi b/arch/mips/boot/dts/ralink/rt3050.dtsi index 23062333a76d..a7d9bb9bc1af 100644 --- a/arch/mips/boot/dts/ralink/rt3050.dtsi +++ b/arch/mips/boot/dts/ralink/rt3050.dtsi @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/mediatek,mtmips-sysc.h> + / { #address-cells = <1>; #size-cells = <1>; @@ -25,9 +27,11 @@ #address-cells = <1>; #size-cells = <1>; - sysc@0 { - compatible = "ralink,rt3052-sysc", "ralink,rt3050-sysc"; + sysc: syscon@0 { + compatible = "ralink,rt3052-sysc", "ralink,rt3050-sysc", "syscon"; reg = <0x0 0x100>; + #clock-cells = <1>; + #reset-cells = <1>; }; intc: intc@200 { @@ -50,6 +54,8 @@ compatible = "ralink,rt3052-uart", "ralink,rt2880-uart", "ns16550a"; reg = <0xc00 0x100>; + clocks = <&sysc RT305X_CLK_UARTLITE>; + interrupt-parent = <&intc>; interrupts = <12>; diff --git a/arch/mips/boot/dts/ralink/rt3883.dtsi b/arch/mips/boot/dts/ralink/rt3883.dtsi index 61132cf157e5..11d111a06037 100644 --- a/arch/mips/boot/dts/ralink/rt3883.dtsi +++ b/arch/mips/boot/dts/ralink/rt3883.dtsi @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/mediatek,mtmips-sysc.h> + / { #address-cells = <1>; #size-cells = <1>; @@ -25,9 +27,11 @@ #address-cells = <1>; #size-cells = <1>; - sysc@0 { - compatible = "ralink,rt3883-sysc", "ralink,rt3050-sysc"; + sysc: syscon@0 { + compatible = "ralink,rt3883-sysc", "syscon"; reg = <0x0 0x100>; + #clock-cells = <1>; + #reset-cells = <1>; }; intc: intc@200 { @@ -50,6 +54,8 @@ compatible = "ralink,rt3883-uart", "ralink,rt2880-uart", "ns16550a"; reg = <0xc00 0x100>; + clocks = <&sysc RT3883_CLK_UARTLITE>; + interrupt-parent = <&intc>; interrupts = <12>; diff --git a/arch/mips/boot/dts/ralink/rt3883_eval.dts b/arch/mips/boot/dts/ralink/rt3883_eval.dts index c22bc84df219..a095a1fe9415 100644 --- a/arch/mips/boot/dts/ralink/rt3883_eval.dts +++ b/arch/mips/boot/dts/ralink/rt3883_eval.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /dts-v1/; -/include/ "rt3883.dtsi" +#include "rt3883.dtsi" / { compatible = "ralink,rt3883-eval-board", "ralink,rt3883-soc"; diff --git a/arch/mips/boot/dts/realtek/cisco_sg220-26.dts b/arch/mips/boot/dts/realtek/cisco_sg220-26.dts index 1cdbb09297ef..fab3d552404d 100644 --- a/arch/mips/boot/dts/realtek/cisco_sg220-26.dts +++ b/arch/mips/boot/dts/realtek/cisco_sg220-26.dts @@ -2,9 +2,10 @@ /dts-v1/; -#include "rtl83xx.dtsi" #include "rtl838x.dtsi" +#include <dt-bindings/gpio/gpio.h> + / { model = "Cisco SG220-26"; compatible = "cisco,sg220-26", "realtek,rtl8382-soc"; @@ -18,6 +19,13 @@ device_type = "memory"; reg = <0x0 0x8000000>; }; + + gpio-restart { + compatible = "gpio-restart"; + gpios = 
<&gpio0 1 GPIO_ACTIVE_LOW>; + priority = <192>; + open-source; + }; }; &uart0 { diff --git a/arch/mips/boot/dts/realtek/rtl838x.dtsi b/arch/mips/boot/dts/realtek/rtl838x.dtsi index 722106e39194..ce522a6af262 100644 --- a/arch/mips/boot/dts/realtek/rtl838x.dtsi +++ b/arch/mips/boot/dts/realtek/rtl838x.dtsi @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause / { + #address-cells = <1>; + #size-cells = <1>; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -9,8 +17,7 @@ device_type = "cpu"; compatible = "mips,mips4KEc"; reg = <0>; - clocks = <&baseclk 0>; - clock-names = "cpu"; + clocks = <&baseclk>; }; }; @@ -19,4 +26,104 @@ #clock-cells = <0>; clock-frequency = <500000000>; }; + + cpuintc: cpuintc { + compatible = "mti,cpu-interrupt-controller"; + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + + lx_clk: clock-lexra { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <200000000>; + }; + + soc@18000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x18000000 0x10000>; + + spi0: spi@1200 { + compatible = "realtek,rtl8380-spi"; + reg = <0x1200 0x100>; + + #address-cells = <1>; + #size-cells = <0>; + }; + + uart0: serial@2000 { + compatible = "ns16550a"; + reg = <0x2000 0x100>; + + clocks = <&lx_clk>; + + interrupt-parent = <&intc>; + interrupts = <31>; + + reg-io-width = <1>; + reg-shift = <2>; + fifo-size = <1>; + no-loopback-test; + + status = "disabled"; + }; + + uart1: serial@2100 { + compatible = "ns16550a"; + reg = <0x2100 0x100>; + + clocks = <&lx_clk>; + + interrupt-parent = <&intc>; + interrupts = <30>; + + reg-io-width = <1>; + reg-shift = <2>; + fifo-size = <1>; + no-loopback-test; + + status = "disabled"; + }; + + intc: interrupt-controller@3000 { + compatible = "realtek,rtl8380-intc", "realtek,rtl-intc"; + reg = <0x3000 0x20>; + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>, <3>, <4>, <5>, <6>; + }; + + watchdog: watchdog@3150 { + compatible = "realtek,rtl8380-wdt"; + reg = <0x3150 0xc>; + + realtek,reset-mode = "soc"; + + clocks = <&lx_clk>; + timeout-sec = <20>; + + interrupt-parent = <&intc>; + interrupt-names = "phase1", "phase2"; + interrupts = <19>, <18>; + }; + + gpio0: gpio@3500 { + compatible = "realtek,rtl8380-gpio", "realtek,otto-gpio"; + reg = <0x3500 0x1c>; + + gpio-controller; + #gpio-cells = <2>; + ngpios = <24>; + + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&intc>; + interrupts = <23>; + }; + }; }; diff --git a/arch/mips/boot/dts/realtek/rtl83xx.dtsi b/arch/mips/boot/dts/realtek/rtl83xx.dtsi deleted file mode 100644 index 03ddc61f7c9e..000000000000 --- a/arch/mips/boot/dts/realtek/rtl83xx.dtsi +++ /dev/null @@ -1,59 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause - -/ { - #address-cells = <1>; - #size-cells = <1>; - - aliases { - serial0 = &uart0; - serial1 = &uart1; - }; - - cpuintc: cpuintc { - compatible = "mti,cpu-interrupt-controller"; - #address-cells = <0>; - #interrupt-cells = <1>; - interrupt-controller; - }; - - soc: soc { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x18000000 0x10000>; - - uart0: serial@2000 { - compatible = "ns16550a"; - reg = <0x2000 0x100>; - - clock-frequency = <200000000>; - - interrupt-parent = <&cpuintc>; - interrupts = <31>; - - reg-io-width = <1>; - reg-shift = <2>; - fifo-size = <1>; - 
no-loopback-test; - - status = "disabled"; - }; - - uart1: serial@2100 { - compatible = "ns16550a"; - reg = <0x2100 0x100>; - - clock-frequency = <200000000>; - - interrupt-parent = <&cpuintc>; - interrupts = <30>; - - reg-io-width = <1>; - reg-shift = <2>; - fifo-size = <1>; - no-loopback-test; - - status = "disabled"; - }; - }; -}; diff --git a/arch/mips/boot/dts/realtek/rtl930x.dtsi b/arch/mips/boot/dts/realtek/rtl930x.dtsi index 17577457d159..f2e57ea3a60c 100644 --- a/arch/mips/boot/dts/realtek/rtl930x.dtsi +++ b/arch/mips/boot/dts/realtek/rtl930x.dtsi @@ -1,10 +1,23 @@ // SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause -#include "rtl83xx.dtsi" - / { compatible = "realtek,rtl9302-soc"; + #address-cells = <1>; + #size-cells = <1>; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + }; + + cpuintc: cpuintc { + compatible = "mti,cpu-interrupt-controller"; + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -13,8 +26,7 @@ device_type = "cpu"; compatible = "mips,mips34Kc"; reg = <0>; - clocks = <&baseclk 0>; - clock-names = "cpu"; + clocks = <&baseclk>; }; }; @@ -58,64 +70,84 @@ status = "disabled"; }; }; -}; -&soc { - ranges = <0x0 0x18000000 0x20000>; + soc: soc@18000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x18000000 0x20000>; - intc: interrupt-controller@3000 { - compatible = "realtek,rtl9300-intc", "realtek,rtl-intc"; - reg = <0x3000 0x18>, <0x3018 0x18>; - interrupt-controller; - #interrupt-cells = <1>; + intc: interrupt-controller@3000 { + compatible = "realtek,rtl9300-intc", "realtek,rtl-intc"; + reg = <0x3000 0x18>, <0x3018 0x18>; + interrupt-controller; + #interrupt-cells = <1>; - interrupt-parent = <&cpuintc>; - interrupts = <2>, <3>, <4>, <5>, <6>, <7>; - }; + interrupt-parent = <&cpuintc>; + interrupts = <2>, <3>, <4>, <5>, <6>, <7>; + }; - spi0: spi@1200 { - compatible = "realtek,rtl8380-spi"; - reg = <0x1200 0x100>; + spi0: spi@1200 { + compatible = "realtek,rtl8380-spi"; + reg = <0x1200 0x100>; - #address-cells = <1>; - #size-cells = <0>; - }; + #address-cells = <1>; + #size-cells = <0>; + }; - timer0: timer@3200 { - compatible = "realtek,rtl9302-timer", "realtek,otto-timer"; - reg = <0x3200 0x10>, <0x3210 0x10>, <0x3220 0x10>, - <0x3230 0x10>, <0x3240 0x10>; + timer0: timer@3200 { + compatible = "realtek,rtl9302-timer", "realtek,otto-timer"; + reg = <0x3200 0x10>, <0x3210 0x10>, <0x3220 0x10>, + <0x3230 0x10>, <0x3240 0x10>; - interrupt-parent = <&intc>; - interrupts = <7>, <8>, <9>, <10>, <11>; - clocks = <&lx_clk>; - }; + interrupt-parent = <&intc>; + interrupts = <7>, <8>, <9>, <10>, <11>; + clocks = <&lx_clk>; + }; - snand: spi@1a400 { - compatible = "realtek,rtl9301-snand"; - reg = <0x1a400 0x44>; - interrupt-parent = <&intc>; - interrupts = <19>; - clocks = <&lx_clk>; - #address-cells = <1>; - #size-cells = <0>; - status = "disabled"; - }; -}; + snand: spi@1a400 { + compatible = "realtek,rtl9301-snand"; + reg = <0x1a400 0x44>; + interrupt-parent = <&intc>; + interrupts = <19>; + clocks = <&lx_clk>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; -&uart0 { - /delete-property/ clock-frequency; - clocks = <&lx_clk>; + uart0: serial@2000 { + compatible = "ns16550a"; + reg = <0x2000 0x100>; - interrupt-parent = <&intc>; - interrupts = <30>; -}; + clocks = <&lx_clk>; -&uart1 { - /delete-property/ clock-frequency; - clocks = <&lx_clk>; + interrupt-parent = <&intc>; + interrupts = <30>; - interrupt-parent 
= <&intc>; - interrupts = <31>; -}; + reg-io-width = <1>; + reg-shift = <2>; + fifo-size = <1>; + no-loopback-test; + status = "disabled"; + }; + + uart1: serial@2100 { + compatible = "ns16550a"; + reg = <0x2100 0x100>; + + clocks = <&lx_clk>; + + interrupt-parent = <&intc>; + interrupts = <31>; + + reg-io-width = <1>; + reg-shift = <2>; + fifo-size = <1>; + no-loopback-test; + + status = "disabled"; + }; + }; +}; diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 92fc0edbac47..12f3eed8a946 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -116,7 +116,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index e22e8b825903..31ca93d3acc5 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -161,7 +161,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index b08a199767d1..b8907b3d7a33 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -63,7 +63,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m @@ -255,9 +254,6 @@ CONFIG_I2C_TAOS_EVM=m CONFIG_I2C_STUB=m # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_MFD_PCF50633=m -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m # CONFIG_VGA_ARB is not set CONFIG_LEDS_LP3944=m CONFIG_LEDS_PCA955X=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index e308b82c094a..869a14b3184f 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -190,7 +190,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index fa5b04063ddb..41e1fea303ea 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -194,7 +194,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index b21f48863d81..1b98f6945c2d 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -64,7 +64,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index ecfa8a396c33..7b8905cb3400 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -66,7 +66,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m 
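The Ralink .dtsi updates above convert the system controller node into a syscon that also provides clocks and resets (#clock-cells/#reset-cells), which is why the UART, SPI, I2C and WMAC nodes gain clocks = <&sysc ...> phandles and why the separate "ralink,rt2880-reset" node goes away. On the consumer side a driver then resolves its rate through the clk API instead of a hard-coded clock-frequency property; a minimal sketch (uartlite_clk_setup is a hypothetical helper, not a function from this series):

    #include <linux/clk.h>

    static int uartlite_clk_setup(struct device *dev, unsigned long *rate)
    {
        struct clk *clk;

        /* resolves e.g. <&sysc MT7620_CLK_UARTLITE> from the DT node */
        clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(clk))
            return PTR_ERR(clk);

        *rate = clk_get_rate(clk);
        return 0;
    }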
CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 5cb4f262a4ea..8249f6a51895 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -67,7 +67,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 5e1498296782..21cb37668763 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -68,7 +68,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index c8594505d676..3df9cd669683 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -65,7 +65,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 40283171af68..13ff1877e26e 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -191,7 +191,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 8e98c0796437..06b7a0b97eca 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -162,7 +162,6 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index 7c1c1b974d8f..128f9abab7fc 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -61,6 +61,7 @@ CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_MMC=y +CONFIG_CLK_MTMIPS=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_MEMORY=y CONFIG_PHY_RALINK_USB=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 517f1b060bf4..0261969a6e45 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -77,7 +77,6 @@ CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_PRIO=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 3a6d0384b774..7b5a5591ccc9 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -143,7 +143,6 @@ CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index 7c8ebb1b56da..917967fed45f 100644 --- a/arch/mips/configs/vocore2_defconfig +++ 
b/arch/mips/configs/vocore2_defconfig @@ -61,6 +61,7 @@ CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_MMC=y +CONFIG_CLK_MTMIPS=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_MEMORY=y CONFIG_PHY_RALINK_USB=y diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index cb12eb211a49..8d74d7d6c05b 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -42,7 +42,7 @@ int (*__pmax_close)(int); * Detect which PROM the DECSTATION has, and set the callback vectors * appropriately. */ -void __init which_prom(s32 magic, s32 *prom_vec) +static void __init which_prom(s32 magic, s32 *prom_vec) { /* * No sign of the REX PROM's magic number means we assume a non-REX diff --git a/arch/mips/include/asm/ds1287.h b/arch/mips/include/asm/ds1287.h index 46cfb01f9a14..51cb61fd4c03 100644 --- a/arch/mips/include/asm/ds1287.h +++ b/arch/mips/include/asm/ds1287.h @@ -8,7 +8,7 @@ #define __ASM_DS1287_H extern int ds1287_timer_state(void); -extern void ds1287_set_base_clock(unsigned int clock); +extern int ds1287_set_base_clock(unsigned int hz); extern int ds1287_clockevent_init(int irq); #endif diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index dc025888f6d2..b41fc1044668 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -91,4 +91,20 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_FTRACE_SYSCALLS +#ifndef __ASSEMBLY__ +/* + * Some syscall entry functions on mips start with "__sys_" (fork and clone, + * for instance). We should also match the sys_ variant with those. + */ +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + return !strcmp(sym, name) || + (!strncmp(sym, "__sys_", 6) && !strcmp(sym + 6, name + 4)); +} +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FTRACE_SYSCALLS */ #endif /* _ASM_MIPS_FTRACE_H */ diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 78c6573f91f2..980187a83053 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -115,7 +115,7 @@ static inline unsigned long isa_virt_to_bus(volatile void *address) } void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val); + pgprot_t prot); void iounmap(const volatile void __iomem *addr); /* @@ -130,7 +130,7 @@ void iounmap(const volatile void __iomem *addr); * address. */ #define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_UNCACHED) + ioremap_prot((offset), (size), __pgprot(_CACHE_UNCACHED)) /* * ioremap_cache - map bus memory into CPU space @@ -148,7 +148,7 @@ void iounmap(const volatile void __iomem *addr); * memory-like regions on I/O busses. */ #define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _page_cachable_default) + ioremap_prot((offset), (size), __pgprot(_page_cachable_default)) /* * ioremap_wc - map bus memory into CPU space @@ -169,7 +169,7 @@ void iounmap(const volatile void __iomem *addr); * _CACHE_UNCACHED option (see cpu_probe() method). 
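The arch_syscall_match_sym_name() override added to asm/ftrace.h above lets ftrace's syscall events, which are named after sys_*, match MIPS entry symbols spelled __sys_* (fork and clone, per the comment). The comparison skips the "__sys_" prefix on the symbol and the "sys_" prefix on the event name; a standalone host program demonstrates the rule:

    #include <stdio.h>
    #include <string.h>

    /* copy of the predicate added above, usable outside the kernel */
    static int match(const char *sym, const char *name)
    {
        return !strcmp(sym, name) ||
               (!strncmp(sym, "__sys_", 6) && !strcmp(sym + 6, name + 4));
    }

    int main(void)
    {
        printf("%d\n", match("__sys_clone", "sys_clone")); /* 1 */
        printf("%d\n", match("sys_read", "sys_read"));     /* 1 */
        printf("%d\n", match("__sys_clone", "sys_fork"));  /* 0 */
        return 0;
    }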
*/ #define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), boot_cpu_data.writecombine) + ioremap_prot((offset), (size), __pgprot(boot_cpu_data.writecombine)) #if defined(CONFIG_CPU_CAVIUM_OCTEON) #define war_io_reorder_wmb() wmb() diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h index 3eb767c8a4ee..e17ce82d02ba 100644 --- a/arch/mips/include/asm/mach-rc32434/pci.h +++ b/arch/mips/include/asm/mach-rc32434/pci.h @@ -167,7 +167,7 @@ struct pci_msu { #define PCI_CFGA_DEV 0x0000f800 #define PCI_CFGA_DEV_INTERN 0 #define PCI_CFGA_BUS_BIT 16 -#define PCI CFGA_BUS 0x00ff0000 +#define PCI_CFGA_BUS 0x00ff0000 #define PCI_CFGA_BUS_TYPE0 0 #define PCI_CFGA_EN (1 << 31) diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 23ce951f445b..407f253bb4a1 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -59,6 +59,16 @@ extern phys_addr_t mips_cm_l2sync_phys_base(void); */ extern int mips_cm_is64; +/* + * mips_cm_is_l2_hci_broken - determine if HCI is broken + * + * Some CM reports show that Hardware Cache Initialization is + * complete, but in reality it's not the case. They also incorrectly + * indicate that Hardware Cache Initialization is supported. This + * flags allows warning about this broken feature. + */ +extern bool mips_cm_is_l2_hci_broken; + /** * mips_cm_error_report - Report CM cache errors */ @@ -98,6 +108,18 @@ static inline bool mips_cm_present(void) } /** + * mips_cm_update_property - update property from the device tree + * + * Retrieve the properties from the device tree if a CM node exist and + * update the internal variable based on this. + */ +#ifdef CONFIG_MIPS_CM +extern void mips_cm_update_property(void); +#else +static inline void mips_cm_update_property(void) {} +#endif + +/** * mips_cm_has_l2sync - determine whether an L2-only sync region is present * * Returns true if the system implements an L2-only sync region, else false. 
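The mips_cm_is_l2_hci_broken flag and the GCR_L2_RAM_CONFIG bit definitions above exist so that boot code can decide whether the CM's claim of a finished Hardware Cache Initialization is trustworthy. Roughly, the intended consumption (mirroring the smp-cps L2 init added later in this series) is:

    u64 cfg;

    if (!mips_cm_is_l2_hci_broken) {
        cfg = read_gcr_redir_l2_ram_config();
        if ((cfg & CM_GCR_L2_RAM_CONFIG_PRESENT) &&
            (cfg & CM_GCR_L2_RAM_CONFIG_HCI_SUPPORTED) &&
            (cfg & CM_GCR_L2_RAM_CONFIG_HCI_DONE))
            return; /* hardware already initialised the L2 */
    }
    /* otherwise drive the GCR_L2SM_COP store-tag state machine by hand */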
@@ -255,6 +277,12 @@ GCR_ACCESSOR_RW(32, 0x130, l2_config) GCR_ACCESSOR_RO(32, 0x150, sys_config2) #define CM_GCR_SYS_CONFIG2_MAXVPW GENMASK(3, 0) +/* GCR_L2-RAM_CONFIG - Configuration & status of L2 cache RAMs */ +GCR_ACCESSOR_RW(64, 0x240, l2_ram_config) +#define CM_GCR_L2_RAM_CONFIG_PRESENT BIT(31) +#define CM_GCR_L2_RAM_CONFIG_HCI_DONE BIT(30) +#define CM_GCR_L2_RAM_CONFIG_HCI_SUPPORTED BIT(29) + /* GCR_L2_PFT_CONTROL - Controls hardware L2 prefetching */ GCR_ACCESSOR_RW(32, 0x300, l2_pft_control) #define CM_GCR_L2_PFT_CONTROL_PAGEMASK GENMASK(31, 12) @@ -266,6 +294,18 @@ GCR_ACCESSOR_RW(32, 0x308, l2_pft_control_b) #define CM_GCR_L2_PFT_CONTROL_B_CEN BIT(8) #define CM_GCR_L2_PFT_CONTROL_B_PORTID GENMASK(7, 0) +/* GCR_L2_TAG_ADDR - Access addresses in L2 cache tags */ +GCR_ACCESSOR_RW(64, 0x600, l2_tag_addr) + +/* GCR_L2_TAG_STATE - Access L2 cache tag state */ +GCR_ACCESSOR_RW(64, 0x608, l2_tag_state) + +/* GCR_L2_DATA - Access data in L2 cache lines */ +GCR_ACCESSOR_RW(64, 0x610, l2_data) + +/* GCR_L2_ECC - Access ECC information from L2 cache lines */ +GCR_ACCESSOR_RW(64, 0x618, l2_ecc) + /* GCR_L2SM_COP - L2 cache op state machine control */ GCR_ACCESSOR_RW(32, 0x620, l2sm_cop) #define CM_GCR_L2SM_COP_PRESENT BIT(31) diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h index 14226ea42036..602a21aee9d4 100644 --- a/arch/mips/include/asm/mmzone.h +++ b/arch/mips/include/asm/mmzone.h @@ -20,6 +20,4 @@ #define nid_to_addrbase(nid) 0 #endif -extern void setup_zero_pages(void); - #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 26c7a6ede983..bbca420c96d3 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -48,11 +48,8 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(void *addr); extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/mips/include/asm/smp-cps.h b/arch/mips/include/asm/smp-cps.h index ab94e50f62b8..10d3ebd890cb 100644 --- a/arch/mips/include/asm/smp-cps.h +++ b/arch/mips/include/asm/smp-cps.h @@ -22,7 +22,12 @@ struct core_boot_config { struct vpe_boot_config *vpe_config; }; -extern struct core_boot_config *mips_cps_core_bootcfg; +struct cluster_boot_config { + unsigned long *core_power; + struct core_boot_config *core_config; +}; + +extern struct cluster_boot_config *mips_cps_cluster_bootcfg; extern void mips_cps_core_boot(int cca, void __iomem *gcr_base); extern void mips_cps_core_init(void); diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index b910ec54a3a1..1e29efcba46e 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -410,6 +410,9 @@ void output_cps_defines(void) { COMMENT(" MIPS CPS offsets. 
"); + OFFSET(CLUSTERBOOTCFG_CORECONFIG, cluster_boot_config, core_config); + DEFINE(CLUSTERBOOTCFG_SIZE, sizeof(struct cluster_boot_config)); + OFFSET(COREBOOTCFG_VPEMASK, core_boot_config, vpe_mask); OFFSET(COREBOOTCFG_VPECONFIG, core_boot_config, vpe_config); DEFINE(COREBOOTCFG_SIZE, sizeof(struct core_boot_config)); diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c index 9a47fbcd4638..de64d6bb7ba3 100644 --- a/arch/mips/kernel/cevt-ds1287.c +++ b/arch/mips/kernel/cevt-ds1287.c @@ -10,6 +10,7 @@ #include <linux/mc146818rtc.h> #include <linux/irq.h> +#include <asm/ds1287.h> #include <asm/time.h> int ds1287_timer_state(void) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index f876309130ad..2ae7034a3d5c 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -19,6 +19,10 @@ #define GCR_CPC_BASE_OFS 0x0088 #define GCR_CL_COHERENCE_OFS 0x2008 #define GCR_CL_ID_OFS 0x2028 +#define CM3_GCR_Cx_ID_CLUSTER_SHF 8 +#define CM3_GCR_Cx_ID_CLUSTER_MSK (0xff << 8) +#define CM3_GCR_Cx_ID_CORENUM_SHF 0 +#define CM3_GCR_Cx_ID_CORENUM_MSK (0xff << 0) #define CPC_CL_VC_STOP_OFS 0x2020 #define CPC_CL_VC_RUN_OFS 0x2028 @@ -271,12 +275,21 @@ LEAF(mips_cps_core_init) */ LEAF(mips_cps_get_bootcfg) /* Calculate a pointer to this cores struct core_boot_config */ + PTR_LA v0, mips_cps_cluster_bootcfg + PTR_L v0, 0(v0) lw t0, GCR_CL_ID_OFS(s1) +#ifdef CONFIG_CPU_MIPSR6 + ext t1, t0, CM3_GCR_Cx_ID_CLUSTER_SHF, 8 + li t2, CLUSTERBOOTCFG_SIZE + mul t1, t1, t2 + PTR_ADDU \ + v0, v0, t1 +#endif + PTR_L v0, CLUSTERBOOTCFG_CORECONFIG(v0) + andi t0, t0, CM3_GCR_Cx_ID_CORENUM_MSK li t1, COREBOOTCFG_SIZE mul t0, t0, t1 - PTR_LA t1, mips_cps_core_bootcfg - PTR_L t1, 0(t1) - PTR_ADDU v0, t0, t1 + PTR_ADDU v0, v0, t0 /* Calculate this VPEs ID. 
If the core doesn't support MT use 0 */ li t9, 0 diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 3eb2cfb893e1..43cb1e20baed 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -5,6 +5,7 @@ */ #include <linux/errno.h> +#include <linux/of.h> #include <linux/percpu.h> #include <linux/spinlock.h> @@ -14,6 +15,7 @@ void __iomem *mips_gcr_base; void __iomem *mips_cm_l2sync_base; int mips_cm_is64; +bool mips_cm_is_l2_hci_broken; static char *cm2_tr[8] = { "mem", "gcr", "gic", "mmio", @@ -237,6 +239,18 @@ static void mips_cm_probe_l2sync(void) mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE); } +void mips_cm_update_property(void) +{ + struct device_node *cm_node; + + cm_node = of_find_compatible_node(of_root, NULL, "mobileye,eyeq6-cm"); + if (!cm_node) + return; + pr_info("HCI (Hardware Cache Init for the L2 cache) in GCR_L2_RAM_CONFIG from the CM3 is broken"); + mips_cm_is_l2_hci_broken = true; + of_node_put(cm_node); +} + int mips_cm_probe(void) { phys_addr_t addr; @@ -308,7 +322,9 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, FIELD_PREP(CM3_GCR_Cx_OTHER_VP, vp); if (cm_rev >= CM_REV_CM3_5) { - val |= CM_GCR_Cx_OTHER_CLUSTER_EN; + if (cluster != cpu_cluster(¤t_cpu_data)) + val |= CM_GCR_Cx_OTHER_CLUSTER_EN; + val |= CM_GCR_Cx_OTHER_GIC_EN; val |= FIELD_PREP(CM_GCR_Cx_OTHER_CLUSTER, cluster); val |= FIELD_PREP(CM_GCR_Cx_OTHER_BLOCK, block); } else { diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index d09ca77e624d..3de0e05e0511 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -57,10 +57,7 @@ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count); /* Indicates online CPUs coupled with the current CPU */ static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled); -/* - * Used to synchronize entry to deep idle states. Actually per-core rather - * than per-CPU. 
- */ +/* Used to synchronize entry to deep idle states */ static DEFINE_PER_CPU_ALIGNED(atomic_t, pm_barrier); /* Saved CPU state across the CPS_PM_POWER_GATED state */ @@ -104,17 +101,20 @@ static void coupled_barrier(atomic_t *a, unsigned online) int cps_pm_enter_state(enum cps_pm_state state) { unsigned cpu = smp_processor_id(); + unsigned int cluster = cpu_cluster(¤t_cpu_data); unsigned core = cpu_core(¤t_cpu_data); unsigned online, left; cpumask_t *coupled_mask = this_cpu_ptr(&online_coupled); u32 *core_ready_count, *nc_core_ready_count; void *nc_addr; cps_nc_entry_fn entry; + struct cluster_boot_config *cluster_cfg; struct core_boot_config *core_cfg; struct vpe_boot_config *vpe_cfg; + atomic_t *barrier; /* Check that there is an entry function for this state */ - entry = per_cpu(nc_asm_enter, core)[state]; + entry = per_cpu(nc_asm_enter, cpu)[state]; if (!entry) return -EINVAL; @@ -138,7 +138,8 @@ int cps_pm_enter_state(enum cps_pm_state state) if (!mips_cps_smp_in_use()) return -EINVAL; - core_cfg = &mips_cps_core_bootcfg[core]; + cluster_cfg = &mips_cps_cluster_bootcfg[cluster]; + core_cfg = &cluster_cfg->core_config[core]; vpe_cfg = &core_cfg->vpe_config[cpu_vpe_id(¤t_cpu_data)]; vpe_cfg->pc = (unsigned long)mips_cps_pm_restore; vpe_cfg->gp = (unsigned long)current_thread_info(); @@ -150,7 +151,7 @@ int cps_pm_enter_state(enum cps_pm_state state) smp_mb__after_atomic(); /* Create a non-coherent mapping of the core ready_count */ - core_ready_count = per_cpu(ready_count, core); + core_ready_count = per_cpu(ready_count, cpu); nc_addr = kmap_noncoherent(virt_to_page(core_ready_count), (unsigned long)core_ready_count); nc_addr += ((unsigned long)core_ready_count & ~PAGE_MASK); @@ -158,7 +159,8 @@ int cps_pm_enter_state(enum cps_pm_state state) /* Ensure ready_count is zero-initialised before the assembly runs */ WRITE_ONCE(*nc_core_ready_count, 0); - coupled_barrier(&per_cpu(pm_barrier, core), online); + barrier = &per_cpu(pm_barrier, cpumask_first(&cpu_sibling_map[cpu])); + coupled_barrier(barrier, online); /* Run the generated entry code */ left = entry(online, nc_core_ready_count); @@ -629,12 +631,14 @@ out_err: static int cps_pm_online_cpu(unsigned int cpu) { - enum cps_pm_state state; - unsigned core = cpu_core(&cpu_data[cpu]); + unsigned int sibling, core; void *entry_fn, *core_rc; + enum cps_pm_state state; + + core = cpu_core(&cpu_data[cpu]); for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) { - if (per_cpu(nc_asm_enter, core)[state]) + if (per_cpu(nc_asm_enter, cpu)[state]) continue; if (!test_bit(state, state_support)) continue; @@ -646,16 +650,19 @@ static int cps_pm_online_cpu(unsigned int cpu) clear_bit(state, state_support); } - per_cpu(nc_asm_enter, core)[state] = entry_fn; + for_each_cpu(sibling, &cpu_sibling_map[cpu]) + per_cpu(nc_asm_enter, sibling)[state] = entry_fn; } - if (!per_cpu(ready_count, core)) { + if (!per_cpu(ready_count, cpu)) { core_rc = kmalloc(sizeof(u32), GFP_KERNEL); if (!core_rc) { pr_err("Failed allocate core %u ready_count\n", core); return -ENOMEM; } - per_cpu(ready_count, core) = core_rc; + + for_each_cpu(sibling, &cpu_sibling_map[cpu]) + per_cpu(ready_count, sibling) = core_rc; } return 0; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 82c8f9b9573c..e85bd087467e 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -36,11 +36,55 @@ enum label_id { UASM_L_LA(_not_nmi) -static DECLARE_BITMAP(core_power, NR_CPUS); static u64 core_entry_reg; static phys_addr_t cps_vec_pa; -struct 
core_boot_config *mips_cps_core_bootcfg; +struct cluster_boot_config *mips_cps_cluster_bootcfg; + +static void power_up_other_cluster(unsigned int cluster) +{ + u32 stat, seq_state; + unsigned int timeout; + + mips_cm_lock_other(cluster, CM_GCR_Cx_OTHER_CORE_CM, 0, + CM_GCR_Cx_OTHER_BLOCK_LOCAL); + stat = read_cpc_co_stat_conf(); + mips_cm_unlock_other(); + + seq_state = stat & CPC_Cx_STAT_CONF_SEQSTATE; + seq_state >>= __ffs(CPC_Cx_STAT_CONF_SEQSTATE); + if (seq_state == CPC_Cx_STAT_CONF_SEQSTATE_U5) + return; + + /* Set endianness & power up the CM */ + mips_cm_lock_other(cluster, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL); + write_cpc_redir_sys_config(IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)); + write_cpc_redir_pwrup_ctl(1); + mips_cm_unlock_other(); + + /* Wait for the CM to start up */ + timeout = 1000; + mips_cm_lock_other(cluster, CM_GCR_Cx_OTHER_CORE_CM, 0, + CM_GCR_Cx_OTHER_BLOCK_LOCAL); + while (1) { + stat = read_cpc_co_stat_conf(); + seq_state = stat & CPC_Cx_STAT_CONF_SEQSTATE; + seq_state >>= __ffs(CPC_Cx_STAT_CONF_SEQSTATE); + if (seq_state == CPC_Cx_STAT_CONF_SEQSTATE_U5) + break; + + if (timeout) { + mdelay(1); + timeout--; + } else { + pr_warn("Waiting for cluster %u CM to power up... STAT_CONF=0x%x\n", + cluster, stat); + mdelay(1000); + } + } + + mips_cm_unlock_other(); +} static unsigned __init core_vpe_count(unsigned int cluster, unsigned core) { @@ -178,6 +222,9 @@ static void __init cps_smp_setup(void) pr_cont(","); pr_cont("{"); + if (mips_cm_revision() >= CM_REV_CM3_5) + power_up_other_cluster(cl); + ncores = mips_cps_numcores(cl); for (c = 0; c < ncores; c++) { core_vpes = core_vpe_count(cl, c); @@ -205,8 +252,8 @@ static void __init cps_smp_setup(void) /* Indicate present CPUs (CPU being synonymous with VPE) */ for (v = 0; v < min_t(unsigned, nvpes, NR_CPUS); v++) { - set_cpu_possible(v, cpu_cluster(&cpu_data[v]) == 0); - set_cpu_present(v, cpu_cluster(&cpu_data[v]) == 0); + set_cpu_possible(v, true); + set_cpu_present(v, true); __cpu_number_map[v] = v; __cpu_logical_map[v] = v; } @@ -214,9 +261,6 @@ static void __init cps_smp_setup(void) /* Set a coherent default CCA (CWB) */ change_c0_config(CONF_CM_CMASK, 0x5); - /* Core 0 is powered up (we're running on it) */ - bitmap_set(core_power, 0, 1); - /* Initialise core 0 */ mips_cps_core_init(); @@ -238,8 +282,10 @@ static void __init cps_smp_setup(void) static void __init cps_prepare_cpus(unsigned int max_cpus) { - unsigned ncores, core_vpes, c, cca; + unsigned int nclusters, ncores, core_vpes, c, cl, cca; bool cca_unsuitable, cores_limited; + struct cluster_boot_config *cluster_bootcfg; + struct core_boot_config *core_bootcfg; mips_mt_set_cpuoptions(); @@ -281,40 +327,62 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) setup_cps_vecs(); - /* Allocate core boot configuration structs */ - ncores = mips_cps_numcores(0); - mips_cps_core_bootcfg = kcalloc(ncores, sizeof(*mips_cps_core_bootcfg), + /* Allocate cluster boot configuration structs */ + nclusters = mips_cps_numclusters(); + mips_cps_cluster_bootcfg = kcalloc(nclusters, + sizeof(*mips_cps_cluster_bootcfg), + GFP_KERNEL); + + if (nclusters > 1) + mips_cm_update_property(); + + for (cl = 0; cl < nclusters; cl++) { + /* Allocate core boot configuration structs */ + ncores = mips_cps_numcores(cl); + core_bootcfg = kcalloc(ncores, sizeof(*core_bootcfg), GFP_KERNEL); - if (!mips_cps_core_bootcfg) { - pr_err("Failed to allocate boot config for %u cores\n", ncores); - goto err_out; - } + if (!core_bootcfg) + goto err_out; + mips_cps_cluster_bootcfg[cl].core_config = 
core_bootcfg; - /* Allocate VPE boot configuration structs */ - for (c = 0; c < ncores; c++) { - core_vpes = core_vpe_count(0, c); - mips_cps_core_bootcfg[c].vpe_config = kcalloc(core_vpes, - sizeof(*mips_cps_core_bootcfg[c].vpe_config), + mips_cps_cluster_bootcfg[cl].core_power = + kcalloc(BITS_TO_LONGS(ncores), sizeof(unsigned long), GFP_KERNEL); - if (!mips_cps_core_bootcfg[c].vpe_config) { - pr_err("Failed to allocate %u VPE boot configs\n", - core_vpes); - goto err_out; + + /* Allocate VPE boot configuration structs */ + for (c = 0; c < ncores; c++) { + core_vpes = core_vpe_count(cl, c); + core_bootcfg[c].vpe_config = kcalloc(core_vpes, + sizeof(*core_bootcfg[c].vpe_config), + GFP_KERNEL); + if (!core_bootcfg[c].vpe_config) + goto err_out; } } - /* Mark this CPU as booted */ - atomic_set(&mips_cps_core_bootcfg[cpu_core(¤t_cpu_data)].vpe_mask, - 1 << cpu_vpe_id(¤t_cpu_data)); + /* Mark this CPU as powered up & booted */ + cl = cpu_cluster(¤t_cpu_data); + c = cpu_core(¤t_cpu_data); + cluster_bootcfg = &mips_cps_cluster_bootcfg[cl]; + core_bootcfg = &cluster_bootcfg->core_config[c]; + bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1); + atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data)); return; err_out: /* Clean up allocations */ - if (mips_cps_core_bootcfg) { - for (c = 0; c < ncores; c++) - kfree(mips_cps_core_bootcfg[c].vpe_config); - kfree(mips_cps_core_bootcfg); - mips_cps_core_bootcfg = NULL; + if (mips_cps_cluster_bootcfg) { + for (cl = 0; cl < nclusters; cl++) { + cluster_bootcfg = &mips_cps_cluster_bootcfg[cl]; + ncores = mips_cps_numcores(cl); + for (c = 0; c < ncores; c++) { + core_bootcfg = &cluster_bootcfg->core_config[c]; + kfree(core_bootcfg->vpe_config); + } + kfree(mips_cps_cluster_bootcfg[c].core_config); + } + kfree(mips_cps_cluster_bootcfg); + mips_cps_cluster_bootcfg = NULL; } /* Effectively disable SMP by declaring CPUs not present */ @@ -325,13 +393,118 @@ err_out: } } -static void boot_core(unsigned int core, unsigned int vpe_id) +static void init_cluster_l2(void) { - u32 stat, seq_state; - unsigned timeout; + u32 l2_cfg, l2sm_cop, result; + + while (!mips_cm_is_l2_hci_broken) { + l2_cfg = read_gcr_redir_l2_ram_config(); + + /* If HCI is not supported, use the state machine below */ + if (!(l2_cfg & CM_GCR_L2_RAM_CONFIG_PRESENT)) + break; + if (!(l2_cfg & CM_GCR_L2_RAM_CONFIG_HCI_SUPPORTED)) + break; + + /* If the HCI_DONE bit is set, we're finished */ + if (l2_cfg & CM_GCR_L2_RAM_CONFIG_HCI_DONE) + return; + } + + l2sm_cop = read_gcr_redir_l2sm_cop(); + if (WARN(!(l2sm_cop & CM_GCR_L2SM_COP_PRESENT), + "L2 init not supported on this system yet")) + return; + + /* Clear L2 tag registers */ + write_gcr_redir_l2_tag_state(0); + write_gcr_redir_l2_ecc(0); + + /* Ensure the L2 tag writes complete before the state machine starts */ + mb(); + + /* Wait for the L2 state machine to be idle */ + do { + l2sm_cop = read_gcr_redir_l2sm_cop(); + } while (l2sm_cop & CM_GCR_L2SM_COP_RUNNING); + + /* Start a store tag operation */ + l2sm_cop = CM_GCR_L2SM_COP_TYPE_IDX_STORETAG; + l2sm_cop <<= __ffs(CM_GCR_L2SM_COP_TYPE); + l2sm_cop |= CM_GCR_L2SM_COP_CMD_START; + write_gcr_redir_l2sm_cop(l2sm_cop); + + /* Ensure the state machine starts before we poll for completion */ + mb(); + + /* Wait for the operation to be complete */ + do { + l2sm_cop = read_gcr_redir_l2sm_cop(); + result = l2sm_cop & CM_GCR_L2SM_COP_RESULT; + result >>= __ffs(CM_GCR_L2SM_COP_RESULT); + } while (!result); + + WARN(result != CM_GCR_L2SM_COP_RESULT_DONE_OK, + "L2 state machine 
failed cache init with error %u\n", result); +} + +static void boot_core(unsigned int cluster, unsigned int core, + unsigned int vpe_id) +{ + struct cluster_boot_config *cluster_cfg; + u32 access, stat, seq_state; + unsigned int timeout, ncores; + + cluster_cfg = &mips_cps_cluster_bootcfg[cluster]; + ncores = mips_cps_numcores(cluster); + + if ((cluster != cpu_cluster(¤t_cpu_data)) && + bitmap_empty(cluster_cfg->core_power, ncores)) { + power_up_other_cluster(cluster); + + mips_cm_lock_other(cluster, core, 0, + CM_GCR_Cx_OTHER_BLOCK_GLOBAL); + + /* Ensure cluster GCRs are where we expect */ + write_gcr_redir_base(read_gcr_base()); + write_gcr_redir_cpc_base(read_gcr_cpc_base()); + write_gcr_redir_gic_base(read_gcr_gic_base()); + + init_cluster_l2(); + + /* Mirror L2 configuration */ + write_gcr_redir_l2_only_sync_base(read_gcr_l2_only_sync_base()); + write_gcr_redir_l2_pft_control(read_gcr_l2_pft_control()); + write_gcr_redir_l2_pft_control_b(read_gcr_l2_pft_control_b()); + + /* Mirror ECC/parity setup */ + write_gcr_redir_err_control(read_gcr_err_control()); + + /* Set BEV base */ + write_gcr_redir_bev_base(core_entry_reg); + + mips_cm_unlock_other(); + } + + if (cluster != cpu_cluster(¤t_cpu_data)) { + mips_cm_lock_other(cluster, core, 0, + CM_GCR_Cx_OTHER_BLOCK_GLOBAL); + + /* Ensure the core can access the GCRs */ + access = read_gcr_redir_access(); + access |= BIT(core); + write_gcr_redir_access(access); + + mips_cm_unlock_other(); + } else { + /* Ensure the core can access the GCRs */ + access = read_gcr_access(); + access |= BIT(core); + write_gcr_access(access); + } /* Select the appropriate core */ - mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL); + mips_cm_lock_other(cluster, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL); /* Set its reset vector */ if (mips_cm_is64) @@ -400,30 +573,42 @@ static void boot_core(unsigned int core, unsigned int vpe_id) mips_cm_unlock_other(); /* The core is now powered up */ - bitmap_set(core_power, core, 1); + bitmap_set(cluster_cfg->core_power, core, 1); + + /* + * Restore CM_PWRUP=0 so that the CM can power down if all the cores in + * the cluster do (eg. if they're all removed via hotplug. 
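A pattern worth noting throughout boot_core() above: every access to another cluster's registers is bracketed by mips_cm_lock_other()/mips_cm_unlock_other(), with the writes going through the redirect ("redir") copies of the GCRs, which the CM forwards to whichever cluster, core and block the lock selected. Reduced to its skeleton:

    mips_cm_lock_other(cluster, core, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
    write_gcr_redir_bev_base(core_entry_reg); /* lands in the remote cluster */
    mips_cm_unlock_other();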
+ */ + if (mips_cm_revision() >= CM_REV_CM3_5) { + mips_cm_lock_other(cluster, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL); + write_cpc_redir_pwrup_ctl(0); + mips_cm_unlock_other(); + } } static void remote_vpe_boot(void *dummy) { + unsigned int cluster = cpu_cluster(¤t_cpu_data); unsigned core = cpu_core(¤t_cpu_data); - struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core]; + struct cluster_boot_config *cluster_cfg = + &mips_cps_cluster_bootcfg[cluster]; + struct core_boot_config *core_cfg = &cluster_cfg->core_config[core]; mips_cps_boot_vpes(core_cfg, cpu_vpe_id(¤t_cpu_data)); } static int cps_boot_secondary(int cpu, struct task_struct *idle) { + unsigned int cluster = cpu_cluster(&cpu_data[cpu]); unsigned core = cpu_core(&cpu_data[cpu]); unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]); - struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core]; + struct cluster_boot_config *cluster_cfg = + &mips_cps_cluster_bootcfg[cluster]; + struct core_boot_config *core_cfg = &cluster_cfg->core_config[core]; struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id]; unsigned int remote; int err; - /* We don't yet support booting CPUs in other clusters */ - if (cpu_cluster(&cpu_data[cpu]) != cpu_cluster(&raw_current_cpu_data)) - return -ENOSYS; - vpe_cfg->pc = (unsigned long)&smp_bootstrap; vpe_cfg->sp = __KSTK_TOS(idle); vpe_cfg->gp = (unsigned long)task_thread_info(idle); @@ -432,14 +617,15 @@ static int cps_boot_secondary(int cpu, struct task_struct *idle) preempt_disable(); - if (!test_bit(core, core_power)) { + if (!test_bit(core, cluster_cfg->core_power)) { /* Boot a VPE on a powered down core */ - boot_core(core, vpe_id); + boot_core(cluster, core, vpe_id); goto out; } if (cpu_has_vp) { - mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL); + mips_cm_lock_other(cluster, core, vpe_id, + CM_GCR_Cx_OTHER_BLOCK_LOCAL); if (mips_cm_is64) write_gcr_co_reset64_base(core_entry_reg); else @@ -576,12 +762,14 @@ static void cps_kexec_nonboot_cpu(void) static int cps_cpu_disable(void) { unsigned cpu = smp_processor_id(); + struct cluster_boot_config *cluster_cfg; struct core_boot_config *core_cfg; if (!cps_pm_support_state(CPS_PM_POWER_GATED)) return -EINVAL; - core_cfg = &mips_cps_core_bootcfg[cpu_core(¤t_cpu_data)]; + cluster_cfg = &mips_cps_cluster_bootcfg[cpu_cluster(¤t_cpu_data)]; + core_cfg = &cluster_cfg->core_config[cpu_core(¤t_cpu_data)]; atomic_sub(1 << cpu_vpe_id(¤t_cpu_data), &core_cfg->vpe_mask); smp_mb__after_atomic(); set_cpu_online(cpu, false); @@ -647,11 +835,15 @@ static void cps_cpu_die(unsigned int cpu) { } static void cps_cleanup_dead_cpu(unsigned cpu) { + unsigned int cluster = cpu_cluster(&cpu_data[cpu]); unsigned core = cpu_core(&cpu_data[cpu]); unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); ktime_t fail_time; unsigned stat; int err; + struct cluster_boot_config *cluster_cfg; + + cluster_cfg = &mips_cps_cluster_bootcfg[cluster]; /* * Now wait for the CPU to actually offline. 
Without doing this that @@ -703,7 +895,7 @@ static void cps_cleanup_dead_cpu(unsigned cpu) } while (1); /* Indicate the core is powered off */ - bitmap_clear(core_power, core, 1); + bitmap_clear(cluster_cfg->core_power, core, 1); } else if (cpu_has_mipsmt) { /* * Have a CPU with access to the offlined CPUs registers wait diff --git a/arch/mips/loongson2ef/common/machtype.c b/arch/mips/loongson2ef/common/machtype.c index 82f6de49f20f..e635e66d2e6c 100644 --- a/arch/mips/loongson2ef/common/machtype.c +++ b/arch/mips/loongson2ef/common/machtype.c @@ -48,8 +48,7 @@ void __init prom_init_machtype(void) return; } p += strlen("machtype="); - strncpy(str, p, MACHTYPE_LEN); - str[MACHTYPE_LEN] = '\0'; + strscpy(str, p); p = strstr(str, " "); if (p) *p = '\0'; diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8388400d052f..95d5f553ce19 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -164,13 +164,6 @@ void __init paging_init(void) free_area_init(zones_size); } -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} - /* All PCI device belongs to logical Node-0 */ int pcibus_to_node(struct pci_bus *bus) { diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4583d1a2a73e..a673d3d68254 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -59,24 +59,16 @@ EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code */ -void setup_zero_pages(void) +static void __init setup_zero_pages(void) { - unsigned int order, i; - struct page *page; + unsigned int order; if (cpu_has_vce) order = 3; else order = 0; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -423,17 +415,8 @@ void __init paging_init(void) " %ldk highmem ignored\n", (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; - - max_mapnr = max_low_pfn; - } else if (highend_pfn) { - max_mapnr = highend_pfn; - } else { - max_mapnr = max_low_pfn; } -#else - max_mapnr = max_low_pfn; #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -442,26 +425,7 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -static inline void __init mem_init_free_highmem(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - if (cpu_has_dc_aliases) - return; - - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!memblock_is_memory(PFN_PHYS(tmp))) - SetPageReserved(page); - else - free_highmem_page(page); - } -#endif -} - -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE @@ -470,9 +434,7 @@ void __init mem_init(void) BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); maar_init(); - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
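setup_zero_pages() above now takes the empty-zero-page region straight from memblock (order 3, i.e. eight pages, when virtual coherence exceptions are possible) instead of allocating, splitting and hand-reserving buddy pages. zero_page_mask is what lets a zero page be picked with a cache colour matching a given user address; in essence (an illustrative sketch, not a function added by this series):

    /* pick the zero page whose colour matches the faulting address */
    static inline struct page *zero_page_for(unsigned long vaddr)
    {
        return virt_to_page((void *)(empty_zero_page +
                                     (vaddr & zero_page_mask)));
    }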
*/ - mem_init_free_highmem(); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -482,6 +444,11 @@ void __init mem_init(void) 0x80000000 - 4, KCORE_TEXT); #endif } +#else /* CONFIG_NUMA */ +void __init arch_mm_preinit(void) +{ + setup_zero_pages(); /* This comes from node 0 */ +} #endif /* !CONFIG_NUMA */ void free_init_pages(const char *what, unsigned long begin, unsigned long end) diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index d8243d61ef32..c6c4576cd4a8 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -44,9 +44,9 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, * ioremap_prot gives the caller control over cache coherency attributes (CCA) */ void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - unsigned long flags = prot_val & _CACHE_MASK; + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; unsigned long offset, pfn, last_pfn; struct vm_struct *area; phys_addr_t last_addr; diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c index 15e7820d6a5f..acc03ba20098 100644 --- a/arch/mips/mm/ioremap64.c +++ b/arch/mips/mm/ioremap64.c @@ -3,9 +3,9 @@ #include <ioremap.h> void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { - unsigned long flags = prot_val & _CACHE_MASK; + unsigned long flags = pgprot_val(prot) & _CACHE_MASK; u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); void __iomem *addr; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 1963313f55d8..2b3e46e2e607 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -406,8 +406,6 @@ void __init prom_meminit(void) } } -extern void setup_zero_pages(void); - void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; @@ -416,10 +414,3 @@ void __init paging_init(void) zones_size[ZONE_NORMAL] = max_low_pfn; free_area_init(zones_size); } - -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 42fdb939c88d..03cb69937258 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/console.h> -#include <linux/fb.h> #include <linux/screen_info.h> #ifdef CONFIG_FW_ARC diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 12a536b7bfbd..db122b093a8b 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -28,10 +28,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ - do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ - } while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif /* _ASM_NIOS2_PGALLOC_H */ diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index da122a5fa43b..2a40150142c3 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -149,6 +149,8 @@ void __init setup_arch(char **cmdline_p) memory_start = memblock_start_of_DRAM(); memory_end = memblock_end_of_DRAM(); + pr_debug("%s: start=%lx, end=%lx\n", __func__, memory_start, memory_end); + 
setup_initial_init_mm(_stext, _etext, _edata, _end); init_task.thread.kregs = &fake_regs; @@ -156,7 +158,6 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; find_limits(&min_low_pfn, &max_low_pfn, &max_pfn); - max_mapnr = max_low_pfn; memblock_reserve(__pa_symbol(_stext), _end - _stext); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index a2278485de19..94efa3de3933 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -51,7 +51,7 @@ void __init paging_init(void) pagetable_init(); pgd_current = swapper_pg_dir; - max_zone_pfn[ZONE_NORMAL] = max_mapnr; + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; /* pass the memory from the bootmem allocator to the main allocator */ free_area_init(max_zone_pfn); @@ -60,20 +60,6 @@ void __init paging_init(void) (unsigned long)empty_zero_page + PAGE_SIZE); } -void __init mem_init(void) -{ - unsigned long end_mem = memory_end; /* this must not include - kernel stack at top */ - - pr_debug("mem_init: start=%lx, end=%lx\n", memory_start, memory_end); - - end_mem &= PAGE_MASK; - high_memory = __va(end_mem); - - /* this will put all memory onto the freelists */ - memblock_free_all(); -} - void __init mmu_init(void) { flush_tlb_all(); diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 3372f4e6ab4b..3f110931d8f6 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -64,10 +64,7 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm) extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d0cb1a0126f9..be1c2eb8bb94 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -193,15 +193,9 @@ void __init mem_init(void) { BUG_ON(!mem_map); - max_mapnr = max_low_pfn; - high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); - /* this will put all low memory onto the freelists */ - memblock_free_all(); - printk("mem_init_done ...........................................\n"); mem_init_done = 1; return; diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 59d85d2386bb..f01ad3ad60b5 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -131,13 +131,10 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr) _PAGE_ACCESSED | _PAGE_NO_CACHE) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), _PAGE_IOREMAP) + ioremap_prot((addr), (size), __pgprot(_PAGE_IOREMAP)) #define pci_iounmap pci_iounmap -void memcpy_fromio(void *dst, const volatile void __iomem *src, int count); -#define memcpy_fromio memcpy_fromio - /* Port-space IO */ #define inb_p inb diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 1c366b0d3134..509146a52725 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -43,7 +43,6 @@ EXPORT_SYMBOL($global$); #endif #include <asm/io.h> -EXPORT_SYMBOL(memcpy_fromio); extern void $$divI(void); extern void $$divU(void); diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 0f9b3b5914cf..b70b67adb855 100644 --- a/arch/parisc/kernel/pdt.c +++ 
b/arch/parisc/kernel/pdt.c @@ -63,6 +63,7 @@ static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss; #define PDT_ADDR_PERM_ERR (pdt_type != PDT_PDC ? 2UL : 0UL) #define PDT_ADDR_SINGLE_ERR 1UL +#ifdef CONFIG_PROC_FS /* report PDT entries via /proc/meminfo */ void arch_report_meminfo(struct seq_file *m) { @@ -74,6 +75,7 @@ void arch_report_meminfo(struct seq_file *m) seq_printf(m, "PDT_cur_entries: %7lu\n", pdt_status.pdt_entries); } +#endif static int get_info_pat_new(void) { diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 5e8e37a722ef..5e10f98ce7b5 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -475,9 +475,9 @@ static const struct file_operations perf_fops = { }; static struct miscdevice perf_dev = { - MISC_DYNAMIC_MINOR, - PA_PERF_DEV, - &perf_fops + .minor = MISC_DYNAMIC_MINOR, + .name = PA_PERF_DEV, + .fops = &perf_fops, }; /* diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c index 7461366a65c9..3c7e617f5a93 100644 --- a/arch/parisc/lib/io.c +++ b/arch/parisc/lib/io.c @@ -13,67 +13,6 @@ #include <asm/io.h> /* -** Copies a block of memory from a device in an efficient manner. -** Assumes the device can cope with 32-bit transfers. If it can't, -** don't use this function. -** -** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM: -** 27341/64 = 427 cyc per int -** 61311/128 = 478 cyc per short -** 122637/256 = 479 cyc per byte -** Ergo bus latencies dominant (not transfer size). -** Minimize total number of transfers at cost of CPU cycles. -** TODO: only look at src alignment and adjust the stores to dest. -*/ -void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) -{ - /* first compare alignment of src/dst */ - if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) ) - goto bytecopy; - - if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) ) - goto shortcopy; - - /* Then check for misaligned start address */ - if ((unsigned long)src & 1) { - *(u8 *)dst = readb(src); - src++; - dst++; - count--; - if (count < 2) goto bytecopy; - } - - if ((unsigned long)src & 2) { - *(u16 *)dst = __raw_readw(src); - src += 2; - dst += 2; - count -= 2; - } - - while (count > 3) { - *(u32 *)dst = __raw_readl(src); - dst += 4; - src += 4; - count -= 4; - } - - shortcopy: - while (count > 1) { - *(u16 *)dst = __raw_readw(src); - src += 2; - dst += 2; - count -= 2; - } - - bytecopy: - while (count--) { - *(char *)dst = readb(src); - src++; - dst++; - } -} - -/* * Read COUNT 8-bit bytes from port PORT into memory starting at * SRC. 
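The perf_dev hunk above replaces positional initialization of struct miscdevice with designated initializers, which stay correct even if the structure gains or reorders members. The same pattern with hypothetical names (example_fops stands in for a real file_operations):

#include <linux/fs.h>
#include <linux/miscdevice.h>

static const struct file_operations example_fops;	/* defined elsewhere */

static struct miscdevice example_dev = {
	.minor = MISC_DYNAMIC_MINOR,	/* let the core assign a minor */
	.name  = "example",
	.fops  = &example_fops,
};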
*/ @@ -123,15 +62,15 @@ void insw (unsigned long port, void *dst, unsigned long count) unsigned char *p; p = (unsigned char *)dst; - + if (!count) return; - + switch (((unsigned long)p) & 0x3) { case 0x00: /* Buffer 32-bit aligned */ while (count>=2) { - + count -= 2; l = cpu_to_le16(inw(port)) << 16; l |= cpu_to_le16(inw(port)); @@ -142,13 +81,13 @@ void insw (unsigned long port, void *dst, unsigned long count) *(unsigned short *)p = cpu_to_le16(inw(port)); } break; - + case 0x02: /* Buffer 16-bit aligned */ *(unsigned short *)p = cpu_to_le16(inw(port)); p += 2; count--; while (count>=2) { - + count -= 2; l = cpu_to_le16(inw(port)) << 16; l |= cpu_to_le16(inw(port)); @@ -159,13 +98,13 @@ void insw (unsigned long port, void *dst, unsigned long count) *(unsigned short *)p = cpu_to_le16(inw(port)); } break; - + case 0x01: /* Buffer 8-bit aligned */ case 0x03: /* I don't bother with 32bit transfers * in this case, 16bit will have to do -- DE */ --count; - + l = cpu_to_le16(inw(port)); *p = l >> 8; p++; @@ -195,10 +134,10 @@ void insl (unsigned long port, void *dst, unsigned long count) unsigned char *p; p = (unsigned char *)dst; - + if (!count) return; - + switch (((unsigned long) dst) & 0x3) { case 0x00: /* Buffer 32-bit aligned */ @@ -208,14 +147,14 @@ void insl (unsigned long port, void *dst, unsigned long count) p += 4; } break; - + case 0x02: /* Buffer 16-bit aligned */ --count; - + l = cpu_to_le32(inl(port)); *(unsigned short *)p = l >> 16; p += 2; - + while (count--) { l2 = cpu_to_le32(inl(port)); @@ -227,7 +166,7 @@ void insl (unsigned long port, void *dst, unsigned long count) break; case 0x01: /* Buffer 8-bit aligned */ --count; - + l = cpu_to_le32(inl(port)); *(unsigned char *)p = l >> 24; p++; @@ -244,7 +183,7 @@ void insl (unsigned long port, void *dst, unsigned long count) break; case 0x03: /* Buffer 8-bit aligned */ --count; - + l = cpu_to_le32(inl(port)); *p = l >> 24; p++; @@ -293,10 +232,10 @@ void outsw (unsigned long port, const void *src, unsigned long count) const unsigned char *p; p = (const unsigned char *)src; - + if (!count) return; - + switch (((unsigned long)p) & 0x3) { case 0x00: /* Buffer 32-bit aligned */ @@ -311,13 +250,13 @@ void outsw (unsigned long port, const void *src, unsigned long count) outw(le16_to_cpu(*(unsigned short*)p), port); } break; - + case 0x02: /* Buffer 16-bit aligned */ - + outw(le16_to_cpu(*(unsigned short*)p), port); p += 2; count--; - + while (count>=2) { count -= 2; l = *(unsigned int *)p; @@ -329,11 +268,11 @@ void outsw (unsigned long port, const void *src, unsigned long count) outw(le16_to_cpu(*(unsigned short *)p), port); } break; - - case 0x01: /* Buffer 8-bit aligned */ + + case 0x01: /* Buffer 8-bit aligned */ /* I don't bother with 32bit transfers * in this case, 16bit will have to do -- DE */ - + l = *p << 8; p++; count--; @@ -348,7 +287,7 @@ void outsw (unsigned long port, const void *src, unsigned long count) l2 = *(unsigned char *)p; outw (le16_to_cpu(l | l2>>8), port); break; - + } } @@ -365,10 +304,10 @@ void outsl (unsigned long port, const void *src, unsigned long count) const unsigned char *p; p = (const unsigned char *)src; - + if (!count) return; - + switch (((unsigned long)p) & 0x3) { case 0x00: /* Buffer 32-bit aligned */ @@ -378,13 +317,13 @@ void outsl (unsigned long port, const void *src, unsigned long count) p += 4; } break; - + case 0x02: /* Buffer 16-bit aligned */ --count; - + l = *(unsigned short *)p; p += 2; - + while (count--) { l2 = *(unsigned int *)p; @@ -415,7 +354,7 @@ void outsl (unsigned long port, 
const void *src, unsigned long count) break; case 0x03: /* Buffer 8-bit aligned */ --count; - + l = *p << 24; p++; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 61c0a2477072..14270715d754 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -562,10 +562,6 @@ void __init mem_init(void) BUILD_BUG_ON(TMPALIAS_MAP_START >= 0x80000000); #endif - high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(max_low_pfn); - memblock_free_all(); - #ifdef CONFIG_PA11 if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) { pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START); diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index fd996472dfe7..0b65c4b3baee 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -14,7 +14,7 @@ #include <linux/mm.h> void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { #ifdef CONFIG_EISA unsigned long end = phys_addr + size - 1; @@ -41,6 +41,6 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, } } - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b5630f8ad436..6722625a406a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 @@ -207,7 +208,6 @@ config PPC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_DATA_STORE @@ -716,6 +716,9 @@ config ARCH_SUPPORTS_CRASH_HOTPLUG def_bool y depends on PPC64 +config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + def_bool CRASH_RESERVE + config FA_DUMP bool "Firmware-assisted dump" depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV) diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 89da51d724fb..9bc2758a6a9a 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -77,4 +77,4 @@ CONFIG_DEBUG_VM_PGTABLE=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_GENERIC_PTDUMP=y +CONFIG_PTDUMP_DEBUGFS=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1eb446452fc0..3086c4a12d6d 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -78,7 +78,6 @@ CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_NVME=m CONFIG_NVME_MULTIPATH=y CONFIG_EEPROM_AT24=m -# CONFIG_CXL is not set # CONFIG_OCXL is not set CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index f0bba9c5f9c3..bb786694dd26 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -94,4 +94,10 @@ static inline int check_and_get_huge_psize(int shift) return mmu_psize; } +#define arch_has_huge_bootmem_alloc arch_has_huge_bootmem_alloc + +static inline bool arch_has_huge_bootmem_alloc(void) +{ + return (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()); +} #endif diff --git 
a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h index fd2e166ea02a..81bd176203ab 100644 --- a/arch/powerpc/include/asm/copro.h +++ b/arch/powerpc/include/asm/copro.h @@ -18,10 +18,4 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb); - -#ifdef CONFIG_PPC_COPRO_BASE -void copro_flush_all_slbs(struct mm_struct *mm); -#else -static inline void copro_flush_all_slbs(struct mm_struct *mm) {} -#endif #endif /* _ASM_POWERPC_COPRO_H */ diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h new file mode 100644 index 000000000000..6467ce29b1fa --- /dev/null +++ b/arch/powerpc/include/asm/crash_reserve.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_CRASH_RESERVE_H +#define _ASM_POWERPC_CRASH_RESERVE_H + +/* crash kernel regions are page size aligned */ +#define CRASH_ALIGN PAGE_SIZE + +#endif /* _ASM_POWERPC_CRASH_RESERVE_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 47ed639f3b8f..a4dc27655b3e 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -38,9 +38,6 @@ struct dev_archdata { #ifdef CONFIG_FAIL_IOMMU int fail_iommu; #endif -#ifdef CONFIG_CXL_BASE - struct cxl_context *cxl_ctx; -#endif #ifdef CONFIG_PCI_IOV void *iov_data; #endif diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 492e8855e00f..7a89754842d6 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -826,7 +826,7 @@ void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_cache(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((addr), (size), PAGE_KERNEL) #define iounmap iounmap diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 601e569303e1..70f2f0517509 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,8 +94,10 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); -#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole +int arch_check_excluded_range(struct kimage *image, unsigned long start, + unsigned long end); +#define arch_check_excluded_range arch_check_excluded_range + int load_crashdump_segments_ppc64(struct kimage *image, struct kexec_buf *kbuf); @@ -112,9 +114,9 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem #ifdef CONFIG_CRASH_RESERVE int __init overlaps_crashkernel(unsigned long start, unsigned long size); -extern void reserve_crashkernel(void); +extern void arch_reserve_crashkernel(void); #else -static inline void reserve_crashkernel(void) {} +static inline void arch_reserve_crashkernel(void) {} static inline int overlaps_crashkernel(unsigned long start, unsigned long size) { return 0; } #endif diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 8afc92860dbb..7e9a479951a3 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -10,7 +10,6 @@ #include <linux/pci_hotplug.h> #include <linux/irq.h> #include <linux/of.h> -#include <misc/cxl-base.h>
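The kexec.h hunk above uses the common arch-override idiom: the function is declared and then its own name is #define'd to itself, so generic code can test for the macro and only supply a fallback when the architecture provides no implementation. A sketch of the generic-side counterpart this enables (the fallback body is an assumption about the generic default, not copied from this patch):

#ifndef arch_check_excluded_range
static inline int arch_check_excluded_range(struct kimage *image,
					    unsigned long start,
					    unsigned long end)
{
	return 0;	/* nothing excluded by default */
}
#endif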
#include <asm/opal-api.h> #define PCI_SLOT_ID_PREFIX (1UL << 63) @@ -25,25 +24,9 @@ extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); -extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, - int enable); -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq); -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); -int pnv_cxl_get_irq_count(struct pci_dev *dev); -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); int64_t pnv_opal_pci_msi_eoi(struct irq_data *d); bool is_pnv_opal_msi(struct irq_chip *chip); -#ifdef CONFIG_CXL_BASE -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num); -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev); -#endif - struct pnv_php_slot { struct hotplug_slot slot; uint64_t id; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e0059842a1c6..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -860,7 +860,7 @@ void __init early_init_devtree(void *params) */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); if (memory_limit > memblock_phys_mem_size()) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index fbeb1cbac01b..afb690a172b4 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -52,7 +52,7 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { char *data; @@ -85,7 +85,7 @@ data_fail: } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -104,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -121,7 +121,7 @@ static struct attribute *secvar_attrs[] = { static const struct attribute_group secvar_attr_group = { .attrs = secvar_attrs, - .bin_attrs = secvar_bin_attrs, + .bin_attrs_new = secvar_bin_attrs, }; __ATTRIBUTE_GROUPS(secvar_attr); @@ -130,7 +130,7 @@ static const struct kobj_type secvar_ktype = { .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { u64 varsize; @@ -145,7 +145,7 @@ static int update_kobj_size(void) return 0; } -static int secvar_sysfs_config(struct kobject *kobj) +static __init int secvar_sysfs_config(struct kobject *kobj) { struct attribute_group config_group = { .name = "config", @@ -158,7 +158,7 @@ static int 
secvar_sysfs_config(struct kobject *kobj) return 0; } -static int add_var(const char *name) +static __init int add_var(const char *name) { struct kobject *kobj; int rc; @@ -181,7 +181,7 @@ static int add_var(const char *name) return 0; } -static int secvar_sysfs_load(void) +static __init int secvar_sysfs_load(void) { u64 namesize = 0; char *name; @@ -209,7 +209,7 @@ static int secvar_sysfs_load(void) return rc; } -static int secvar_sysfs_load_static(void) +static __init int secvar_sysfs_load_static(void) { const char * const *name_ptr = secvar_ops->var_names; int rc; @@ -224,7 +224,7 @@ static int secvar_sysfs_load_static(void) return 0; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_init(void) { u64 max_size; int rc; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a08b0ede4e64..68d47c53876c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -957,8 +957,6 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); - /* Set max_mapnr before paging_init() */ - set_max_mapnr(max_pfn); high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2f776f137a89..6dca92d5a6e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -115,10 +115,8 @@ static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long a { struct module *mod = NULL; - preempt_disable(); - mod = __module_text_address(ip); - preempt_enable(); - + scoped_guard(rcu) + mod = __module_text_address(ip); if (!mod) pr_err("No module loaded at addr=%lx\n", ip); diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index ac35015f04c6..5c6e545d1708 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -120,10 +120,8 @@ static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) { struct module *mod; - preempt_disable(); - mod = __module_text_address(rec->ip); - preempt_enable(); - + scoped_guard(rcu) + mod = __module_text_address(rec->ip); if (!mod) pr_err("No module loaded at addr=%lx\n", rec->ip); diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 58a930a47422..00e9c267b912 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -58,38 +58,20 @@ void machine_kexec(struct kimage *image) } #ifdef CONFIG_CRASH_RESERVE -void __init reserve_crashkernel(void) -{ - unsigned long long crash_size, crash_base, total_mem_sz; - int ret; - total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size(); - /* use common parsing */ - ret = parse_crashkernel(boot_command_line, total_mem_sz, - &crash_size, &crash_base, NULL, NULL); - if (ret == 0 && crash_size > 0) { - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } - - if (crashk_res.end == crashk_res.start) { - crashk_res.start = crashk_res.end = 0; - return; - } - - /* We might have got these values via the command line or the - * device tree, either way sanitise them now. 
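The two ftrace hunks above swap an open-coded preempt_disable()/preempt_enable() pair for scoped_guard(rcu), which enters an RCU read-side critical section for exactly the statement (or block) that follows and leaves it on every exit path. A sketch of the construct around the same lookup, with an illustrative wrapper name:

#include <linux/cleanup.h>
#include <linux/module.h>
#include <linux/rcupdate.h>

static struct module *example_lookup(unsigned long ip)
{
	struct module *mod;

	scoped_guard(rcu)	/* rcu_read_lock() ... rcu_read_unlock() */
		mod = __module_text_address(ip);

	return mod;	/* NULL if ip is not in module text */
}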
*/ - - crash_size = resource_size(&crashk_res); +static unsigned long long __init get_crash_base(unsigned long long crash_base) +{ #ifndef CONFIG_NONSTATIC_KERNEL - if (crashk_res.start != KDUMP_KERNELBASE) + if (crash_base != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); - crashk_res.start = KDUMP_KERNELBASE; + return KDUMP_KERNELBASE; #else - if (!crashk_res.start) { + unsigned long long crash_base_align; + + if (!crash_base) { #ifdef CONFIG_PPC64 /* * On the LPAR platform place the crash kernel to mid of @@ -101,53 +83,51 @@ void __init reserve_crashkernel(void) * kernel starts at 128MB offset on other platforms. */ if (firmware_has_feature(FW_FEATURE_LPAR)) - crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_512M); + crash_base = min_t(u64, ppc64_rma_size / 2, SZ_512M); else - crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_128M); + crash_base = min_t(u64, ppc64_rma_size / 2, SZ_128M); #else - crashk_res.start = KDUMP_KERNELBASE; + crash_base = KDUMP_KERNELBASE; #endif } - crash_base = PAGE_ALIGN(crashk_res.start); - if (crash_base != crashk_res.start) { - printk("Crash kernel base must be aligned to 0x%lx\n", - PAGE_SIZE); - crashk_res.start = crash_base; - } + crash_base_align = PAGE_ALIGN(crash_base); + if (crash_base != crash_base_align) + pr_warn("Crash kernel base must be aligned to 0x%lx\n", PAGE_SIZE); + return crash_base_align; #endif - crash_size = PAGE_ALIGN(crash_size); - crashk_res.end = crashk_res.start + crash_size - 1; +} - /* The crash region must not overlap the current kernel */ - if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { - printk(KERN_WARNING - "Crash kernel can not overlap current kernel\n"); - crashk_res.start = crashk_res.end = 0; +void __init arch_reserve_crashkernel(void) +{ + unsigned long long crash_size, crash_base, crash_end; + unsigned long long kernel_start, kernel_size; + unsigned long long total_mem_sz; + int ret; + + total_mem_sz = memory_limit ? 
memory_limit : memblock_phys_mem_size(); + + /* use common parsing */ + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, + &crash_base, NULL, NULL); + + if (ret) return; - } - /* Crash kernel trumps memory limit */ - if (memory_limit && memory_limit <= crashk_res.end) { - memory_limit = crashk_res.end + 1; - total_mem_sz = memory_limit; - printk("Adjusted memory limit for crashkernel, now 0x%llx\n", - memory_limit); - } + crash_base = get_crash_base(crash_base); + crash_end = crash_base + crash_size - 1; - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crashk_res.start >> 20), - (unsigned long)(total_mem_sz >> 20)); + kernel_start = __pa(_stext); + kernel_size = _end - _stext; - if (!memblock_is_region_memory(crashk_res.start, crash_size) || - memblock_reserve(crashk_res.start, crash_size)) { - pr_err("Failed to reserve memory for crashkernel!\n"); - crashk_res.start = crashk_res.end = 0; + /* The crash region must not overlap the current kernel */ + if ((kernel_start + kernel_size > crash_base) && (kernel_start <= crash_end)) { + pr_warn("Crash kernel can not overlap current kernel\n"); return; } + + reserve_crashkernel_generic(crash_size, crash_base, 0, false); } int __init overlaps_crashkernel(unsigned long start, unsigned long size) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index dc65c1391157..e7ef8b2a2554 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -49,201 +49,18 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL }; -/** - * __locate_mem_hole_top_down - Looks top down for a large enough memory hole - * in the memory regions between buf_min & buf_max - * for the buffer. If found, sets kbuf->mem. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * - * Returns 0 on success, negative errno on error. - */ -static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max) -{ - int ret = -EADDRNOTAVAIL; - phys_addr_t start, end; - u64 i; - - for_each_mem_range_rev(i, &start, &end) { - /* - * memblock uses [start, end) convention while it is - * [start, end] here. Fix the off-by-one to have the - * same convention. - */ - end -= 1; - - if (start > buf_max) - continue; - - /* Memory hole not found */ - if (end < buf_min) - break; - - /* Adjust memory region based on the given range */ - if (start < buf_min) - start = buf_min; - if (end > buf_max) - end = buf_max; - - start = ALIGN(start, kbuf->buf_align); - if (start < end && (end - start + 1) >= kbuf->memsz) { - /* Suitable memory range found. Set kbuf->mem */ - kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, - kbuf->buf_align); - ret = 0; - break; - } - } - - return ret; -} - -/** - * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a - * suitable buffer with top down approach. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * @emem: Exclude memory ranges. - * - * Returns 0 on success, negative errno on error. 
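The new overlap test above, (kernel_start + kernel_size > crash_base) && (kernel_start <= crash_end), is the usual closed-interval intersection check: two inclusive ranges intersect iff each starts no later than the other ends. Spelled out as a helper with illustrative names:

#include <linux/types.h>

static bool ranges_overlap(u64 a_start, u64 a_end, u64 b_start, u64 b_end)
{
	return a_start <= b_end && b_start <= a_end;
}

With the kernel image as [kernel_start, kernel_start + kernel_size - 1] and the crash region as [crash_base, crash_end], this is exactly the condition the hunk warns about before handing off to reserve_crashkernel_generic().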
- */ -static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max, - const struct crash_mem *emem) +int arch_check_excluded_range(struct kimage *image, unsigned long start, + unsigned long end) { - int i, ret = 0, err = -EADDRNOTAVAIL; - u64 start, end, tmin, tmax; - - tmax = buf_max; - for (i = (emem->nr_ranges - 1); i >= 0; i--) { - start = emem->ranges[i].start; - end = emem->ranges[i].end; - - if (start > tmax) - continue; - - if (end < tmax) { - tmin = (end < buf_min ? buf_min : end + 1); - ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); - if (!ret) - return 0; - } - - tmax = start - 1; - - if (tmax < buf_min) { - ret = err; - break; - } - ret = 0; - } - - if (!ret) { - tmin = buf_min; - ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); - } - return ret; -} - -/** - * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole - * in the memory regions between buf_min & buf_max - * for the buffer. If found, sets kbuf->mem. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * - * Returns 0 on success, negative errno on error. - */ -static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max) -{ - int ret = -EADDRNOTAVAIL; - phys_addr_t start, end; - u64 i; - - for_each_mem_range(i, &start, &end) { - /* - * memblock uses [start, end) convention while it is - * [start, end] here. Fix the off-by-one to have the - * same convention. - */ - end -= 1; - - if (end < buf_min) - continue; - - /* Memory hole not found */ - if (start > buf_max) - break; - - /* Adjust memory region based on the given range */ - if (start < buf_min) - start = buf_min; - if (end > buf_max) - end = buf_max; - - start = ALIGN(start, kbuf->buf_align); - if (start < end && (end - start + 1) >= kbuf->memsz) { - /* Suitable memory range found. Set kbuf->mem */ - kbuf->mem = start; - ret = 0; - break; - } - } - - return ret; -} - -/** - * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a - * suitable buffer with bottom up approach. - * @kbuf: Buffer contents and memory parameters. - * @buf_min: Minimum address for the buffer. - * @buf_max: Maximum address for the buffer. - * @emem: Exclude memory ranges. - * - * Returns 0 on success, negative errno on error. - */ -static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, - u64 buf_min, u64 buf_max, - const struct crash_mem *emem) -{ - int i, ret = 0, err = -EADDRNOTAVAIL; - u64 start, end, tmin, tmax; - - tmin = buf_min; - for (i = 0; i < emem->nr_ranges; i++) { - start = emem->ranges[i].start; - end = emem->ranges[i].end; - - if (end < tmin) - continue; - - if (start > tmin) { - tmax = (start > buf_max ? buf_max : start - 1); - ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); - if (!ret) - return 0; - } - - tmin = end + 1; + struct crash_mem *emem; + int i; - if (tmin > buf_max) { - ret = err; - break; - } - ret = 0; - } + emem = image->arch.exclude_ranges; + for (i = 0; i < emem->nr_ranges; i++) + if (start < emem->ranges[i].end && end > emem->ranges[i].start) + return 1; - if (!ret) { - tmax = buf_max; - ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); - } - return ret; + return 0; } #ifdef CONFIG_CRASH_DUMP @@ -1005,64 +822,6 @@ out: } /** - * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, - * tce-table, reserved-ranges & such (exclude - * memory ranges) as they can't be used for kexec - * segment buffer. 
Sets kbuf->mem when a suitable - * memory hole is found. - * @kbuf: Buffer contents and memory parameters. - * - * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. - * - * Returns 0 on success, negative errno on error. - */ -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) -{ - struct crash_mem **emem; - u64 buf_min, buf_max; - int ret; - - /* Look up the exclude ranges list while locating the memory hole */ - emem = &(kbuf->image->arch.exclude_ranges); - if (!(*emem) || ((*emem)->nr_ranges == 0)) { - pr_warn("No exclude range list. Using the default locate mem hole method\n"); - return kexec_locate_mem_hole(kbuf); - } - - buf_min = kbuf->buf_min; - buf_max = kbuf->buf_max; - /* Segments for kdump kernel should be within crashkernel region */ - if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) { - buf_min = (buf_min < crashk_res.start ? - crashk_res.start : buf_min); - buf_max = (buf_max > crashk_res.end ? - crashk_res.end : buf_max); - } - - if (buf_min > buf_max) { - pr_err("Invalid buffer min and/or max values\n"); - return -EINVAL; - } - - if (kbuf->top_down) - ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, - *emem); - else - ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, - *emem); - - /* Add the buffer allocated to the exclude list for the next lookup */ - if (!ret) { - add_mem_range(emem, kbuf->mem, kbuf->memsz); - sort_memory_ranges(*emem, true); - } else { - pr_err("Failed to locate memory buffer of size %lu\n", - kbuf->memsz); - } - return ret; -} - -/** * arch_kexec_kernel_image_probe - Does additional handling needed to setup * kexec segments. * @image: kexec image being loaded. diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 0fe2f085c05a..8c1582b2987d 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,5 +15,5 @@ obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump/ +obj-$(CONFIG_PTDUMP) += ptdump/ obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 430d1d935a7c..e9e2dd70c060 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -27,8 +27,6 @@ #include <asm/ppc-opcode.h> #include <asm/feature-fixups.h> -#include <misc/cxl-base.h> - #ifdef DEBUG_LOW #define DBG_LOW(fmt...) 
udbg_printf(fmt) #else @@ -217,11 +215,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local; + unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); - if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) @@ -789,10 +785,6 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long psize = batch->psize; int ssize = batch->ssize; int i; - unsigned int use_local; - - use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && - mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use(); local_irq_save(flags); @@ -827,7 +819,8 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (use_local) { + if (mmu_has_feature(MMU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { vpn = batch->vpn[i]; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 734610052cf4..5158aefe4873 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -56,7 +56,7 @@ #include <asm/cacheflush.h> #include <asm/cputable.h> #include <asm/sections.h> -#include <asm/copro.h> +#include <asm/spu.h> #include <asm/udbg.h> #include <asm/text-patching.h> #include <asm/fadump.h> @@ -1600,7 +1600,9 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { copy_mm_to_paca(mm); @@ -1869,7 +1871,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif } } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index c0c45d033cba..8f7d41ce2ca1 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -10,7 +10,6 @@ #include <linux/pkeys.h> #include <linux/debugfs.h> #include <linux/proc_fs.h> -#include <misc/cxl-base.h> #include <asm/pgalloc.h> #include <asm/tlb.h> diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index bc9a39821d1c..28bec5bc7879 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -22,7 +22,7 @@ #include <linux/security.h> #include <asm/mman.h> #include <asm/mmu.h> -#include <asm/copro.h> +#include <asm/spu.h> #include <asm/hugetlb.h> #include <asm/mmu_context.h> @@ -248,7 +248,9 @@ static void slice_convert(struct mm_struct *mm, spin_unlock_irqrestore(&slice_convert_lock, flags); - copro_flush_all_slbs(mm); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif } /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f49fd873df8d..f5f8692e2c69 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -12,8 +12,6 @@ #include <linux/export.h> #include <asm/reg.h> #include <asm/copro.h> -#include <asm/spu.h> -#include <misc/cxl-base.h> /* * This ought 
to be kept in sync with the powerpc specific do_page_fault @@ -135,13 +133,4 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) return 0; } EXPORT_SYMBOL_GPL(copro_calculate_slb); - -void copro_flush_all_slbs(struct mm_struct *mm) -{ -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif - cxl_slbia(mm); -} -EXPORT_SYMBOL_GPL(copro_flush_all_slbs); #endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 6b043180220a..d3c1b749dcfc 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -113,6 +113,7 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) gpage_freearray[nr_gpages] = 0; list_add(&m->list, &huge_boot_pages[0]); m->hstate = hstate; + m->flags = 0; return 1; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d96bbc001e73..b6f3ae03ca9e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -41,6 +41,7 @@ #include <linux/libfdt.h> #include <linux/memremap.h> #include <linux/memory.h> +#include <linux/bootmem_info.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -386,10 +387,13 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, } #endif + +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) { } +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 70b08bf3dd1f..4b4feba9873b 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -34,9 +34,9 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) return __ioremap_caller(addr, size, prot, caller); } -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, pgprot_t prot) { - pte_t pte = __pte(flags); + pte_t pte = __pte(pgprot_val(prot)); void *caller = __builtin_return_address(0); /* writeable implies dirty for kernel addresses */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 34806c858e54..3ddbfdbfa941 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -273,7 +273,7 @@ void __init paging_init(void) mark_nonram_nosave(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* * book3s is limited to 16 page sizes due to encoding this in @@ -295,22 +295,6 @@ void __init mem_init(void) kasan_late_init(); - memblock_free_all(); - -#ifdef CONFIG_HIGHMEM - { - unsigned long pfn, highmem_mapnr; - - highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; - for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { - phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; - struct page *page = pfn_to_page(pfn); - if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr)) - free_highmem_page(page); - } - } -#endif /* CONFIG_HIGHMEM */ - #if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) /* * If smp is enabled, next_tlbcam_idx is initialized in the cpu up @@ -354,7 +338,7 @@ static int __init add_system_ram_resources(void) */ res->end = end - 1; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - WARN_ON(request_resource(&iomem_resource, res) < 0); + WARN_ON(insert_resource(&iomem_resource, res) < 0); } } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index d400fa391c27..b0768f3d2893 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -998,7 +998,7 @@ e_out: } static ssize_t 
catalog_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { long hret; @@ -1108,14 +1108,14 @@ PAGE_0_ATTR(catalog_version, "%lld\n", (unsigned long long)be64_to_cpu(page_0->version)); PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); -static BIN_ATTR_RO(catalog, 0/* real length varies */); +static const BIN_ATTR_RO(catalog, 0/* real length varies */); static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); static DEVICE_ATTR_RO(cpumask); -static struct bin_attribute *if_bin_attrs[] = { +static const struct bin_attribute *const if_bin_attrs[] = { &bin_attr_catalog, NULL, }; @@ -1141,7 +1141,7 @@ static struct attribute *if_attrs[] = { static const struct attribute_group if_group = { .name = "interface", - .bin_attrs = if_bin_attrs, + .bin_attrs_new = if_bin_attrs, .attrs = if_attrs, }; diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c index 827d338deaf4..2c2999de6bfa 100644 --- a/arch/powerpc/platforms/cell/spufs/gang.c +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -25,6 +25,7 @@ struct spu_gang *alloc_spu_gang(void) mutex_init(&gang->aff_mutex); INIT_LIST_HEAD(&gang->list); INIT_LIST_HEAD(&gang->aff_list_head); + gang->alive = 1; out: return gang; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 70236d1df3d3..9f9e4b871627 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -192,13 +192,32 @@ static int spufs_fill_dir(struct dentry *dir, return -ENOMEM; ret = spufs_new_file(dir->d_sb, dentry, files->ops, files->mode & mode, files->size, ctx); - if (ret) + if (ret) { + dput(dentry); return ret; + } files++; } return 0; } +static void unuse_gang(struct dentry *dir) +{ + struct inode *inode = dir->d_inode; + struct spu_gang *gang = SPUFS_I(inode)->i_gang; + + if (gang) { + bool dead; + + inode_lock(inode); // exclusion with spufs_create_context() + dead = !--gang->alive; + inode_unlock(inode); + + if (dead) + simple_recursive_removal(dir, NULL); + } +} + static int spufs_dir_close(struct inode *inode, struct file *file) { struct inode *parent; @@ -213,6 +232,7 @@ static int spufs_dir_close(struct inode *inode, struct file *file) inode_unlock(parent); WARN_ON(ret); + unuse_gang(dir->d_parent); return dcache_dir_close(inode, file); } @@ -405,7 +425,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, { int ret; int affinity; - struct spu_gang *gang; + struct spu_gang *gang = SPUFS_I(inode)->i_gang; struct spu_context *neighbor; struct path path = {.mnt = mnt, .dentry = dentry}; @@ -420,11 +440,15 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) return -ENODEV; - gang = NULL; + if (gang) { + if (!gang->alive) + return -ENOENT; + gang->alive++; + } + neighbor = NULL; affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); if (affinity) { - gang = SPUFS_I(inode)->i_gang; if (!gang) return -EINVAL; mutex_lock(&gang->aff_mutex); @@ -436,8 +460,11 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, } ret = spufs_mkdir(inode, dentry, flags, mode & 0777); - if (ret) + if (ret) { + if (neighbor) + put_spu_context(neighbor); goto out_aff_unlock; + } if (affinity) { 
spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, @@ -453,6 +480,8 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, out_aff_unlock: if (affinity) mutex_unlock(&gang->aff_mutex); + if (ret && gang) + gang->alive--; // can't reach 0 return ret; } @@ -482,6 +511,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_fop = &simple_dir_operations; d_instantiate(dentry, inode); + dget(dentry); inc_nlink(dir); inc_nlink(d_inode(dentry)); return ret; @@ -492,6 +522,21 @@ out: return ret; } +static int spufs_gang_close(struct inode *inode, struct file *file) +{ + unuse_gang(file->f_path.dentry); + return dcache_dir_close(inode, file); +} + +static const struct file_operations spufs_gang_fops = { + .open = dcache_dir_open, + .release = spufs_gang_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .iterate_shared = dcache_readdir, + .fsync = noop_fsync, +}; + static int spufs_gang_open(const struct path *path) { int ret; @@ -511,7 +556,7 @@ static int spufs_gang_open(const struct path *path) return PTR_ERR(filp); } - filp->f_op = &simple_dir_operations; + filp->f_op = &spufs_gang_fops; fd_install(ret, filp); return ret; } @@ -526,10 +571,8 @@ static int spufs_create_gang(struct inode *inode, ret = spufs_mkgang(inode, dentry, mode & 0777); if (!ret) { ret = spufs_gang_open(&path); - if (ret < 0) { - int err = simple_rmdir(inode, dentry); - WARN_ON(err); - } + if (ret < 0) + unuse_gang(dentry); } return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 84958487f696..d33787c57c39 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -151,6 +151,8 @@ struct spu_gang { int aff_flags; struct spu *aff_ref_spu; atomic_t aff_sched_count; + + int alive; }; /* Flag bits for spu_gang aff_flags */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 19f0fc5c6f1b..9e5d0c847ee2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o obj-$(CONFIG_PCI_IOV) += pci-sriov.o -obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index a379ff86c120..e652da8f986f 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -159,7 +159,7 @@ static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, * Returns number of bytes read on success, -errno on failure. 
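The spufs hunks above give each gang a simple liveness count: spufs_mkgang() starts it at 1, every context created inside the gang increments it (the create path already runs under the directory inode lock taken by the VFS), and closing the directory decrements it, tearing the subtree down once it reaches zero. A compressed, illustrative restatement of the teardown side (struct spu_gang is the spufs-private type from spufs.h):

#include <linux/fs.h>

static void example_unuse_gang(struct dentry *dir, struct spu_gang *gang)
{
	struct inode *inode = d_inode(dir);
	bool dead;

	inode_lock(inode);		/* exclusion with context creation */
	dead = !--gang->alive;
	inode_unlock(inode);

	if (dead)
		simple_recursive_removal(dir, NULL);
}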
*/ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { struct opalcore *m; @@ -206,9 +206,9 @@ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, return (tpos - pos); } -static struct bin_attribute opal_core_attr = { +static struct bin_attribute opal_core_attr __ro_after_init = { .attr = {.name = "core", .mode = 0400}, - .read = read_opalcore + .read_new = read_opalcore }; /* @@ -599,7 +599,7 @@ static struct attribute *mpipl_attr[] = { NULL, }; -static struct bin_attribute *mpipl_bin_attr[] = { +static const struct bin_attribute *const mpipl_bin_attr[] = { &opal_core_attr, NULL, @@ -607,7 +607,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, - .bin_attrs = mpipl_bin_attr, + .bin_attrs_new = mpipl_bin_attr, }; static int __init opalcore_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 608e4b68c5ea..27e25693cf39 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -286,7 +286,7 @@ out: } static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { ssize_t rc; @@ -342,7 +342,7 @@ static void create_dump_obj(uint32_t id, size_t size, uint32_t type) dump->dump_attr.attr.name = "dump"; dump->dump_attr.attr.mode = 0400; dump->dump_attr.size = size; - dump->dump_attr.read = dump_attr_read; + dump->dump_attr.read_new = dump_attr_read; dump->id = id; dump->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5db1e733143b..de33f354e9fd 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -156,7 +156,7 @@ static const struct kobj_type elog_ktype = { #define OPAL_MAX_ERRLOG_SIZE 16384 static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int opal_rc; @@ -203,7 +203,7 @@ static void create_elog_obj(uint64_t id, size_t size, uint64_t type) elog->raw_attr.attr.name = "raw"; elog->raw_attr.attr.mode = 0400; elog->raw_attr.size = size; - elog->raw_attr.read = raw_attr_read; + elog->raw_attr.read_new = raw_attr_read; elog->id = id; elog->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index d5ea04e8e4c5..fd8c8621e973 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -432,7 +432,7 @@ static int alloc_image_buf(char *buffer, size_t count) * and pre-allocate required memory. 
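The sysfs updates in this and the following hunks all follow one migration: callbacks now take a const struct bin_attribute, they are attached via .read_new/.write_new, and groups list them through .bin_attrs_new. A minimal sketch of the resulting shape, with hypothetical names:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_read(struct file *file, struct kobject *kobj,
			    const struct bin_attribute *attr,
			    char *buf, loff_t pos, size_t count)
{
	return 0;	/* copy data into buf, return bytes copied */
}

static const struct bin_attribute example_attr = {
	.attr = { .name = "example", .mode = 0400 },
	.read_new = example_read,
};

static const struct bin_attribute *const example_bin_attrs[] = {
	&example_attr,
	NULL,
};

static const struct attribute_group example_group = {
	.bin_attrs_new = example_bin_attrs,
};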
*/ static ssize_t image_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int rc; @@ -493,7 +493,7 @@ out: static const struct bin_attribute image_data_attr = { .attr = {.name = "image", .mode = 0200}, .size = MAX_IMAGE_SIZE, /* Limit image size */ - .write = image_data_write, + .write_new = image_data_write, }; static struct kobj_attribute validate_attribute = diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 22d6efe17b0d..f1988d0ab45c 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -94,15 +94,15 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) } static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return opal_msglog_copy(to, pos, count); } -static struct bin_attribute opal_msglog_attr = { +static struct bin_attribute opal_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = opal_msglog_read + .read_new = opal_msglog_read }; struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c deleted file mode 100644 index 7e419de71db8..000000000000 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2014-2016 IBM Corp. - */ - -#include <linux/module.h> -#include <misc/cxl-base.h> -#include <asm/pnv-pci.h> -#include <asm/opal.h> - -#include "pci.h" - -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - int rc; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - pe_info(pe, "Switching PHB to CXL\n"); - - rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); - if (rc == OPAL_UNSUPPORTED) - dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); - else if (rc) - dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); - - return rc; -} -EXPORT_SYMBOL(pnv_phb_to_cxl_mode); - -/* Find PHB for cxl dev and allocate MSI hwirqs? 
- * Returns the absolute hardware IRQ number - */ -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); - - if (hwirq < 0) { - dev_warn(&dev->dev, "Failed to find a free MSI\n"); - return -ENOSPC; - } - - return phb->msi_base + hwirq; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); - -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); -} -EXPORT_SYMBOL(pnv_cxl_release_hwirqs); - -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq; - - for (i = 1; i < CXL_IRQ_RANGES; i++) { - if (!irqs->range[i]) - continue; - pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", - i, irqs->offset[i], - irqs->range[i]); - hwirq = irqs->offset[i] - phb->msi_base; - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, - irqs->range[i]); - } -} -EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); - -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq, try; - - memset(irqs, 0, sizeof(struct cxl_irq_ranges)); - - /* 0 is reserved for the multiplexed PSL DSI interrupt */ - for (i = 1; i < CXL_IRQ_RANGES && num; i++) { - try = num; - while (try) { - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); - if (hwirq >= 0) - break; - try /= 2; - } - if (!try) - goto fail; - - irqs->offset[i] = phb->msi_base + hwirq; - irqs->range[i] = try; - pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", - i, irqs->offset[i], irqs->range[i]); - num -= try; - } - if (num) - goto fail; - - return 0; -fail: - pnv_cxl_release_hwirq_ranges(irqs, dev); - return -ENOSPC; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); - -int pnv_cxl_get_irq_count(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return phb->msi_bmp.irq_count; -} -EXPORT_SYMBOL(pnv_cxl_get_irq_count); - -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - unsigned int xive_num = hwirq - phb->msi_base; - struct pnv_ioda_pe *pe; - int rc; - - if (!(pe = pnv_ioda_get_pe(dev))) - return -ENODEV; - - /* Assign XIVE to PE */ - rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); - if (rc) { - pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " - "hwirq 0x%x XIVE 0x%x PE\n", - pci_name(dev), rc, phb->msi_base, hwirq, xive_num); - return -EIO; - } - pnv_set_msi_irq_chip(phb, virq); - - return 0; -} -EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b0a14e48175c..d2a8e0287811 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -39,8 +39,6 @@ #include <asm/mmzone.h> #include <asm/xive.h> -#include <misc/cxl-base.h> - #include "powernv.h" #include "pci.h" #include "../../../../drivers/pci/pci.h" @@ -1636,47 +1634,6 @@ int64_t 
pnv_opal_pci_msi_eoi(struct irq_data *d) return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq); } -/* - * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers - */ -static void pnv_ioda2_msi_eoi(struct irq_data *d) -{ - int64_t rc; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct pci_controller *hose = irq_data_get_irq_chip_data(d); - struct pnv_phb *phb = hose->private_data; - - rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); - WARN_ON_ONCE(rc); - - icp_native_eoi(d); -} - -/* P8/CXL only */ -void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) -{ - struct irq_data *idata; - struct irq_chip *ichip; - - /* The MSI EOI OPAL call is only needed on PHB3 */ - if (phb->model != PNV_PHB_MODEL_PHB3) - return; - - if (!phb->ioda.irq_chip_init) { - /* - * First time we setup an MSI IRQ, we need to setup the - * corresponding IRQ chip to route correctly. - */ - idata = irq_get_irq_data(virq); - ichip = irq_data_get_irq_chip(idata); - phb->ioda.irq_chip_init = 1; - phb->ioda.irq_chip = *ichip; - phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; - } - irq_set_chip(virq, &phb->ioda.irq_chip); - irq_set_chip_data(virq, phb->hose); -} - static struct irq_chip pnv_pci_msi_irq_chip; /* diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 35f566aa0424..b2c1da025410 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -14,7 +14,6 @@ #include <linux/io.h> #include <linux/msi.h> #include <linux/iommu.h> -#include <linux/sched/mm.h> #include <asm/sections.h> #include <asm/io.h> @@ -33,8 +32,6 @@ #include "powernv.h" #include "pci.h" -static DEFINE_MUTEX(tunnel_mutex); - int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { struct device_node *node = np; @@ -744,64 +741,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid) return tbl; } -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - -int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); - u64 tunnel_bar; - __be64 val; - int rc; - - if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) - return -ENXIO; - if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) - return -ENXIO; - - mutex_lock(&tunnel_mutex); - rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - tunnel_bar = be64_to_cpu(val); - if (enable) { - /* - * Only one device per PHB can use atomics. - * Our policy is first-come, first-served. - */ - if (tunnel_bar) { - if (tunnel_bar != addr) - rc = -EBUSY; - else - rc = 0; /* Setting same address twice is ok */ - goto out; - } - } else { - /* - * The device that owns atomics and wants to release - * them must pass the same address with enable == 0. 
- */ - if (tunnel_bar != addr) { - rc = -EPERM; - goto out; - } - addr = 0x0ULL; - } - rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); - rc = opal_error_code(rc); -out: - mutex_unlock(&tunnel_mutex); - return rc; -} -EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); - void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 93fba1f8661f..42075501663b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -163,7 +163,6 @@ struct pnv_phb { unsigned int *io_segmap; /* IRQ chip */ - int irq_chip_init; struct irq_chip irq_chip; /* Sorted list of used PE's based @@ -281,7 +280,6 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); -extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index 67c8c4b2d8b1..157d9a8134e4 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -32,15 +32,15 @@ int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, static struct memcons *uv_memcons; static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return memcons_copy(uv_memcons, to, pos, count); } -static struct bin_attribute uv_msglog_attr = { +static struct bin_attribute uv_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = uv_msglog_read + .read_new = uv_msglog_read }; static int __init uv_init(void) diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 4a2520ec6d7f..61b37c9400b2 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -190,10 +190,10 @@ static void spu_unmap(struct spu *spu) static int __init setup_areas(struct spu *spu) { struct table {char* name; unsigned long addr; unsigned long size;}; - unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO)); spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr, - sizeof(struct spe_shadow), shadow_flags); + sizeof(struct spe_shadow), + pgprot_noncached_wc(PAGE_KERNEL_RO)); if (!spu_pdata(spu)->shadow) { pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__); goto fail_ioremap; diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 2c585f7a0b6e..126fb738fc44 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += kernel/ mm/ net/ -obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ obj-$(CONFIG_CRYPTO) += crypto/ obj-y += errata/ obj-$(CONFIG_KVM) += kvm/ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 10116f68569d..1fd197afd2f7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -45,6 +45,7 @@ config RISCV select ARCH_HAS_PMEM_API select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PREPARE_SYNC_CORE_CMD + select ARCH_HAS_PTDUMP if MMU select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU @@ -64,6 +65,7 @@ config RISCV select 
ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU + select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 @@ -115,7 +117,6 @@ config RISCV select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PENDING_IRQ if SMP select GENERIC_PCI_IOMAP - select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT @@ -153,7 +154,7 @@ config RISCV select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU @@ -205,6 +206,7 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_MSI if PCI + select RELOCATABLE if !MMU && !PHYS_RAM_BASE_FIXED select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_APLIC select RISCV_IMSIC @@ -292,13 +294,6 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. -config PAGE_OFFSET - hex - default 0x80000000 if !MMU && RISCV_M_MODE - default 0x80200000 if !MMU - default 0xc0000000 if 32BIT - default 0xff60000000000000 if 64BIT - config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC @@ -570,7 +565,8 @@ config RISCV_ISA_C help Adds "C" to the ISA subsets that the toolchain is allowed to emit when building Linux, which results in compressed instructions in the - Linux binary. + Linux binary. This option produces a kernel that will not run on + systems that do not support compressed instructions. If you don't know what to do here, say Y. @@ -591,8 +587,8 @@ config RISCV_ISA_SVNAPOT depends on RISCV_ALTERNATIVE default y help - Allow kernel to detect the Svnapot ISA-extension dynamically at boot - time and enable its usage. + Enable support for the Svnapot ISA-extension when it is detected + at boot. The Svnapot extension is used to mark contiguous PTEs as a range of contiguous virtual-to-physical translations for a naturally @@ -610,9 +606,8 @@ config RISCV_ISA_SVPBMT depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the Svpbmt - ISA-extension (Supervisor-mode: page-based memory types) and - enable its usage. + Add support for the Svpbmt ISA-extension (Supervisor-mode: + page-based memory types) in the kernel when it is detected at boot. The memory type for a page contains a combination of attributes that indicate the cacheability, idempotency, and ordering @@ -631,14 +626,15 @@ config TOOLCHAIN_HAS_V depends on AS_HAS_OPTION_ARCH config RISCV_ISA_V - bool "VECTOR extension support" + bool "Vector extension support" depends on TOOLCHAIN_HAS_V depends on FPU select DYNAMIC_SIGFRAME default y help - Say N here if you want to disable all vector related procedure - in the kernel. + Add support for the Vector extension when it is detected at boot. + When this option is disabled, neither the kernel nor userspace may + use vector procedures. If you don't know what to do here, say Y. 
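As an aside, the behaviour these help texts describe (build the support in, use it only when the hart advertises it) is visible to userspace through the riscv_hwprobe(2) syscall, whose key space grows later in this series. A minimal sketch of such a probe, assuming installed kernel UAPI headers; the key and bit names come from <asm/hwprobe.h>, while the program itself is illustrative:

	/* Illustrative userspace probe, not part of the kernel patch itself */
	#include <asm/hwprobe.h>	/* RISCV_HWPROBE_KEY_IMA_EXT_0, RISCV_HWPROBE_IMA_V */
	#include <sys/syscall.h>	/* __NR_riscv_hwprobe */
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

		/* one pair, all online CPUs (cpusetsize = 0, cpus = NULL), no flags */
		if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
			return 1;

		printf("V %ssupported\n", (pair.value & RISCV_HWPROBE_IMA_V) ? "" : "not ");
		return 0;
	}

A kernel built without RISCV_ISA_V never marks "v" as usable, so the bit reads as clear there even on hardware that implements the extension.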
@@ -737,6 +733,14 @@ config TOOLCHAIN_HAS_VECTOR_CRYPTO def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb) depends on AS_HAS_OPTION_ARCH +config TOOLCHAIN_HAS_ZBA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_HAS_OPTION_ARCH + config RISCV_ISA_ZBA bool "Zba extension support for bit manipulation instructions" default y @@ -751,12 +755,12 @@ config RISCV_ISA_ZBA config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" - depends on TOOLCHAIN_HAS_ZBB depends on RISCV_ALTERNATIVE default y help - Adds support to dynamically detect the presence of the ZBB - extension (basic bit manipulation) and enable its usage. + Add support for enabling optimisations in the kernel when the + Zbb extension is detected at boot. Some optimisations may + additionally depend on toolchain support for Zbb. The Zbb extension provides instructions to accelerate a number of bit-specific operations (count bit population, sign extending, @@ -787,6 +791,28 @@ config RISCV_ISA_ZBC If you don't know what to do here, say Y. +config TOOLCHAIN_HAS_ZBKB + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbkb) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbkb) + depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZBKB + bool "Zbkb extension support for bit manipulation instructions" + depends on TOOLCHAIN_HAS_ZBKB + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZBKB + extension (bit manipulation for cryptography) and enable its usage. + + The Zbkb extension provides instructions to accelerate a number + of common cryptography operations (pack, zip, etc). + + If you don't know what to do here, say Y. + config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" depends on MMU @@ -795,9 +821,9 @@ config RISCV_ISA_ZICBOM select RISCV_DMA_NONCOHERENT select DMA_DIRECT_REMAP help - Adds support to dynamically detect the presence of the ZICBOM - extension (Cache Block Management Operations) and enable its - usage. + Add support for the Zicbom extension (Cache Block Management + Operations) and enable its use in the kernel when it is detected + at boot. The Zicbom extension can be used to handle for example non-coherent DMA support on devices that need it. @@ -810,7 +836,7 @@ config RISCV_ISA_ZICBOZ default y help Enable the use of the Zicboz extension (cbo.zero instruction) - when available. + in the kernel when it is detected at boot. The Zicboz extension is used for faster zeroing of memory. @@ -848,8 +874,9 @@ config FPU bool "FPU support" default y help - Say N here if you want to disable all floating-point related procedure - in the kernel. + Add support for floating point operations when an FPU is detected at + boot. When this option is disabled, neither the kernel nor userspace + may use the floating point unit. If you don't know what to do here, say Y. 
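These options all follow the same two-level scheme: the TOOLCHAIN_HAS_* check decides whether the instructions can be emitted at all, and boot-time detection decides whether the patched-in fast path is actually taken. A hedged sketch of the in-kernel idiom, modelled on the __arch_hweight32() change further down: ALTERNATIVE() and RISCV_ISA_EXT_ZBB are the real interfaces, while example_ctz() is invented for illustration and assumes w != 0:

	#include <asm/alternative-macros.h>
	#include <asm/hwcap.h>

	/* Illustrative only: mirrors the asm-goto pattern used by __arch_hweight32() */
	static __always_inline unsigned int example_ctz(unsigned int w)
	{
	#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
		/* "j legacy" is patched to a nop at boot on harts that report Zbb */
		asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1)
			 : : : : legacy);

		asm(".option push\n"
		    ".option arch,+zbb\n"
		    "ctz %0, %1\n"
		    ".option pop\n"
		    : "=r" (w) : "r" (w));
		return w;
	legacy:
	#endif
		return __builtin_ctz(w);	/* generic fallback */
	}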
@@ -1079,7 +1106,7 @@ config PARAVIRT_TIME_ACCOUNTING config RELOCATABLE bool "Build a relocatable kernel" - depends on MMU && 64BIT && !XIP_KERNEL + depends on !XIP_KERNEL select MODULE_SECTIONS if MODULES help This builds a kernel as a Position Independent Executable (PIE), @@ -1273,13 +1300,14 @@ config RISCV_ISA_FALLBACK config BUILTIN_DTB bool "Built-in device tree" depends on OF && NONPORTABLE + select GENERIC_BUILTIN_DTB help Build a device tree into the Linux image. This option should be selected if no bootloader is being used. If unsure, say N. -config BUILTIN_DTB_SOURCE +config BUILTIN_DTB_NAME string "Built-in device tree source" depends on BUILTIN_DTB help diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 17606940bb52..8b503e54fa1b 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -59,7 +59,6 @@ config ARCH_THEAD config ARCH_VIRT bool "QEMU Virt Machine" - select CLINT_TIMER if RISCV_M_MODE select POWER_RESET select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF @@ -79,7 +78,6 @@ config ARCH_CANAAN config SOC_CANAAN_K210 bool "Canaan Kendryte K210 SoC" depends on !MMU && ARCH_CANAAN - select CLINT_TIMER if RISCV_M_MODE select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 13fbc0f94238..600df90bc141 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -98,7 +98,6 @@ KBUILD_AFLAGS += -march=$(riscv-march-y) CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/') KBUILD_CFLAGS += -mno-save-restore -KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS += -mcmodel=medlow diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index bff887d38abe..64a898da9aee 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -8,5 +8,3 @@ subdir-y += sophgo subdir-y += spacemit subdir-y += starfive subdir-y += thead - -obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_SOURCE)) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 0f7dcbe3c45b..3c8e16d71e17 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -26,7 +26,6 @@ CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_MICROCHIP=y -CONFIG_ARCH_RENESAS=y CONFIG_ARCH_SIFIVE=y CONFIG_ARCH_SOPHGO=y CONFIG_ARCH_SPACEMIT=y @@ -202,7 +201,6 @@ CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y -# CONFIG_USB_XHCI_RCAR is not set CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 87ff5a1233af..ee18d1e333f2 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -35,7 +35,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 95cbd574f291..e770d81b738e 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -27,7 +27,7 @@ CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0 
root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y CONFIG_BUILTIN_DTB=y -CONFIG_BUILTIN_DTB_SOURCE="canaan/k210_generic" +CONFIG_BUILTIN_DTB_NAME="canaan/k210_generic" # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set # CONFIG_GCC_PLUGINS is not set diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index f0da9d7b39c3..bc6c77ba837d 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,5 +1,9 @@ ifdef CONFIG_RELOCATABLE -KBUILD_CFLAGS += -fno-pie +# We can't use PIC/PIE when handling early-boot errata parsing, as the kernel +# doesn't have a GOT setup at that point. So instead just use medany: it's +# usually position-independent, so it should be good enough for the errata +# handling. +KBUILD_CFLAGS += -fno-pie -mcmodel=medany endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index 613769b9cdc9..0e7cdbbec8ef 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -19,7 +19,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -50,7 +50,7 @@ static inline unsigned int __arch_hweight8(unsigned int w) #if BITS_PER_LONG == 64 static __always_inline unsigned long __arch_hweight64(__u64 w) { -# ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -64,7 +64,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) return w; legacy: -# endif +#endif return __sw_hweight64(w); } #else /* BITS_PER_LONG == 64 */ diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 776354895b81..a8a2af6dfe9d 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -27,6 +27,7 @@ #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) +#define SRLI __REG_SEL(srliw, srli) #if __SIZEOF_POINTER__ == 8 #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 49a0f48d93df..d59310f74c2b 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,7 +15,7 @@ #include <asm/barrier.h> #include <asm/bitsperlong.h> -#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) +#if !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/ffs.h> @@ -175,7 +175,7 @@ legacy: variable_fls(x_); \ }) -#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ +#endif /* !(defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)) || defined(NO_ALTERNATIVE) */ #include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls64.h> diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index 88e6f1499e88..da378856f1d5 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -49,8 +49,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * ZBB only saves three instructions on 32-bit and five on 64-bit so not * worth checking if supported without Alternatives. 
*/ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 427c41dde643..2ec119eb147b 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr, { unsigned long tmp; + u32 *__ptr32b; + ulong __s, __val, __mask; + asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop", 0, RISCV_ISA_EXT_ZAWRS, 1) : : : : no_zawrs); switch (size) { case 1: - fallthrough; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 2: - /* RISC-V doesn't have lr instructions on byte and half-word. */ - goto no_zawrs; + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3); + __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE; + __val = val << __s; + __mask = 0xffff << __s; + + asm volatile( + " lr.w %0, %1\n" + " and %0, %0, %3\n" + " xor %0, %0, %2\n" + " bnez %0, 1f\n" + ZAWRS_WRS_NTO "\n" + "1:" + : "=&r" (tmp), "+A" (*(__ptr32b)) + : "r" (__val), "r" (__mask) + : "memory"); + break; case 4: asm volatile( " lr.w %0, %1\n" diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 569140d6e639..f56b409361fb 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -56,6 +56,9 @@ void __init riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ ARRAY_SIZE(_bundled_exts), NULL) +#define __RISCV_ISA_EXT_BUNDLE_VALIDATE(_name, _bundled_exts, _validate) \ + _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \ + ARRAY_SIZE(_bundled_exts), _validate) /* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ #define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ @@ -63,7 +66,7 @@ void __init riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) -bool check_unaligned_access_emulated_all_cpus(void); +bool __init check_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); @@ -76,7 +79,7 @@ static inline bool unaligned_ctl_available(void) } #endif -bool check_vector_unaligned_access_emulated_all_cpus(void); +bool __init check_vector_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_VECTOR_MISALIGNED) void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); DECLARE_PER_CPU(long, vector_misaligned_access); diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c4721ce44ca4..d627f63ee289 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -79,7 +79,6 @@ struct dyn_arch_ftrace { #define AUIPC_RA (0x00000097) #define JALR_T0 (0x000282e7) #define AUIPC_T0 (0x00000297) -#define NOP4 (0x00000013) 
#define to_jalr_t0(offset) \ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) @@ -92,7 +91,7 @@ struct dyn_arch_ftrace { #define make_call_t0(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_t0(offset); \ call[1] = to_jalr_t0(offset); \ } while (0) @@ -108,7 +107,7 @@ do { \ #define make_call_ra(caller, callee, call) \ do { \ unsigned int offset = \ - (unsigned long) callee - (unsigned long) caller; \ + (unsigned long) (callee) - (unsigned long) (caller); \ call[0] = to_auipc_ra(offset); \ call[1] = to_jalr_ra(offset); \ } while (0) @@ -207,7 +206,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) { struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs); - memcpy(&regs->a0, afregs->args, sizeof(afregs->args)); + memcpy(&regs->a_regs, afregs->args, sizeof(afregs->args)); regs->epc = afregs->epc; regs->ra = afregs->ra; regs->sp = afregs->sp; diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 869da082252a..e3cbf203cdde 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -100,6 +100,11 @@ #define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_SVADE 92 #define RISCV_ISA_EXT_SVADU 93 +#define RISCV_ISA_EXT_ZFBFMIN 94 +#define RISCV_ISA_EXT_ZVFBFMIN 95 +#define RISCV_ISA_EXT_ZVFBFWMA 96 +#define RISCV_ISA_EXT_ZAAMO 97 +#define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index dd624523981c..1f690fea0e03 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 11 +#define RISCV_HWPROBE_MAX_KEY 12 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 9a913010cdd9..71060a2f838e 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -199,5 +199,8 @@ #define RISCV_PAUSE ".4byte 0x100000f" #define ZAWRS_WRS_NTO ".4byte 0x00d00073" #define ZAWRS_WRS_STO ".4byte 0x01d00073" +#define RISCV_NOP4 ".4byte 0x00000013" + +#define RISCV_INSN_NOP4 _AC(0x00000013, U) #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 0257f4aa7ff4..a0e51840b9db 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -137,7 +137,7 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #ifdef CONFIG_MMU #define arch_memremap_wb(addr, size, flags) \ - ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) + ((__force void *)ioremap_prot((addr), (size), __pgprot(_PAGE_KERNEL))) #endif #endif /* _ASM_RISCV_IO_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 125f5ecd9565..572a141ddecd 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -24,21 +24,22 @@ * When not using MMU this corresponds to the first free page in * physical memory (aligned on a page boundary). */ -#ifdef CONFIG_64BIT #ifdef CONFIG_MMU -#define PAGE_OFFSET kernel_map.page_offset -#else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) -#endif -/* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so - * define the PAGE_OFFSET value for SV48 and SV39. 
- */ +#ifdef CONFIG_64BIT +#define PAGE_OFFSET_L5 _AC(0xff60000000000000, UL) #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) +#ifdef CONFIG_XIP_KERNEL +#define PAGE_OFFSET PAGE_OFFSET_L3 #else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#define PAGE_OFFSET kernel_map.page_offset +#endif /* CONFIG_XIP_KERNEL */ +#else +#define PAGE_OFFSET _AC(0xc0000000, UL) #endif /* CONFIG_64BIT */ +#else +#define PAGE_OFFSET ((unsigned long)phys_ram_base) +#endif /* CONFIG_MMU */ #ifndef __ASSEMBLY__ @@ -95,14 +96,9 @@ typedef struct page *pgtable_t; #define MIN_MEMBLOCK_ADDR 0 #endif -#ifdef CONFIG_MMU #define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base)) -#else -#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) -#endif /* CONFIG_MMU */ struct kernel_mapping { - unsigned long page_offset; unsigned long virt_addr; unsigned long virt_offset; uintptr_t phys_addr; @@ -116,6 +112,7 @@ struct kernel_mapping { uintptr_t xiprom; uintptr_t xiprom_sz; #else + unsigned long page_offset; unsigned long va_kernel_pa_offset; #endif }; diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 3e2aebea6312..770ce18a7328 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,24 +15,6 @@ #define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> -/* - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h - * for more details. 
- */ -static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) -{ - if (riscv_use_sbi_for_rfence()) { - tlb_remove_ptdesc(tlb, pt); - } else { - pagetable_dtor(pt); - tlb_remove_page_ptdesc(tlb, pt); - } -} - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -108,14 +90,14 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long addr) { if (pgtable_l4_enabled) - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { if (pgtable_l5_enabled) - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -143,7 +125,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -151,7 +133,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 050fdc49b5ad..428e48e5f57d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -12,7 +12,11 @@ #include <asm/pgtable-bits.h> #ifndef CONFIG_MMU -#define KERNEL_LINK_ADDR PAGE_OFFSET +#ifdef CONFIG_RELOCATABLE +#define KERNEL_LINK_ADDR UL(0) +#else +#define KERNEL_LINK_ADDR _AC(CONFIG_PHYS_RAM_BASE, UL) +#endif #define KERN_VIRT_SIZE (UL(-1)) #else @@ -341,6 +345,14 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) +#define pte_pgprot pte_pgprot +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + static inline int pte_present(pte_t pte) { return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -674,6 +686,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd; @@ -699,6 +716,18 @@ static inline unsigned long pud_pfn(pud_t pud) return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT); } +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) +{ + return pte_pgprot(pmd_pte(pmd)); +} + +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) +{ + return pte_pgprot(pud_pte(pud)); +} + static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); @@ -768,6 +797,30 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) return pte_pmd(pte_mkdevmap(pmd_pte(pmd))); } +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return pte_special(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pte_pmd(pte_mkspecial(pmd_pte(pmd))); +} +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return pte_special(pud_pte(pud)); +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ 
+ return pte_pud(pte_mkspecial(pud_pte(pud))); +} +#endif + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index b5b0adcc85c1..2910231977cb 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -23,14 +23,16 @@ struct pt_regs { unsigned long t2; unsigned long s0; unsigned long s1; - unsigned long a0; - unsigned long a1; - unsigned long a2; - unsigned long a3; - unsigned long a4; - unsigned long a5; - unsigned long a6; - unsigned long a7; + struct_group(a_regs, + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + ); unsigned long s2; unsigned long s3; unsigned long s4; diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h new file mode 100644 index 000000000000..451fd76b8811 --- /dev/null +++ b/arch/riscv/include/asm/runtime-const.h @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_RUNTIME_CONST_H +#define _ASM_RISCV_RUNTIME_CONST_H + +#include <asm/asm.h> +#include <asm/alternative.h> +#include <asm/cacheflush.h> +#include <asm/insn-def.h> +#include <linux/memory.h> +#include <asm/text-patching.h> + +#include <linux/uaccess.h> + +#ifdef CONFIG_32BIT +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "addi %[__ret],%[__ret],-0x211\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret)); \ + __ret; \ +}) +#else +/* + * Loading 64-bit constants into a register from immediates is a non-trivial + * task on riscv64. To get it somewhat performant, load 32 bits into two + * different registers and then combine the results. + * + * If the processor supports the Zbkb extension, we can combine the final + * "slli,slli,srli,add" into the single "pack" instruction. If the processor + * doesn't support Zbkb but does support the Zba extension, we can + * combine the final "slli,srli,add" into one instruction "add.uw". 
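+ *
+ * As a worked example of the placeholder immediates used below:
+ * "lui %[__ret],0x89abd" followed by "addiw %[__ret],%[__ret],-0x211"
+ * leaves the sign-extended low word 0x89abcdef in __ret, while
+ * "lui %[__tmp],0x1234" followed by "addiw %[__tmp],%[__tmp],0x567"
+ * leaves 0x01234567 in __tmp; the base slli/slli/srli/add sequence then
+ * zero-extends __ret and adds in __tmp shifted left by 32, yielding
+ * 0x0123456789abcdef. The actual value of "sym" is patched over these
+ * placeholder immediates at boot by __runtime_fixup_32() below.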
+ */ +#define RISCV_RUNTIME_CONST_64_PREAMBLE \ + ".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + "lui %[__ret],0x89abd\n\t" \ + "lui %[__tmp],0x1234\n\t" \ + "addiw %[__ret],%[__ret],-0x211\n\t" \ + "addiw %[__tmp],%[__tmp],0x567\n\t" \ + +#define RISCV_RUNTIME_CONST_64_BASE \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "slli %[__ret],%[__ret],32\n\t" \ + "srli %[__ret],%[__ret],32\n\t" \ + "add %[__ret],%[__ret],%[__tmp]\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBA \ + ".option push\n\t" \ + ".option arch,+zba\n\t" \ + ".option norvc\n\t" \ + "slli %[__tmp],%[__tmp],32\n\t" \ + "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_ZBKB \ + ".option push\n\t" \ + ".option arch,+zbkb\n\t" \ + ".option norvc\n\t" \ + "pack %[__ret],%[__ret],%[__tmp]\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".option pop\n\t" \ + +#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + ".option pop\n\t" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + +#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) \ + && defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE_2( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBA, \ + 0, RISCV_ISA_EXT_ZBA, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#elif defined(CONFIG_RISCV_ISA_ZBKB) +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + ALTERNATIVE( \ + RISCV_RUNTIME_CONST_64_BASE, \ + RISCV_RUNTIME_CONST_64_ZBKB, \ + 0, RISCV_ISA_EXT_ZBKB, 1 \ + ) \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#else +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret, __tmp; \ + asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ + RISCV_RUNTIME_CONST_64_BASE \ + RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ + : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ + __ret; \ +}) +#endif +#endif + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + u32 __ret; \ + asm_inline(".option push\n\t" \ + ".option norvc\n\t" \ + "1:\t" \ + SRLI " %[__ret],%[__val],12\n\t" \ + ".option pop\n\t" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : [__ret] "=r" (__ret) \ + : [__val] "r" (val)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + /* On riscv there are currently only cache-wide flushes so va is ignored. 
*/ + __always_unused uintptr_t va = (uintptr_t)where; + + flush_icache_range(va, va + 4 * insns); +} + +/* + * The 32-bit immediate is stored in a lui+addi pairing. + * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction. + * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction. + */ +static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val) +{ + unsigned int lower_immediate, upper_immediate; + u32 lui_insn, addi_insn, addi_insn_mask; + __le32 lui_res, addi_res; + + /* Mask out upper 12 bit of addi */ + addi_insn_mask = 0x000fffff; + + lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16; + addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16; + + lower_immediate = sign_extend32(val, 11); + upper_immediate = (val - lower_immediate); + + if (upper_immediate & 0xfffff000) { + /* replace upper 20 bits of lui with upper immediate */ + lui_insn &= 0x00000fff; + lui_insn |= upper_immediate & 0xfffff000; + } else { + /* replace lui with nop if immediate is small enough to fit in addi */ + lui_insn = RISCV_INSN_NOP4; + /* + * lui is being skipped, so do a load instead of an add. A load + * is performed by adding with the x0 register. Setting rs to + * zero with the following mask will accomplish this goal. + */ + addi_insn_mask &= 0x07fff; + } + + if (lower_immediate & 0x00000fff) { + /* replace upper 12 bits of addi with lower 12 bits of val */ + addi_insn &= addi_insn_mask; + addi_insn |= (lower_immediate & 0x00000fff) << 20; + } else { + /* replace addi with nop if lower_immediate is empty */ + addi_insn = RISCV_INSN_NOP4; + } + + addi_res = cpu_to_le32(addi_insn); + lui_res = cpu_to_le32(lui_insn); + mutex_lock(&text_mutex); + patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res)); + patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res)); + mutex_unlock(&text_mutex); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ +#ifdef CONFIG_32BIT + __runtime_fixup_32(where, where + 4, val); + __runtime_fixup_caches(where, 2); +#else + __runtime_fixup_32(where, where + 8, val); + __runtime_fixup_32(where + 4, where + 12, val >> 32); + __runtime_fixup_caches(where, 4); +#endif +} + +/* + * Replace the least significant 5 bits of the srli/srliw immediate that is + * located at bits 20-24 + */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le16 *parcel = where; + __le32 res; + u32 insn; + + insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= 0xfe0fffff; + insn |= (val & 0b11111) << 20; + + res = cpu_to_le32(insn); + mutex_lock(&text_mutex); + patch_text_nosync(where, &res, sizeof(insn)); + mutex_unlock(&text_mutex); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_RISCV_RUNTIME_CONST_H */ diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 4ffb022b097f..dc5782b5fbad 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -18,6 +18,10 @@ struct suspend_context { unsigned long ie; #ifdef CONFIG_MMU unsigned long satp; + unsigned long stimecmp; +#if __riscv_xlen < 64 + unsigned long stimecmph; +#endif #endif }; diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 
72e559934952..ce0dd0fed764 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -60,8 +60,7 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, bool arch_tlbbatch_should_defer(struct mm_struct *mm); void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr); + struct mm_struct *mm, unsigned long start, unsigned long end); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index c3c1cc951cb9..3c2fce939673 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,14 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#define RISCV_HWPROBE_EXT_ZICNTR (1ULL << 50) +#define RISCV_HWPROBE_EXT_ZIHPM (1ULL << 51) +#define RISCV_HWPROBE_EXT_ZFBFMIN (1ULL << 52) +#define RISCV_HWPROBE_EXT_ZVFBFMIN (1ULL << 53) +#define RISCV_HWPROBE_EXT_ZVFBFWMA (1ULL << 54) +#define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) +#define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) +#define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -95,6 +103,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 #define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 +#define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index f06bc5efcd79..5f59fd226cc5 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVVPTC, KVM_RISCV_ISA_EXT_ZABHA, KVM_RISCV_ISA_EXT_ZICCRSE, + KVM_RISCV_ISA_EXT_ZAAMO, + KVM_RISCV_ISA_EXT_ZALRSC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 2fd29695a788..3f6d5a6789e8 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -305,7 +305,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) } } - return ioremap_prot(phys, size, pgprot_val(prot)); + return ioremap_prot(phys, size, prot); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e89455a6a0e5..16490755304e 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -36,7 +36,6 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); - OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 40ac72e407b6..2054f6c4b0ae 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -32,6 +32,7 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -53,9 +54,7 @@ u32 
thead_vlenb_of; */ unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) { - if (!isa_bitmap) - return riscv_isa[0]; - return isa_bitmap[0]; + return !isa_bitmap ? riscv_isa[0] : isa_bitmap[0]; } EXPORT_SYMBOL_GPL(riscv_isa_extension_base); @@ -76,10 +75,19 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i if (bit >= RISCV_ISA_EXT_MAX) return false; - return test_bit(bit, bmap) ? true : false; + return test_bit(bit, bmap); } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static int riscv_ext_f_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -91,6 +99,8 @@ static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); return -EINVAL; } + + any_cpu_has_zicbom = true; return 0; } @@ -109,6 +119,82 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * Due to extension ordering, d is checked before f, so no deferral + * is required. + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) { + pr_warn_once("This kernel does not support systems with F but not D\n"); + return -EINVAL; + } + + return 0; +} + +static int riscv_ext_d_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + return 0; +} + +static int riscv_ext_vector_x_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + return 0; +} + +static int riscv_ext_vector_float_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * The kernel doesn't support systems that don't implement both of + * F and D, so if any of the vector extensions that do floating point + * are to be usable, both floating point extensions need to be usable. + * + * Since this function validates vector only, and v/Zve* are probed + * after f/d, there's no need for a deferral here. 
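+ *
+ * (Validate hooks that do need such ordering return -EPROBE_DEFER
+ * instead and are retried once the rest of the ISA string has been
+ * resolved; see riscv_ext_zvfbfwma_validate() below for an example.)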
+ */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return -EINVAL; + + return 0; +} + +static int riscv_ext_vector_crypto_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + /* + * It isn't the kernel's job to check that the binding is correct, so + * it should be enough to check that any of the vector extensions are + * enabled, which in-turn means that vector is usable in this kernel + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32X)) + return -EPROBE_DEFER; + + return 0; +} + static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -140,6 +226,28 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32F)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zvfbfwma_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZFBFMIN) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVFBFMIN)) + return 0; + + return -EPROBE_DEFER; +} + static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -150,6 +258,11 @@ static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, return 0; } +static const unsigned int riscv_a_exts[] = { + RISCV_ISA_EXT_ZAAMO, + RISCV_ISA_EXT_ZALRSC, +}; + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -321,17 +434,15 @@ static const unsigned int riscv_c_exts[] = { const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i), __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m), - __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a), - __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), - __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), + __RISCV_ISA_EXT_SUPERSET(a, RISCV_ISA_EXT_a, riscv_a_exts), + __RISCV_ISA_EXT_DATA_VALIDATE(f, RISCV_ISA_EXT_f, riscv_ext_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(d, RISCV_ISA_EXT_d, riscv_ext_d_validate), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), - __RISCV_ISA_EXT_SUPERSET(v, RISCV_ISA_EXT_v, riscv_v_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, - riscv_ext_zicbom_validate), - __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, - riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), @@ -341,10 +452,13 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), 
__RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO), __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), + __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), @@ -370,29 +484,31 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED), __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH), __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), - __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), - __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), - __RISCV_ISA_EXT_SUPERSET(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts), - __RISCV_ISA_EXT_DATA(zve32x, RISCV_ISA_EXT_ZVE32X), - __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), - __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), - __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvbc, RISCV_ISA_EXT_ZVBC, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zve32x, RISCV_ISA_EXT_ZVE32X, riscv_ext_vector_x_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts, riscv_ext_vector_x_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfmin, RISCV_ISA_EXT_ZVFBFMIN, riscv_vector_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfwma, RISCV_ISA_EXT_ZVFBFWMA, riscv_ext_zvfbfwma_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), - __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), - __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG), - __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED), - __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts), - __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA), - __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB), - __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED), - __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH), - __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkb, RISCV_ISA_EXT_ZVKB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkg, RISCV_ISA_EXT_ZVKG, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkn, riscv_zvkn_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvknc, riscv_zvknc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkned, 
RISCV_ISA_EXT_ZVKNED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkng, riscv_zvkng_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknha, RISCV_ISA_EXT_ZVKNHA, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknhb, RISCV_ISA_EXT_ZVKNHB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvks, riscv_zvks_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksc, riscv_zvksc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksed, RISCV_ISA_EXT_ZVKSED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksh, RISCV_ISA_EXT_ZVKSH, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksg, riscv_zvksg_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkt, RISCV_ISA_EXT_ZVKT, riscv_ext_vector_crypto_validate), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), @@ -960,16 +1076,6 @@ void __init riscv_fill_hwcap(void) riscv_v_setup_vsize(); } - if (elf_hwcap & COMPAT_HWCAP_ISA_V) { - /* - * ISA string in device tree might have 'v' flag, but - * CONFIG_RISCV_ISA_V is disabled in kernel. - * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled. - */ - if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; - } - memset(print_str, 0, sizeof(print_str)); for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) @@ -1001,6 +1107,11 @@ void __init riscv_user_isa_enable(void) current->thread.envcfg |= ENVCFG_CBZE; else if (any_cpu_has_zicboz) pr_warn("Zicboz disabled as it is unavailable on some harts\n"); + + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOM)) + current->thread.envcfg |= ENVCFG_CBCFE; + else if (any_cpu_has_zicbom) + pr_warn("Zicbom disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 3c37661801f9..e783a72d051f 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -468,6 +468,9 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, case R_RISCV_ALIGN: case R_RISCV_RELAX: break; + case R_RISCV_64: + *(u64 *)loc = val; + break; default: pr_err("Unknown rela relocation: %d\n", r_type); return -ENOEXEC; diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 3524db5e4fa0..674dcdfae7a1 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -36,7 +36,7 @@ static int ftrace_check_current_call(unsigned long hook_pos, unsigned int *expected) { unsigned int replaced[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; /* we expect nops at the hook position */ if (!expected) @@ -68,7 +68,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, bool enable, bool ra) { unsigned int call[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (ra) make_call_ra(hook_pos, target, call); @@ -97,7 +97,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {NOP4, NOP4}; + unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; if (patch_insn_write((void *)rec->ip, nops, 
MCOUNT_INSN_SIZE)) return -EPERM; diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 654ed159c830..b4c1a6a3fbd2 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -11,8 +11,8 @@ #include <asm/bug.h> #include <asm/cacheflush.h> #include <asm/text-patching.h> +#include <asm/insn-def.h> -#define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU bool arch_jump_label_transform_queue(struct jump_entry *entry, @@ -33,7 +33,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | (((u32)offset & GENMASK(20, 20)) << (31 - 20)); } else { - insn = RISCV_INSN_NOP; + insn = RISCV_INSN_NOP4; } if (early_boot_irqs_disabled) { diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 068168046e0e..da4a4000e57e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -12,8 +12,6 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> -#define ABI_SIZE_ON_STACK 80 - .text .macro SAVE_ABI_STATE @@ -28,12 +26,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -ABI_SIZE_ON_STACK - REG_S ra, 1*SZREG(sp) - REG_S s0, 8*SZREG(sp) - REG_S a0, 10*SZREG(sp) - REG_S a1, 11*SZREG(sp) - addi s0, sp, ABI_SIZE_ON_STACK + addi sp, sp, -FREGS_SIZE_ON_STACK + REG_S ra, FREGS_RA(sp) + REG_S s0, FREGS_S0(sp) + REG_S a0, FREGS_A0(sp) + REG_S a1, FREGS_A1(sp) + addi s0, sp, FREGS_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -43,11 +41,11 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 1*SZREG(sp) - REG_L s0, 8*SZREG(sp) - REG_L a0, 10*SZREG(sp) - REG_L a1, 11*SZREG(sp) - addi sp, sp, ABI_SIZE_ON_STACK + REG_L ra, FREGS_RA(sp) + REG_L s0, FREGS_S0(sp) + REG_L a0, FREGS_A0(sp) + REG_L a1, FREGS_A1(sp) + addi sp, sp, FREGS_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4fe45daa6281..c174544eefc8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -235,11 +235,6 @@ static void __init parse_dtb(void) } else { pr_err("No DTB passed to the kernel\n"); } - -#ifdef CONFIG_CMDLINE_FORCE - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); - pr_info("Forcing kernel command line to: %s\n", boot_command_line); -#endif } #if defined(CONFIG_RISCV_COMBO_SPINLOCKS) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index d58b5e751286..e650dec44817 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { cpuid_to_hartid_map(0) = boot_cpu_hartid; + + pr_info("Booting Linux on hartid %lu\n", boot_cpu_hartid); } static DEFINE_PER_CPU_READ_MOSTLY(int, ipi_dummy_dev); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index e36d20205bd7..601a321e0f17 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -231,6 +231,10 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); + + pr_debug("CPU%u: Booted secondary hartid %lu\n", curr_cpuid, + cpuid_to_hartid_map(curr_cpuid)); + set_cpu_online(curr_cpuid, true); /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index d4355c770c36..3fe9e6edef8f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -74,7 +74,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, &frame->ra); if (pc >= (unsigned 
long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!fn(arg, pc))) break; pc = ((struct pt_regs *)sp)->epc; diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 9a8a0dc035b2..24b3f57d467f 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -30,6 +30,13 @@ void suspend_save_csrs(struct suspend_context *context) */ #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + context->stimecmp = csr_read(CSR_STIMECMP); +#if __riscv_xlen < 64 + context->stimecmph = csr_read(CSR_STIMECMPH); +#endif + } + context->satp = csr_read(CSR_SATP); #endif } @@ -43,6 +50,13 @@ void suspend_restore_csrs(struct suspend_context *context) csr_write(CSR_IE, context->ie); #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + csr_write(CSR_STIMECMP, context->stimecmp); +#if __riscv_xlen < 64 + csr_write(CSR_STIMECMPH, context->stimecmph); +#endif + } + csr_write(CSR_SATP, context->satp); #endif } diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 04a4e5495512..249aec8594a9 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -95,7 +95,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZAAMO); EXT_KEY(ZACAS); + EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); @@ -107,10 +109,13 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCA); EXT_KEY(ZCB); EXT_KEY(ZCMOP); + EXT_KEY(ZICBOM); EXT_KEY(ZICBOZ); + EXT_KEY(ZICNTR); EXT_KEY(ZICOND); EXT_KEY(ZIHINTNTL); EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIHPM); EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); @@ -132,6 +137,8 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVE64D); EXT_KEY(ZVE64F); EXT_KEY(ZVE64X); + EXT_KEY(ZVFBFMIN); + EXT_KEY(ZVFBFWMA); EXT_KEY(ZVFH); EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); @@ -148,6 +155,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCD); EXT_KEY(ZCF); EXT_KEY(ZFA); + EXT_KEY(ZFBFMIN); EXT_KEY(ZFH); EXT_KEY(ZFHMIN); } @@ -161,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; @@ -279,6 +287,11 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) pair->value = riscv_cboz_block_size; break; + case RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM)) + pair->value = riscv_cbom_block_size; + break; case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: pair->value = user_max_virt_addr(); break; diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 7cc108aed74e..4354c87c0376 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -605,16 +605,10 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_un kernel_vector_end(); } -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; - if (!has_vector()) { - for_each_online_cpu(cpu) - per_cpu(vector_misaligned_access, cpu) = 
RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - return false; - } - schedule_on_each_cpu(check_vector_unaligned_access_emulated); for_each_online_cpu(cpu) @@ -625,7 +619,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void) return true; } #else -bool check_vector_unaligned_access_emulated_all_cpus(void) +bool __init check_vector_unaligned_access_emulated_all_cpus(void) { return false; } @@ -659,7 +653,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) } } -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -684,7 +678,7 @@ bool unaligned_ctl_available(void) return unaligned_ctl; } #else -bool check_unaligned_access_emulated_all_cpus(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { return false; } diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 91f189cf1611..585d2dcf2dab 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -24,8 +24,12 @@ DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + static cpumask_t fast_misaligned_access; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static int check_unaligned_access(void *param) { int cpu = smp_processor_id(); @@ -121,7 +125,7 @@ static int check_unaligned_access(void *param) return 0; } -static void check_unaligned_access_nonboot_cpu(void *param) +static void __init check_unaligned_access_nonboot_cpu(void *param) { unsigned int cpu = smp_processor_id(); struct page **pages = param; @@ -130,6 +134,50 @@ static void check_unaligned_access_nonboot_cpu(void *param) check_unaligned_access(pages[cpu]); } +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. 
*/ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ +} +#endif + DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); static void modify_unaligned_access_branches(cpumask_t *mask, int weight) @@ -175,7 +223,7 @@ static void set_unaligned_access_static_branches(void) modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); } -static int lock_and_set_unaligned_access_static_branch(void) +static int __init lock_and_set_unaligned_access_static_branch(void) { cpus_read_lock(); set_unaligned_access_static_branches(); @@ -188,21 +236,29 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch); static int riscv_online_cpu(unsigned int cpu) { - static struct page *buf; - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + goto exit; + } else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; goto exit; - - check_unaligned_access_emulated(NULL); - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; } - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + { + static struct page *buf; + + check_unaligned_access_emulated(NULL); + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + } +#endif exit: set_unaligned_access_static_branches(); @@ -217,59 +273,6 @@ static int riscv_offline_cpu(unsigned int cpu) return 0; } -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int check_unaligned_access_speed_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. 
- */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - -out: - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_speed_all_cpus(void) -{ - return 0; -} -#endif - #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS static void check_vector_unaligned_access(struct work_struct *work __always_unused) { @@ -349,7 +352,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", cpu); - return; + goto free; } if (word_cycles < byte_cycles) @@ -363,57 +366,112 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); per_cpu(vector_misaligned_access, cpu) = speed; + +free: + __free_pages(page, MISALIGNED_BUFFER_ORDER); +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + schedule_on_each_cpu(check_vector_unaligned_access); + + return 0; } +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} +#endif static int riscv_online_cpu_vec(unsigned int cpu) { - if (!has_vector()) + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; return 0; + } - if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) return 0; check_vector_unaligned_access_emulated(NULL); check_vector_unaligned_access(NULL); +#endif + return 0; } -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) -{ - schedule_on_each_cpu(check_vector_unaligned_access); +static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" }; - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. 
- */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu_vec, NULL); +static int __init set_unaligned_scalar_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED; + else + return -EINVAL; - return 0; + return 1; } -#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ -static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param); + +static int __init set_unaligned_vector_speed_param(char *str) { - return 0; + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + else + return -EINVAL; + + return 1; } -#endif +__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param); -static int check_unaligned_access_all_cpus(void) +static int __init check_unaligned_access_all_cpus(void) { - bool all_cpus_emulated, all_cpus_vec_unsupported; + int cpu; + + if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN && + !check_unaligned_access_emulated_all_cpus()) { + check_unaligned_access_speed_all_cpus(); + } else { + pr_info("scalar unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_scalar_speed_param]); + for_each_online_cpu(cpu) + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } - all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); - all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + if (!has_vector()) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - if (!all_cpus_vec_unsupported && + if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN && + !check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { kthread_run(vec_check_unaligned_access_speed_all_cpus, NULL, "vec_check_unaligned_access_speed_all_cpus"); + } else { + pr_info("vector unaligned access speed set to '%s' by command line\n", + speed_str[unaligned_vector_speed_param]); + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. 
+ */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); return 0; } diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S index d16f19f1b3b6..7ce4de6f6e69 100644 --- a/arch/riscv/kernel/vec-copy-unaligned.S +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -11,7 +11,7 @@ #define WORD_SEW CONCATENATE(e, WORD_EEW) #define VEC_L CONCATENATE(vle, WORD_EEW).v -#define VEC_S CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v /* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a31ff84740eb..9feb7f67a0a3 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -61,6 +61,6 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig if (bit >= RISCV_ISA_VENDOR_EXT_MAX) return false; - return test_bit(bit, bmap->isa) ? true : false; + return test_bit(bit, bmap->isa); } EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 002ca58dd998..61bd5ba6680a 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -97,6 +97,9 @@ SECTIONS { EXIT_DATA } + + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(L1_CACHE_BYTES) .rel.dyn : { diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 43ee8e33ba23..2e1b646f0d61 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -47,8 +47,10 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -149,8 +151,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALRSC: case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index 7fb12c59e571..9408f50ca59a 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -40,12 +40,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, uproto = (__force unsigned int)htonl(proto); sum += uproto; - /* - * Zbb support saves 4 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ -157,12 +152,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) csum = do_csum_common(ptr, end, data); #ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* @@ 
-244,12 +234,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) end = (const unsigned long *)(buff + len); csum = do_csum_common(ptr, end, data); - /* - * Zbb support saves 6 instructions, so not worth checking without - * alternatives if supported - */ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && - IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { unsigned long fold_temp; /* diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index 57a5c0066231..65027e742af1 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -8,7 +8,8 @@ /* int strcmp(const char *cs, const char *ct) */ SYM_FUNC_START(strcmp) - ALTERNATIVE("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -43,7 +44,7 @@ SYM_FUNC_START(strcmp) * The code was published as part of the bitmanip manual * in Appendix A. */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strcmp_zbb: .option push diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 962983b73251..eb4d2b7ed22b 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -8,7 +8,8 @@ /* int strlen(const char *s) */ SYM_FUNC_START(strlen) - ALTERNATIVE("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -33,7 +34,7 @@ SYM_FUNC_START(strlen) /* * Variant of strlen using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strlen_zbb: #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index 7b2d0ff9ed6c..062000c468c8 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -8,7 +8,8 @@ /* int strncmp(const char *cs, const char *ct, size_t count) */ SYM_FUNC_START(strncmp) - ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB) + __ALTERNATIVE_CFG("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, + IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) /* * Returns @@ -46,7 +47,7 @@ SYM_FUNC_START(strncmp) /* * Variant of strncmp using the ZBB extension if available */ -#ifdef CONFIG_RISCV_ISA_ZBB +#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) strncmp_zbb: .option push diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index cbe4d775ef56..b916a68d324a 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -19,7 +19,7 @@ obj-y += context.o obj-y += pmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += ptdump.o +obj-$(CONFIG_PTDUMP) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 4abe3de23225..55c20ad1f744 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -158,7 +158,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) * * - We get a zero back from the cmpxchg and end up waiting on the * lock. Taking the lock synchronises with the rollover and so - * we are forced to see the updated verion. + * we are forced to see the updated version. 
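The set_mm_asid() rollover comment running through this hunk enumerates the two possible cmpxchg outcomes: reading back zero means a rollover is in flight and the caller must serialise on the lock, while reading back a valid context means the old ASID is still usable. A minimal user-space sketch of that pattern, with C11 atomics and a pthread mutex standing in for the kernel's primitives and all names hypothetical:

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>

        static pthread_mutex_t slow_lock = PTHREAD_MUTEX_INITIALIZER;
        static _Atomic unsigned long active_ctx;  /* 0 marks "rollover in progress" */

        static bool try_reuse_context(unsigned long old_ctx)
        {
                unsigned long expected = old_ctx;

                /* Fast path: a non-zero old value plus a successful cmpxchg
                 * means no generation rollover raced us, so the old ASID is
                 * still valid and can be kept. */
                if (old_ctx &&
                    atomic_compare_exchange_strong(&active_ctx, &expected, old_ctx))
                        return true;

                /* We read back zero (or lost the race): taking the lock
                 * synchronises with the rollover, so the updated generation
                 * is visible before a fresh ASID is allocated. */
                pthread_mutex_lock(&slow_lock);
                /* ... allocate a new ASID for the current generation ... */
                pthread_mutex_unlock(&slow_lock);
                return false;
        }

The comment's second bullet, continuing below, covers the case where the cmpxchg returns a valid context and the old ASID is simply kept.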
* * - We get a valid context back from the cmpxchg then we continue * using old ASID because __flush_context() would have marked ASID diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index b4a78a4b35cf..375dd96bb4a0 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -148,22 +148,25 @@ unsigned long hugetlb_mask_last_page(struct hstate *h) static pte_t get_clear_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - unsigned long pte_num) + unsigned long ncontig) { - pte_t orig_pte = ptep_get(ptep); - unsigned long i; - - for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { - pte_t pte = ptep_get_and_clear(mm, addr, ptep); - - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte_t pte, tmp_pte; + bool present; + + pte = ptep_get_and_clear(mm, addr, ptep); + present = pte_present(pte); + while (--ncontig) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_get_and_clear(mm, addr, ptep); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -212,6 +215,26 @@ static void clear_flush(struct mm_struct *mm, flush_tlb_range(&vma, saddr, addr); } +static int num_contig_ptes_from_size(unsigned long sz, size_t *pgsize) +{ + unsigned long hugepage_shift; + + if (sz >= PGDIR_SIZE) + hugepage_shift = PGDIR_SHIFT; + else if (sz >= P4D_SIZE) + hugepage_shift = P4D_SHIFT; + else if (sz >= PUD_SIZE) + hugepage_shift = PUD_SHIFT; + else if (sz >= PMD_SIZE) + hugepage_shift = PMD_SHIFT; + else + hugepage_shift = PAGE_SHIFT; + + *pgsize = 1 << hugepage_shift; + + return sz >> hugepage_shift; +} + /* * When dealing with NAPOT mappings, the privileged specification indicates that * "if an update needs to be made, the OS generally should first mark all of the @@ -226,22 +249,10 @@ void set_huge_pte_at(struct mm_struct *mm, pte_t pte, unsigned long sz) { - unsigned long hugepage_shift, pgsize; + size_t pgsize; int i, pte_num; - if (sz >= PGDIR_SIZE) - hugepage_shift = PGDIR_SHIFT; - else if (sz >= P4D_SIZE) - hugepage_shift = P4D_SHIFT; - else if (sz >= PUD_SIZE) - hugepage_shift = PUD_SHIFT; - else if (sz >= PMD_SIZE) - hugepage_shift = PMD_SHIFT; - else - hugepage_shift = PAGE_SHIFT; - - pte_num = sz >> hugepage_shift; - pgsize = 1 << hugepage_shift; + pte_num = num_contig_ptes_from_size(sz, &pgsize); if (!pte_present(pte)) { for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) @@ -295,13 +306,14 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { + size_t pgsize; pte_t orig_pte = ptep_get(ptep); int pte_num; if (!pte_napot(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); - pte_num = napot_pte_num(napot_cont_order(orig_pte)); + pte_num = num_contig_ptes_from_size(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pte_num); } @@ -351,6 +363,7 @@ void huge_pte_clear(struct mm_struct *mm, pte_t *ptep, unsigned long sz) { + size_t pgsize; pte_t pte = ptep_get(ptep); int i, pte_num; @@ -359,8 +372,9 @@ void huge_pte_clear(struct mm_struct *mm, return; } - pte_num = napot_pte_num(napot_cont_order(pte)); - for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + pte_num = num_contig_ptes_from_size(sz, &pgsize); + + for (i = 0; i < pte_num; i++, addr += pgsize, ptep++) pte_clear(mm, addr, ptep); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c 
index 15b2eda4c364..ab475ec6ca42 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -20,15 +20,13 @@ #include <linux/dma-map-ops.h> #include <linux/crash_dump.h> #include <linux/hugetlb.h> -#ifdef CONFIG_RELOCATABLE -#include <linux/elf.h> -#endif #include <linux/kfence.h> #include <linux/execmem.h> #include <asm/fixmap.h> #include <asm/io.h> #include <asm/kasan.h> +#include <asm/module.h> #include <asm/numa.h> #include <asm/pgtable.h> #include <asm/sections.h> @@ -171,7 +169,7 @@ static void __init print_vm_layout(void) static void print_vm_layout(void) { } #endif /* CONFIG_DEBUG_VM */ -void __init mem_init(void) +void __init arch_mm_preinit(void) { bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit); #ifdef CONFIG_FLATMEM @@ -192,7 +190,6 @@ void __init mem_init(void) } swiotlb_init(swiotlb, SWIOTLB_VERBOSE); - memblock_free_all(); print_vm_layout(); } @@ -295,10 +292,8 @@ static void __init setup_bootmem(void) phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); @@ -323,6 +318,44 @@ static void __init setup_bootmem(void) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); } +#ifdef CONFIG_RELOCATABLE +extern unsigned long __rela_dyn_start, __rela_dyn_end; + +static void __init relocate_kernel(void) +{ + Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start; + /* + * This holds the offset between the linked virtual address and the + * relocated virtual address. + */ + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; + /* + * This holds the offset between kernel linked virtual address and + * physical address. + */ + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; + + for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) { + Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf_Addr relocated_addr = rela->r_addend; + + if (rela->r_info != R_RISCV_RELATIVE) + continue; + + /* + * Make sure to not relocate vdso symbols like rt_sigreturn + * which are linked from the address 0 in vmlinux since + * vdso symbol addresses are actually used as an offset from + * mm->context.vdso in VDSO_OFFSET macro. + */ + if (relocated_addr >= KERNEL_LINK_ADDR) + relocated_addr += reloc_offset; + + *(Elf_Addr *)addr = relocated_addr; + } +} +#endif /* CONFIG_RELOCATABLE */ + #ifdef CONFIG_MMU struct pt_alloc_ops pt_ops __meminitdata; @@ -823,6 +856,8 @@ static __init void set_satp_mode(uintptr_t dtb_pa) uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + kernel_map.page_offset = PAGE_OFFSET_L5; + if (satp_mode_cmdline == SATP_MODE_57) { disable_pgtable_l5(); } else if (satp_mode_cmdline == SATP_MODE_48) { @@ -893,44 +928,6 @@ retry: #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif -#ifdef CONFIG_RELOCATABLE -extern unsigned long __rela_dyn_start, __rela_dyn_end; - -static void __init relocate_kernel(void) -{ - Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; - /* - * This holds the offset between the linked virtual address and the - * relocated virtual address. - */ - uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; - /* - * This holds the offset between kernel linked virtual address and - * physical address. 
- */ - uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; - - for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { - Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); - Elf64_Addr relocated_addr = rela->r_addend; - - if (rela->r_info != R_RISCV_RELATIVE) - continue; - - /* - * Make sure to not relocate vdso symbols like rt_sigreturn - * which are linked from the address 0 in vmlinux since - * vdso symbol addresses are actually used as an offset from - * mm->context.vdso in VDSO_OFFSET macro. - */ - if (relocated_addr >= KERNEL_LINK_ADDR) - relocated_addr += reloc_offset; - - *(Elf64_Addr *)addr = relocated_addr; - } -} -#endif /* CONFIG_RELOCATABLE */ - #ifdef CONFIG_XIP_KERNEL static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) @@ -1108,11 +1105,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; #ifdef CONFIG_XIP_KERNEL -#ifdef CONFIG_64BIT - kernel_map.page_offset = PAGE_OFFSET_L3; -#else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); -#endif kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); @@ -1127,7 +1119,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr + (uintptr_t)&_sdata - (uintptr_t)&_start; #else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; @@ -1174,7 +1165,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * makes the kernel cross over a PUD_SIZE boundary, raise a bug * since a part of the kernel would not get mapped. */ - BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); + if (IS_ENABLED(CONFIG_64BIT)) + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); relocate_kernel(); #endif @@ -1378,6 +1370,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) { dtb_early_va = (void *)dtb_pa; dtb_early_pa = dtb_pa; + +#ifdef CONFIG_RELOCATABLE + kernel_map.virt_addr = (uintptr_t)_start; + kernel_map.phys_addr = (uintptr_t)_start; + relocate_kernel(); +#endif } static inline void setup_vm_final(void) @@ -1396,21 +1394,19 @@ static void __init arch_reserve_crashkernel(void) { unsigned long long low_size = 0; unsigned long long crash_base, crash_size; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) return; - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } void __init paging_init(void) diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 18706f457da7..559d291fac5c 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -12,7 +12,7 @@ phys_addr_t __virt_to_phys(unsigned long x) * Boundary checking aginst the kernel linear mapping space. 
*/ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x), - "virt_to_phys used for non-linear address: %pK (%pS)\n", + "virt_to_phys used for non-linear address: %p (%pS)\n", (void *)x, (void *)x); return __va_to_pa_nodebug(x); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 9b6e86ce3867..f9e27ba1df99 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include <linux/sched.h> #include <linux/hugetlb.h> +#include <linux/mmu_notifier.h> #include <asm/sbi.h> #include <asm/mmu_context.h> @@ -78,10 +79,17 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return mm ? cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; +} + +static void __flush_tlb_range(struct mm_struct *mm, + const struct cpumask *cmask, unsigned long start, unsigned long size, unsigned long stride) { + unsigned long asid = get_mm_asid(mm); unsigned int cpu; if (cpumask_empty(cmask)) @@ -105,30 +113,26 @@ static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, } put_cpu(); -} -static inline unsigned long get_mm_asid(struct mm_struct *mm) -{ - return cntx2asid(atomic_long_read(&mm->context.id)); + if (mm) + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, start + size); } void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(mm, mm_cpumask(mm), 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int page_size) { - __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), - start, end - start, page_size); + __flush_tlb_range(mm, mm_cpumask(mm), start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, PAGE_SIZE); } @@ -161,13 +165,13 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } } - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, stride_size); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(NULL, cpu_online_mask, start, end - start, PAGE_SIZE); } @@ -175,7 +179,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE); } #endif @@ -186,10 +190,10 @@ bool arch_tlbbatch_should_defer(struct mm_struct *mm) } void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) + struct mm_struct *mm, unsigned long start, unsigned long end) { cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } void arch_flush_tlb_batched_pending(struct mm_struct *mm) @@ -199,7 +203,7 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) void 
arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(NULL, &batch->cpumask, + 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); cpumask_clear(&batch->cpumask); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0e6ca6d5ae4b..c5db2f072c34 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -12,6 +12,7 @@ .text +.align 2 SYM_CODE_START(purgatory_start) lla sp, .Lstack diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e220589dae11..44771eef8c05 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,7 @@ config S390 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -136,6 +137,7 @@ config S390 select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -159,7 +161,6 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_DATA_STORE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8b825e3578d8..6f2c9ce1b154 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -92,7 +92,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8392f8a5ad6d..f18a7d97ac21 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -86,7 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 1e17e288cee4..ede951dc0085 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -209,6 +209,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 251e0372ccbd..faddb9aef3b8 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -33,7 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL)) static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 70c8f9ad13cd..430feb1a5013 
100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -80,7 +80,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..9726b91fe7e4 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,6 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f4ac69506608..afa085e8186c 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -74,8 +74,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -84,16 +82,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -166,18 +155,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ } @@ -239,16 +223,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729e..0776dfde2dba 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2919,10 +2919,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. 
- */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 054240c6798f..5bbdc4190b8b 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -255,7 +255,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { /* * When PCI MIO instructions are unavailable the "physical" address @@ -265,7 +265,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (!static_branch_unlikely(&have_mio)) return (void __iomem *)phys_addr; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 2c44b94f82fb..1b3f43c3ac46 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -58,7 +58,7 @@ static int __init landisk_devices_setup(void) /* open I/O area window */ paddrbase = virt_to_phys((void *)PA_AREA5_IO); prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, pgprot_val(prot)); + cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, prot); if (!cf_ide_base) { printk("allocate_cf_area : can't open CF I/O window!\n"); return -ENOMEM; diff --git a/arch/sh/boards/mach-lboxre2/setup.c b/arch/sh/boards/mach-lboxre2/setup.c index 20d01b430f2a..e95bde207adb 100644 --- a/arch/sh/boards/mach-lboxre2/setup.c +++ b/arch/sh/boards/mach-lboxre2/setup.c @@ -53,7 +53,7 @@ static int __init lboxre2_devices_setup(void) paddrbase = virt_to_phys((void*)PA_AREA5_IO); psize = PAGE_SIZE; prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf0_io_base = (u32)ioremap_prot(paddrbase, psize, pgprot_val(prot)); + cf0_io_base = (u32)ioremap_prot(paddrbase, psize, prot); if (!cf0_io_base) { printk(KERN_ERR "%s : can't open CF I/O window!\n" , __func__ ); return -ENOMEM; diff --git a/arch/sh/boards/mach-sh03/setup.c b/arch/sh/boards/mach-sh03/setup.c index 3901b6031ad5..5c9312f334d3 100644 --- a/arch/sh/boards/mach-sh03/setup.c +++ b/arch/sh/boards/mach-sh03/setup.c @@ -75,7 +75,7 @@ static int __init sh03_devices_setup(void) /* open I/O area window */ paddrbase = virt_to_phys((void *)PA_AREA5_IO); prot = PAGE_KERNEL_PCC(1, _PAGE_PCC_IO16); - cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, pgprot_val(prot)); + cf_ide_base = ioremap_prot(paddrbase, PAGE_SIZE, prot); if (!cf_ide_base) { printk("allocate_cf_area : can't open CF I/O window!\n"); return -ENOMEM; diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 0f663ebec700..6d282b253815 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -279,7 +279,7 @@ unsigned long long poke_real_address_q(unsigned long long addr, #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_NOCACHE) #define ioremap_cache(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) + ioremap_prot((addr), (size), PAGE_KERNEL) #endif /* CONFIG_MMU */ #include <asm-generic/io.h> diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 96d938fdf224..6fe7123d38fa 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -32,10 +32,7 @@ static inline 
void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, addr) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #endif /* __ASM_SH_PGALLOC_H */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 289a2fecebef..99e302eeeec1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -290,7 +290,6 @@ void __init paging_init(void) */ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = __MEMORY_START >> PAGE_SHIFT; - set_max_mapnr(max_low_pfn - min_low_pfn); nodes_clear(node_online_map); @@ -331,15 +330,6 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - pg_data_t *pgdat; - - high_memory = NULL; - for_each_online_pgdat(pgdat) - high_memory = max_t(void *, high_memory, - __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - - memblock_free_all(); - /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 33d20f34560f..5bbde53fb32d 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -73,10 +73,9 @@ __ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot) #endif /* CONFIG_29BIT */ void __iomem __ref *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t pgprot) { void __iomem *mapped; - pgprot_t pgprot = __pgprot(prot); mapped = __ioremap_trapped(phys_addr, size); if (mapped) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2b7f358762c1..dc28f2c4eee3 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -936,7 +936,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { - arch_enter_lazy_mmu_mode(); for (;;) { __set_pte_at(mm, addr, ptep, pte, 0); if (--nr == 0) @@ -945,7 +944,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_val(pte) += PAGE_SIZE; addr += PAGE_SIZE; } - arch_leave_lazy_mmu_mode(); } #define set_ptes set_ptes diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index d96a14ffceeb..fdc93dd12c3e 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -232,19 +232,7 @@ static void __init taint_real_pages(void) } } -static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long tmp; - -#ifdef CONFIG_DEBUG_HIGHMEM - printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); -#endif - - for (tmp = start_pfn; tmp < end_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -} - -void __init mem_init(void) +void __init arch_mm_preinit(void) { int i; @@ -274,23 +262,6 @@ void __init mem_init(void) memset(sparc_valid_addr_bitmap, 0, i << 2); taint_real_pages(); - - max_mapnr = last_valid_pfn - pfn_base; - high_memory = __va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); - - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; - unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - - if (end_pfn <= highstart_pfn) - continue; - - if (start_pfn < highstart_pfn) - start_pfn = highstart_pfn; - - map_high_region(start_pfn, end_pfn); - } } void sparc_flush_page_to_ram(struct page *page) diff 
--git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 05882bca5b73..760818950464 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2505,10 +2505,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - high_memory = __va(last_valid_pfn << PAGE_SHIFT); - - memblock_free_all(); - /* * Must be done after boot memory is put on freelist, because here we * might set fields in deferred struct pages that have not yet been diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 8648a50afe88..a35ddcca5e76 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -52,8 +52,10 @@ out: void arch_enter_lazy_mmu_mode(void) { - struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); + struct tlb_batch *tb; + preempt_disable(); + tb = this_cpu_ptr(&tlb_batch); tb->active = 1; } @@ -64,6 +66,7 @@ void arch_leave_lazy_mmu_mode(void) if (tb->tlb_nr) flush_tlb_pending(); tb->active = 0; + preempt_enable(); } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 18051b1cfce0..79509c7f39de 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -12,6 +12,7 @@ config UML select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER + select ARCH_HAS_STRICT_KERNEL_RWX select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index ede40a160c5e..9cb196070614 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -345,16 +345,20 @@ config UML_RTC by providing a fake RTC clock that causes a wakeup at the right time. -config UML_PCI_OVER_VIRTIO - bool "Enable PCI over VIRTIO device simulation" - # in theory, just VIRTIO is enough, but that causes recursion - depends on VIRTIO_UML +config UML_PCI + bool select FORCE_PCI select UML_IOMEM_EMULATION select UML_DMA_EMULATION select PCI_MSI select PCI_LOCKLESS_CONFIG +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select UML_PCI + config UML_PCI_OVER_VIRTIO_DEVICE_ID int "set the virtio device ID for PCI emulation" default -1 diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 57882e6bc215..0a5820343ad3 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o obj-$(CONFIG_UML_RTC) += rtc.o -obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o +obj-$(CONFIG_UML_PCI) += virt-pci.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o # pcap_user.o must be added explicitly. 
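The UML ubd conversion that follows replaces a raw clone() child sharing the kernel's address space with an os_helper_thread servicing I/O requests over a pipe. A self-contained sketch of that helper-thread model, using plain pthreads rather than UML's os_* wrappers; the names and one-byte "requests" are illustrative only:

        #include <pthread.h>
        #include <stdio.h>
        #include <unistd.h>

        static int req_fds[2];  /* [1]: submitter writes, [0]: helper reads */

        static void *io_helper(void *arg)
        {
                char req;

                /* Block on the pipe; a real helper would read request
                 * descriptors and perform the actual disk I/O. */
                while (read(req_fds[0], &req, 1) == 1)
                        printf("helper: handled request %c\n", req);
                return NULL;
        }

        int main(void)
        {
                pthread_t td;

                if (pipe(req_fds))
                        return 1;
                if (pthread_create(&td, NULL, io_helper, NULL))
                        return 1;

                (void)write(req_fds[1], "a", 1);  /* submit one request */
                close(req_fds[1]);                /* EOF stops the helper */
                pthread_join(td, NULL);
                return 0;
        }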
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index da985e0dc69a..ca08c91f47a3 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -79,7 +79,7 @@ static int __init rng_init (void) if (err < 0) goto err_out_cleanup_hw; - sigio_broken(random_fd); + sigio_broken(); hwrng.name = RNG_MODULE_NAME; hwrng.read = rng_dev_read; diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c index 7c3cec4c68cf..51e79f3148cd 100644 --- a/arch/um/drivers/rtc_user.c +++ b/arch/um/drivers/rtc_user.c @@ -39,7 +39,7 @@ int uml_rtc_start(bool timetravel) } /* apparently timerfd won't send SIGIO, use workaround */ - sigio_broken(uml_rtc_irq_fds[0]); + sigio_broken(); err = add_sigio_fd(uml_rtc_irq_fds[0]); if (err < 0) { close(uml_rtc_irq_fds[0]); diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h index f016fe15499f..2985c14661f4 100644 --- a/arch/um/drivers/ubd.h +++ b/arch/um/drivers/ubd.h @@ -7,8 +7,10 @@ #ifndef __UM_UBD_USER_H #define __UM_UBD_USER_H -extern int start_io_thread(unsigned long sp, int *fds_out); -extern int io_thread(void *arg); +#include <os.h> + +int start_io_thread(struct os_helper_thread **td_out, int *fd_out); +void *io_thread(void *arg); extern int kernel_fd; extern int ubd_read_poll(int timeout); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 0b1e61f72fb3..4de6613e7468 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -474,12 +474,12 @@ static irqreturn_t ubd_intr(int irq, void *dev) } /* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; +static struct os_helper_thread *io_td; static void kill_io_thread(void) { - if(io_pid != -1) - os_kill_process(io_pid, 1); + if (io_td) + os_kill_helper_thread(io_td); } __uml_exitcall(kill_io_thread); @@ -1104,8 +1104,8 @@ static int __init ubd_init(void) late_initcall(ubd_init); -static int __init ubd_driver_init(void){ - unsigned long stack; +static int __init ubd_driver_init(void) +{ int err; /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ @@ -1114,13 +1114,11 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); - if(io_pid < 0){ + err = start_io_thread(&io_td, &thread_fd); + if (err < 0) { printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; + "falling back to synchronous I/O\n", -err); return 0; } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, @@ -1496,12 +1494,11 @@ int kernel_fd = -1; /* Only changed by the io thread. XXX: currently unused. 
*/ static int io_count; -int io_thread(void *arg) +void *io_thread(void *arg) { int n, count, written, res; - os_set_pdeathsig(); - os_fix_helper_signals(); + os_fix_helper_thread_signals(); while(1){ n = bulk_req_safe_read( @@ -1543,5 +1540,5 @@ int io_thread(void *arg) } while (written < n); } - return 0; + return NULL; } diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index b4f8b8e60564..c5e6545f6fcf 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -25,9 +25,9 @@ static struct pollfd kernel_pollfd; -int start_io_thread(unsigned long sp, int *fd_out) +int start_io_thread(struct os_helper_thread **td_out, int *fd_out) { - int pid, fds[2], err; + int fds[2], err; err = os_pipe(fds, 1, 1); if(err < 0){ @@ -47,14 +47,14 @@ int start_io_thread(unsigned long sp, int *fd_out) goto out_close; } - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); - if(pid < 0){ - err = -errno; - printk("start_io_thread - clone failed : errno = %d\n", errno); + err = os_run_helper_thread(td_out, io_thread, NULL); + if (err < 0) { + printk("%s - failed to run helper thread, err = %d\n", + __func__, -err); goto out_close; } - return(pid); + return 0; out_close: os_close_file(fds[0]); diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index dd5580f975cc..b83b5a765d4e 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -5,52 +5,19 @@ */ #include <linux/module.h> #include <linux/pci.h> -#include <linux/virtio.h> -#include <linux/virtio_config.h> #include <linux/logic_iomem.h> #include <linux/of_platform.h> #include <linux/irqdomain.h> -#include <linux/virtio_pcidev.h> -#include <linux/virtio-uml.h> -#include <linux/delay.h> #include <linux/msi.h> #include <linux/unaligned.h> #include <irq_kern.h> +#include "virt-pci.h" + #define MAX_DEVICES 8 #define MAX_MSI_VECTORS 32 #define CFG_SPACE_SIZE 4096 -/* for MSI-X we have a 32-bit payload */ -#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) -#define NUM_IRQ_MSGS 10 - -struct um_pci_message_buffer { - struct virtio_pcidev_msg hdr; - u8 data[8]; -}; - -struct um_pci_device { - struct virtio_device *vdev; - - /* for now just standard BARs */ - u8 resptr[PCI_STD_NUM_BARS]; - - struct virtqueue *cmd_vq, *irq_vq; - -#define UM_PCI_WRITE_BUFS 20 - struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1]; - void *extra_ptrs[UM_PCI_WRITE_BUFS + 1]; - DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS); - -#define UM_PCI_STAT_WAITING 0 - unsigned long status; - - int irq; - - bool platform; -}; - struct um_pci_device_reg { struct um_pci_device *dev; void __iomem *iomem; @@ -65,179 +32,15 @@ static struct irq_domain *um_pci_inner_domain; static struct irq_domain *um_pci_msi_domain; static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; -static unsigned int um_pci_max_delay_us = 40000; -module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); - -static int um_pci_get_buf(struct um_pci_device *dev, bool *posted) -{ - int i; - - for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { - if (!test_and_set_bit(i, dev->used_bufs)) - return i; - } - - *posted = false; - return UM_PCI_WRITE_BUFS; -} - -static void um_pci_free_buf(struct um_pci_device *dev, void *buf) -{ - int i; - - if (buf == &dev->bufs[UM_PCI_WRITE_BUFS]) { - kfree(dev->extra_ptrs[UM_PCI_WRITE_BUFS]); - dev->extra_ptrs[UM_PCI_WRITE_BUFS] = NULL; - return; - } - - for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { - if (buf == &dev->bufs[i]) { - kfree(dev->extra_ptrs[i]); - dev->extra_ptrs[i] = NULL; 
- WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); - return; - } - } - - WARN_ON(1); -} - -static int um_pci_send_cmd(struct um_pci_device *dev, - struct virtio_pcidev_msg *cmd, - unsigned int cmd_size, - const void *extra, unsigned int extra_size, - void *out, unsigned int out_size) -{ - struct scatterlist out_sg, extra_sg, in_sg; - struct scatterlist *sgs_list[] = { - [0] = &out_sg, - [1] = extra ? &extra_sg : &in_sg, - [2] = extra ? &in_sg : NULL, - }; - struct um_pci_message_buffer *buf; - int delay_count = 0; - bool bounce_out; - int ret, len; - int buf_idx; - bool posted; - - if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) - return -EINVAL; - - switch (cmd->op) { - case VIRTIO_PCIDEV_OP_CFG_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_WRITE: - case VIRTIO_PCIDEV_OP_MMIO_MEMSET: - /* in PCI, writes are posted, so don't wait */ - posted = !out; - WARN_ON(!posted); - break; - default: - posted = false; - break; - } - - bounce_out = !posted && cmd_size <= sizeof(*cmd) && - out && out_size <= sizeof(buf->data); - - buf_idx = um_pci_get_buf(dev, &posted); - buf = &dev->bufs[buf_idx]; - memcpy(buf, cmd, cmd_size); - - if (posted && extra && extra_size > sizeof(buf) - cmd_size) { - dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, - GFP_ATOMIC); - - if (!dev->extra_ptrs[buf_idx]) { - um_pci_free_buf(dev, buf); - return -ENOMEM; - } - extra = dev->extra_ptrs[buf_idx]; - } else if (extra && extra_size <= sizeof(buf) - cmd_size) { - memcpy((u8 *)buf + cmd_size, extra, extra_size); - cmd_size += extra_size; - extra_size = 0; - extra = NULL; - cmd = (void *)buf; - } else { - cmd = (void *)buf; - } - - sg_init_one(&out_sg, cmd, cmd_size); - if (extra) - sg_init_one(&extra_sg, extra, extra_size); - /* allow stack for small buffers */ - if (bounce_out) - sg_init_one(&in_sg, buf->data, out_size); - else if (out) - sg_init_one(&in_sg, out, out_size); - - /* add to internal virtio queue */ - ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, - extra ? 2 : 1, - out ? 
1 : 0, - cmd, GFP_ATOMIC); - if (ret) { - um_pci_free_buf(dev, buf); - return ret; - } - - if (posted) { - virtqueue_kick(dev->cmd_vq); - return 0; - } - - /* kick and poll for getting a response on the queue */ - set_bit(UM_PCI_STAT_WAITING, &dev->status); - virtqueue_kick(dev->cmd_vq); - ret = 0; - - while (1) { - void *completed = virtqueue_get_buf(dev->cmd_vq, &len); - - if (completed == buf) - break; - - if (completed) - um_pci_free_buf(dev, completed); - - if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || - ++delay_count > um_pci_max_delay_us, - "um virt-pci delay: %d", delay_count)) { - ret = -EIO; - break; - } - udelay(1); - } - clear_bit(UM_PCI_STAT_WAITING, &dev->status); - - if (bounce_out) - memcpy(out, buf->data, out_size); - - um_pci_free_buf(dev, buf); - - return ret; -} - static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, int size) { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_READ, - .size = size, - .addr = offset, - }; - /* max 8, we might not use it all */ - u8 data[8]; if (!dev) return ULONG_MAX; - memset(data, 0xff, sizeof(data)); - switch (size) { case 1: case 2: @@ -251,23 +54,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, return ULONG_MAX; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) - return ULONG_MAX; - - switch (size) { - case 1: - return data[0]; - case 2: - return le16_to_cpup((void *)data); - case 4: - return le32_to_cpup((void *)data); -#ifdef CONFIG_64BIT - case 8: - return le64_to_cpup((void *)data); -#endif - default: - return ULONG_MAX; - } + return dev->ops->cfgspace_read(dev, offset, size); } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -275,42 +62,24 @@ static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, { struct um_pci_device_reg *reg = priv; struct um_pci_device *dev = reg->dev; - struct { - struct virtio_pcidev_msg hdr; - /* maximum size - we may only use parts of it */ - u8 data[8]; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .size = size, - .addr = offset, - }, - }; if (!dev) return; switch (size) { case 1: - msg.data[0] = (u8)val; - break; case 2: - put_unaligned_le16(val, (void *)msg.data); - break; case 4: - put_unaligned_le32(val, (void *)msg.data); - break; #ifdef CONFIG_64BIT case 8: - put_unaligned_le64(val, (void *)msg.data); - break; #endif + break; default: WARN(1, "invalid config space write size %d\n", size); return; } - WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); + dev->ops->cfgspace_write(dev, offset, size, val); } static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { @@ -318,30 +87,14 @@ static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { .write = um_pci_cfgspace_write, }; -static void um_pci_bar_copy_from(void *priv, void *buffer, - unsigned int offset, int size) +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) { u8 *resptr = priv; struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_READ, - .bar = *resptr, - .size = size, - .addr = offset, - }; - - memset(buffer, 0xff, size); - - um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); -} - -static unsigned long um_pci_bar_read(void *priv, unsigned int offset, - int size) -{ - /* 8 is maximum size - we may only use parts of it */ - 
u8 data[8]; + u8 bar = *resptr; switch (size) { case 1: @@ -352,72 +105,60 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, #endif break; default: - WARN(1, "invalid config space read size %d\n", size); + WARN(1, "invalid bar read size %d\n", size); return ULONG_MAX; } - um_pci_bar_copy_from(priv, data, offset, size); + return dev->ops->bar_read(dev, bar, offset, size); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; switch (size) { case 1: - return data[0]; case 2: - return le16_to_cpup((void *)data); case 4: - return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: - return le64_to_cpup((void *)data); #endif + break; default: - return ULONG_MAX; + WARN(1, "invalid bar write size %d\n", size); + return; } + + dev->ops->bar_write(dev, bar, offset, size, val); } -static void um_pci_bar_copy_to(void *priv, unsigned int offset, - const void *buffer, int size) +static void um_pci_bar_copy_from(void *priv, void *buffer, + unsigned int offset, int size) { u8 *resptr = priv; struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct virtio_pcidev_msg hdr = { - .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); + dev->ops->bar_copy_from(dev, bar, buffer, offset, size); } -static void um_pci_bar_write(void *priv, unsigned int offset, int size, - unsigned long val) +static void um_pci_bar_copy_to(void *priv, unsigned int offset, + const void *buffer, int size) { - /* maximum size - we may only use parts of it */ - u8 data[8]; - - switch (size) { - case 1: - data[0] = (u8)val; - break; - case 2: - put_unaligned_le16(val, (void *)data); - break; - case 4: - put_unaligned_le32(val, (void *)data); - break; -#ifdef CONFIG_64BIT - case 8: - put_unaligned_le64(val, (void *)data); - break; -#endif - default: - WARN(1, "invalid config space write size %d\n", size); - return; - } + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; - um_pci_bar_copy_to(priv, offset, data, size); + dev->ops->bar_copy_to(dev, bar, offset, buffer, size); } static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) @@ -426,20 +167,9 @@ static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) struct um_pci_device *dev = container_of(resptr - *resptr, struct um_pci_device, resptr[0]); - struct { - struct virtio_pcidev_msg hdr; - u8 data; - } msg = { - .hdr = { - .op = VIRTIO_PCIDEV_OP_CFG_WRITE, - .bar = *resptr, - .size = size, - .addr = offset, - }, - .data = value, - }; + u8 bar = *resptr; - um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); + dev->ops->bar_set(dev, bar, offset, value, size); } static const struct logic_iomem_ops um_pci_device_bar_ops = { @@ -486,76 +216,6 @@ static void um_pci_rescan(void) pci_unlock_rescan_remove(); } -static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) -{ - struct scatterlist sg[1]; - - sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); - if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) - kfree(buf); - else if (kick) - virtqueue_kick(vq); -} - -static void um_pci_handle_irq_message(struct virtqueue *vq, - struct virtio_pcidev_msg *msg) -{ - struct 
virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - - if (!dev->irq) - return; - - /* we should properly chain interrupts, but on ARCH=um we don't care */ - - switch (msg->op) { - case VIRTIO_PCIDEV_OP_INT: - generic_handle_irq(dev->irq); - break; - case VIRTIO_PCIDEV_OP_MSI: - /* our MSI message is just the interrupt number */ - if (msg->size == sizeof(u32)) - generic_handle_irq(le32_to_cpup((void *)msg->data)); - else - generic_handle_irq(le16_to_cpup((void *)msg->data)); - break; - case VIRTIO_PCIDEV_OP_PME: - /* nothing to do - we already woke up due to the message */ - break; - default: - dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); - break; - } -} - -static void um_pci_cmd_vq_cb(struct virtqueue *vq) -{ - struct virtio_device *vdev = vq->vdev; - struct um_pci_device *dev = vdev->priv; - void *cmd; - int len; - - if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) - return; - - while ((cmd = virtqueue_get_buf(vq, &len))) - um_pci_free_buf(dev, cmd); -} - -static void um_pci_irq_vq_cb(struct virtqueue *vq) -{ - struct virtio_pcidev_msg *msg; - int len; - - while ((msg = virtqueue_get_buf(vq, &len))) { - if (len >= sizeof(*msg)) - um_pci_handle_irq_message(vq, msg); - - /* recycle the message buffer */ - um_pci_irq_vq_addbuf(vq, msg, true); - } -} - #ifdef CONFIG_OF /* Copied from arch/x86/kernel/devicetree.c */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) @@ -577,200 +237,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) } #endif -static int um_pci_init_vqs(struct um_pci_device *dev) -{ - struct virtqueue_info vqs_info[] = { - { "cmd", um_pci_cmd_vq_cb }, - { "irq", um_pci_irq_vq_cb }, - }; - struct virtqueue *vqs[2]; - int err, i; - - err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); - if (err) - return err; - - dev->cmd_vq = vqs[0]; - dev->irq_vq = vqs[1]; - - virtio_device_ready(dev->vdev); - - for (i = 0; i < NUM_IRQ_MSGS; i++) { - void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); - - if (msg) - um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); - } - - virtqueue_kick(dev->irq_vq); - - return 0; -} - -static void __um_pci_virtio_platform_remove(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); - - mutex_lock(&um_pci_mtx); - um_pci_platform_device = NULL; - mutex_unlock(&um_pci_mtx); - - kfree(dev); -} - -static int um_pci_virtio_platform_probe(struct virtio_device *vdev, - struct um_pci_device *dev) -{ - int ret; - - dev->platform = true; - - mutex_lock(&um_pci_mtx); - - if (um_pci_platform_device) { - mutex_unlock(&um_pci_mtx); - ret = -EBUSY; - goto out_free; - } - - ret = um_pci_init_vqs(dev); - if (ret) { - mutex_unlock(&um_pci_mtx); - goto out_free; - } - - um_pci_platform_device = dev; - - mutex_unlock(&um_pci_mtx); - - ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); - if (ret) - __um_pci_virtio_platform_remove(vdev, dev); - - return ret; - -out_free: - kfree(dev); - return ret; -} - -static int um_pci_virtio_probe(struct virtio_device *vdev) -{ - struct um_pci_device *dev; - int i, free = -1; - int err = -ENOSPC; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - dev->vdev = vdev; - vdev->priv = dev; - - if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) - return um_pci_virtio_platform_probe(vdev, dev); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev) - continue; - free = i; - break; - } - - if (free < 0) - goto 
error; - - err = um_pci_init_vqs(dev); - if (err) - goto error; - - dev->irq = irq_alloc_desc(numa_node_id()); - if (dev->irq < 0) { - err = dev->irq; - goto err_reset; - } - um_pci_devices[free].dev = dev; - vdev->priv = dev; - - mutex_unlock(&um_pci_mtx); - - device_set_wakeup_enable(&vdev->dev, true); - - /* - * In order to do suspend-resume properly, don't allow VQs - * to be suspended. - */ - virtio_uml_set_no_vq_suspend(vdev, true); - - um_pci_rescan(); - return 0; -err_reset: - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); -error: - mutex_unlock(&um_pci_mtx); - kfree(dev); - return err; -} - -static void um_pci_virtio_remove(struct virtio_device *vdev) -{ - struct um_pci_device *dev = vdev->priv; - int i; - - if (dev->platform) { - of_platform_depopulate(&vdev->dev); - __um_pci_virtio_platform_remove(vdev, dev); - return; - } - - device_set_wakeup_enable(&vdev->dev, false); - - mutex_lock(&um_pci_mtx); - for (i = 0; i < MAX_DEVICES; i++) { - if (um_pci_devices[i].dev != dev) - continue; - - um_pci_devices[i].dev = NULL; - irq_free_desc(dev->irq); - - break; - } - mutex_unlock(&um_pci_mtx); - - if (i < MAX_DEVICES) { - struct pci_dev *pci_dev; - - pci_dev = pci_get_slot(bridge->bus, i); - if (pci_dev) - pci_stop_and_remove_bus_device_locked(pci_dev); - } - - /* Stop all virtqueues */ - virtio_reset_device(vdev); - dev->cmd_vq = NULL; - dev->irq_vq = NULL; - vdev->config->del_vqs(vdev); - - kfree(dev); -} - -static struct virtio_device_id id_table[] = { - { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; -MODULE_DEVICE_TABLE(virtio, id_table); - -static struct virtio_driver um_pci_virtio_driver = { - .driver.name = "virtio-pci", - .id_table = id_table, - .probe = um_pci_virtio_probe, - .remove = um_pci_virtio_remove, -}; - static struct resource virt_cfgspace_resource = { .name = "PCI config space", .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, @@ -889,7 +355,7 @@ static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) } static struct irq_chip um_pci_msi_bottom_irq_chip = { - .name = "UM virtio MSI", + .name = "UM virtual MSI", .irq_compose_msi_msg = um_pci_compose_msi_msg, }; @@ -939,7 +405,7 @@ static const struct irq_domain_ops um_pci_inner_domain_ops = { }; static struct irq_chip um_pci_msi_irq_chip = { - .name = "UM virtio PCIe MSI", + .name = "UM virtual PCIe MSI", .irq_mask = pci_msi_mask_irq, .irq_unmask = pci_msi_unmask_irq, }; @@ -998,6 +464,78 @@ static struct resource virt_platform_resource = { .flags = IORESOURCE_MEM, }; +int um_pci_device_register(struct um_pci_device *dev) +{ + int i, free = -1; + int err = 0; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) { + err = -ENOSPC; + goto out; + } + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto out; + } + + um_pci_devices[free].dev = dev; + +out: + mutex_unlock(&um_pci_mtx); + if (!err) + um_pci_rescan(); + return err; +} + +void um_pci_device_unregister(struct um_pci_device *dev) +{ + int i; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + break; + } + mutex_unlock(&um_pci_mtx); + + if (i < MAX_DEVICES) { + struct pci_dev *pci_dev; + + pci_dev = pci_get_slot(bridge->bus, i); + if (pci_dev) + pci_stop_and_remove_bus_device_locked(pci_dev); + } +} + +int um_pci_platform_device_register(struct 
um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device) + return -EBUSY; + um_pci_platform_device = dev; + return 0; +} + +void um_pci_platform_device_unregister(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device == dev) + um_pci_platform_device = NULL; +} + static int __init um_pci_init(void) { struct irq_domain_info inner_domain_info = { @@ -1014,10 +552,6 @@ static int __init um_pci_init(void) WARN_ON(logic_iomem_add_region(&virt_platform_resource, &um_pci_platform_ops)); - if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, - "No virtio device ID configured for PCI - no PCI support\n")) - return 0; - bridge = pci_alloc_host_bridge(0); if (!bridge) { err = -ENOMEM; @@ -1065,10 +599,8 @@ static int __init um_pci_init(void) if (err) goto free; - err = register_virtio_driver(&um_pci_virtio_driver); - if (err) - goto free; return 0; + free: if (!IS_ERR_OR_NULL(um_pci_inner_domain)) irq_domain_remove(um_pci_inner_domain); @@ -1080,11 +612,10 @@ free: } return err; } -module_init(um_pci_init); +device_initcall(um_pci_init); static void __exit um_pci_exit(void) { - unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); diff --git a/arch/um/drivers/virt-pci.h b/arch/um/drivers/virt-pci.h new file mode 100644 index 000000000000..b20d1475d1eb --- /dev/null +++ b/arch/um/drivers/virt-pci.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VIRT_PCI_H +#define __UM_VIRT_PCI_H + +#include <linux/pci.h> + +struct um_pci_device { + const struct um_pci_ops *ops; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + int irq; +}; + +struct um_pci_ops { + unsigned long (*cfgspace_read)(struct um_pci_device *dev, + unsigned int offset, int size); + void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset, + int size, unsigned long val); + + unsigned long (*bar_read)(struct um_pci_device *dev, int bar, + unsigned int offset, int size); + void (*bar_write)(struct um_pci_device *dev, int bar, + unsigned int offset, int size, unsigned long val); + + void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer, + unsigned int offset, int size); + void (*bar_copy_to)(struct um_pci_device *dev, int bar, + unsigned int offset, const void *buffer, int size); + void (*bar_set)(struct um_pci_device *dev, int bar, + unsigned int offset, u8 value, int size); +}; + +int um_pci_device_register(struct um_pci_device *dev); +void um_pci_device_unregister(struct um_pci_device *dev); + +int um_pci_platform_device_register(struct um_pci_device *dev); +void um_pci_platform_device_unregister(struct um_pci_device *dev); + +#endif /* __UM_VIRT_PCI_H */ diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c new file mode 100644 index 000000000000..3c4c4c928fdd --- /dev/null +++ b/arch/um/drivers/virtio_pcidev.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/logic_iomem.h> +#include <linux/of_platform.h> +#include <linux/irqdomain.h> +#include <linux/virtio_pcidev.h> +#include <linux/virtio-uml.h> +#include <linux/delay.h> +#include <linux/msi.h> +#include <linux/unaligned.h> +#include <irq_kern.h> + +#include "virt-pci.h" + +#define 
to_virtio_pcidev(_pdev) \ + container_of(_pdev, struct virtio_pcidev_device, pdev) + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +struct virtio_pcidev_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +struct virtio_pcidev_device { + struct um_pci_device pdev; + struct virtio_device *vdev; + + struct virtqueue *cmd_vq, *irq_vq; + +#define VIRTIO_PCIDEV_WRITE_BUFS 20 + struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS); + +#define UM_PCI_STAT_WAITING 0 + unsigned long status; + + bool platform; +}; + +static unsigned int virtio_pcidev_max_delay_us = 40000; +module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644); + +static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } + + *posted = false; + return VIRTIO_PCIDEV_WRITE_BUFS; +} + +static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) { + kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]); + dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} + +static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? 
&in_sg : NULL, + }; + struct virtio_pcidev_message_buffer *buf; + int delay_count = 0; + bool bounce_out; + int ret, len; + int buf_idx; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); + + buf_idx = virtio_pcidev_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + if (posted && extra && extra_size > sizeof(buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + virtio_pcidev_free_buf(dev, buf); + return -ENOMEM; + } + extra = dev->extra_ptrs[buf_idx]; + } else if (extra && extra_size <= sizeof(buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; + } else { + cmd = (void *)buf; + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 1 : 0, + cmd, GFP_ATOMIC); + if (ret) { + virtio_pcidev_free_buf(dev, buf); + return ret; + } + + if (posted) { + virtqueue_kick(dev->cmd_vq); + return 0; + } + + /* kick and poll for getting a response on the queue */ + set_bit(UM_PCI_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + ret = 0; + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == buf) + break; + + if (completed) + virtio_pcidev_free_buf(dev, completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > virtio_pcidev_max_delay_us, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(UM_PCI_STAT_WAITING, &dev->status); + + if (bounce_out) + memcpy(out, buf->data, out_size); + + virtio_pcidev_free_buf(dev, buf); + + return ret; +} + +static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* max 8, we might not use it all */ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + /* size has been checked in um_pci_cfgspace_read() */ + if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = 
size, + .addr = offset, + }, + }; + + /* size has been checked in um_pci_cfgspace_write() */ + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + } + + WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev, + int bar, void *buffer, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = bar, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_read() */ + virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev, + int bar, unsigned int offset, + const void *buffer, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }; + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_write() */ + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size); +} + +static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }, + .data = value, + }; + + virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct um_pci_ops virtio_pcidev_um_pci_ops = { + .cfgspace_read = virtio_pcidev_cfgspace_read, + .cfgspace_write = virtio_pcidev_cfgspace_write, + .bar_read = virtio_pcidev_bar_read, + .bar_write = virtio_pcidev_bar_write, + .bar_copy_from = virtio_pcidev_bar_copy_from, + .bar_copy_to = virtio_pcidev_bar_copy_to, + .bar_set = virtio_pcidev_bar_set, +}; + +static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void 
virtio_pcidev_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + + if (!dev->pdev.irq) + return; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->pdev.irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) + virtio_pcidev_free_buf(dev, cmd); +} + +static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + virtio_pcidev_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + virtio_pcidev_irq_vq_addbuf(vq, msg, true); + } +} + +static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev) +{ + struct virtqueue_info vqs_info[] = { + { "cmd", virtio_pcidev_cmd_vq_cb }, + { "irq", virtio_pcidev_irq_vq_cb }, + }; + struct virtqueue *vqs[2]; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + virtio_device_ready(dev->vdev); + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + um_pci_platform_device_unregister(&dev->pdev); + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + int err; + + dev->platform = true; + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_platform_device_register(&dev->pdev); + if (err) + goto err_reset; + + err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); + if (err) + goto err_unregister; + + return 0; + +err_unregister: + um_pci_platform_device_unregister(&dev->pdev); +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static int virtio_pcidev_virtio_probe(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + dev->pdev.ops = &virtio_pcidev_um_pci_ops; + + if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) + return virtio_pcidev_virtio_platform_probe(vdev, dev); + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_device_register(&dev->pdev); + if (err) + goto err_reset; + + device_set_wakeup_enable(&vdev->dev, 
true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. + */ + virtio_uml_set_no_vq_suspend(vdev, true); + + return 0; + +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev = vdev->priv; + + if (dev->platform) { + of_platform_depopulate(&vdev->dev); + __virtio_pcidev_virtio_platform_remove(vdev, dev); + return; + } + + device_set_wakeup_enable(&vdev->dev, false); + + um_pci_device_unregister(&dev->pdev); + + /* Stop all virtqueues */ + virtio_reset_device(vdev); + dev->cmd_vq = NULL; + dev->irq_vq = NULL; + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver virtio_pcidev_virtio_driver = { + .driver.name = "virtio-pci", + .id_table = id_table, + .probe = virtio_pcidev_virtio_probe, + .remove = virtio_pcidev_virtio_remove, +}; + +static int __init virtio_pcidev_init(void) +{ + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + return register_virtio_driver(&virtio_pcidev_virtio_driver); +} +late_initcall(virtio_pcidev_init); + +static void __exit virtio_pcidev_exit(void) +{ + unregister_virtio_driver(&virtio_pcidev_virtio_driver); +} +module_exit(virtio_pcidev_exit); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 428f2c5158c2..04ab3b653a48 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mmiowb.h +generic-y += module.h generic-y += module.lds.h generic-y += param.h generic-y += parport.h diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index f0af23c3aeb2..826ec44b58cd 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -25,27 +25,18 @@ */ extern pgd_t *pgd_alloc(struct mm_struct *); -#define __pte_free_tlb(tlb, pte, address) \ -do { \ - pagetable_dtor(page_ptdesc(pte)); \ - tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ -} while (0) +#define __pte_free_tlb(tlb, pte, address) \ + tlb_remove_ptdesc((tlb), page_ptdesc(pte)) #if CONFIG_PGTABLE_LEVELS > 2 -#define __pmd_free_tlb(tlb, pmd, address) \ -do { \ - pagetable_dtor(virt_to_ptdesc(pmd)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ -} while (0) +#define __pmd_free_tlb(tlb, pmd, address) \ + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pmd)) #if CONFIG_PGTABLE_LEVELS > 3 -#define __pud_free_tlb(tlb, pud, address) \ -do { \ - pagetable_dtor(virt_to_ptdesc(pud)); \ - tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ -} while (0) +#define __pud_free_tlb(tlb, pud, address) \ + tlb_remove_ptdesc((tlb), virt_to_ptdesc(pud)) #endif #endif diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 5d6356eafffe..8a789c17acd8 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -31,6 +31,8 @@ struct thread_struct { } thread; } request; + void *segv_continue; + /* Contains variable sized FP registers */ struct pt_regs regs; }; diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 1d4b6bbc1b65..3a08f9029a3f 100644 --- 
a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include <asm/elf.h> #include <linux/unaligned.h> +#include <sysdep/faultinfo.h> #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ @@ -44,19 +45,28 @@ static inline int __access_ok(const void __user *ptr, unsigned long size) __access_ok_vsyscall(addr, size)); } -/* no pagefaults for kernel addresses in um */ #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - *((type *)dst) = get_unaligned((type *)(src)); \ - if (0) /* make sure the label looks used to the compiler */ \ + int __faulted; \ + \ + ___backtrack_faulted(__faulted); \ + if (__faulted) { \ + *((type *)dst) = (type) 0; \ goto err_label; \ + } \ + *((type *)dst) = get_unaligned((type *)(src)); \ + current->thread.segv_continue = NULL; \ } while (0) #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - put_unaligned(*((type *)src), (type *)(dst)); \ - if (0) /* make sure the label looks used to the compiler */ \ + int __faulted; \ + \ + ___backtrack_faulted(__faulted); \ + if (__faulted) \ goto err_label; \ + put_unaligned(*((type *)src), (type *)(dst)); \ + current->thread.segv_continue = NULL; \ } while (0) #endif diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index b22226634ff6..138908b999d7 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -83,6 +83,8 @@ extern void time_travel_not_configured(void); #define time_travel_del_event(...) time_travel_not_configured() #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ +extern unsigned long tt_extra_sched_jiffies; + /* * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, * which is intentional since we really shouldn't link it in that case. 
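The __get_kernel_nofault()/__put_kernel_nofault() hunks above replace UML's old assumption that kernel addresses never fault with an explicit recovery handshake: ___backtrack_faulted() records a continuation point in current->thread.segv_continue before the access, and the segv() hunk further down rewrites the signal frame's instruction pointer to that point via mc_set_rip(). A minimal user-space sketch of the same idea, with sigsetjmp()/siglongjmp() standing in for the mcontext rewrite (probe_read_int() and its names are illustrative, not part of the patch):

/*
 * probe_read.c - illustrative sketch only: mirrors the segv_continue
 * idea with sigsetjmp()/siglongjmp() instead of mc_set_rip().
 * Build: cc -o probe_read probe_read.c
 */
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf recover;		/* plays the role of thread.segv_continue */

static void on_segv(int sig)
{
	siglongjmp(recover, 1);		/* "rewrite the IP" to the recovery point */
}

/* Copy one int from src; return -1 instead of crashing on a bad pointer. */
static int probe_read_int(const int *src, int *dst)
{
	if (sigsetjmp(recover, 1))	/* fault taken: resume here */
		return -1;
	*dst = *src;			/* the potentially faulting access */
	return 0;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_segv };
	int ok = 42, out;

	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);

	printf("good pointer: %d\n", probe_read_int(&ok, &out) ? -1 : out);
	printf("bad pointer : %d\n", probe_read_int((int *)8, &out));
	return 0;
}

Note that the patch clears the recovery target (segv_continue = NULL) once the access succeeds, so a later unrelated fault cannot be misdirected to a stale label; the sketch gets the same effect because sigsetjmp() is re-armed on every call.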
diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h index 880ee42a3329..cc398a21ad96 100644 --- a/arch/um/include/shared/arch.h +++ b/arch/um/include/shared/arch.h @@ -12,4 +12,6 @@ extern void arch_check_bugs(void); extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs); extern void arch_examine_signal(int sig, struct uml_pt_regs *regs); +void mc_set_rip(void *_mc, void *target); + #endif diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index ea65f151bf48..4f44dcce8a7c 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -50,7 +50,7 @@ extern int linux_main(int argc, char **argv, char **envp); extern void uml_finishsetup(void); struct siginfo; -extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *); +extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *, void *); #endif diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index da0f6eea30d0..88835b52ae2b 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -15,7 +15,8 @@ enum um_irq_type { }; struct siginfo; -extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void sigio_handler(int sig, struct siginfo *unused_si, + struct uml_pt_regs *regs, void *mc); void sigio_run_timetravel_handlers(void); extern void free_irq_by_fd(int fd); extern void deactivate_fd(int fd, int irqnum); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index f21dc8517538..00ca3e12fd9a 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -24,10 +24,12 @@ extern void free_stack(unsigned long stack, int order); struct pt_regs; extern void do_signal(struct pt_regs *regs); extern void interrupt_end(void); -extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); +extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs, + void *mc); extern unsigned long segv(struct faultinfo fi, unsigned long ip, - int is_user, struct uml_pt_regs *regs); + int is_user, struct uml_pt_regs *regs, + void *mc); extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); @@ -59,8 +61,10 @@ extern unsigned long from_irq_stack(int nested); extern int singlestepping(void); -extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); -extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc); +extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); void um_idle_sleep(void); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index adfa08062f88..d4727efcf23d 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,7 +47,6 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, diff --git a/arch/um/include/shared/os.h 
b/arch/um/include/shared/os.h index 5babad8c5f75..152a60080d5b 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -213,7 +213,6 @@ extern int os_protect_memory(void *addr, unsigned long len, extern int os_unmap_memory(void *addr, int len); extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); -extern int os_mincore(void *addr, unsigned long len); void os_set_pdeathsig(void); @@ -225,6 +224,11 @@ extern int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long *stack_out); extern int helper_wait(int pid); +struct os_helper_thread; +int os_run_helper_thread(struct os_helper_thread **td_out, + void *(*routine)(void *), void *arg); +void os_kill_helper_thread(struct os_helper_thread *td); +void os_fix_helper_thread_signals(void); /* umid.c */ extern int umid_file_name(char *name, char *buf, int len); @@ -310,7 +314,7 @@ extern void um_irqs_resume(void); extern int add_sigio_fd(int fd); extern int ignore_sigio_fd(int fd); extern void maybe_sigio_broken(int fd); -extern void sigio_broken(int fd); +extern void sigio_broken(void); /* * unlocked versions for IRQ controller code. * diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h index e60c8b227844..c6c2edce1f6d 100644 --- a/arch/um/include/shared/sigio.h +++ b/arch/um/include/shared/sigio.h @@ -6,7 +6,6 @@ #ifndef __SIGIO_H__ #define __SIGIO_H__ -extern int write_sigio_irq(int fd); extern void sigio_lock(void); extern void sigio_unlock(void); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index f8567b933ffa..4df1cd0d2017 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -17,7 +17,7 @@ extra-y := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ signal.o sysrq.o time.o tlb.o trap.o \ - um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ + um_arch.o umid.o kmsg_dump.o capflags.o skas/ obj-y += load_file.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index a4991746f5ea..abe8f30a521c 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -236,7 +236,8 @@ static void _sigio_handler(struct uml_pt_regs *regs, free_irqs(); } -void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { preempt_disable(); _sigio_handler(regs, irqs_suspended); diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c deleted file mode 100644 index 8ccd56813f68..000000000000 --- a/arch/um/kernel/maccess.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2013 Richard Weinberger <richrd@nod.at> - */ - -#include <linux/uaccess.h> -#include <linux/kernel.h> -#include <os.h> - -bool copy_from_kernel_nofault_allowed(const void *src, size_t size) -{ - void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); - - if ((unsigned long)src < PAGE_SIZE || size <= 0) - return false; - if (os_mincore(psrc, size + src - psrc) <= 0) - return false; - return true; -} diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index befed230aac2..76bec7de81b5 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -9,6 +9,8 @@ #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> +#include <linux/init.h> +#include <asm/sections.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <as-layout.h> @@ -54,7 +56,7 @@ int 
kmalloc_ok = 0; /* Used during early boot */ static unsigned long brk_end; -void __init mem_init(void) +void __init arch_mm_preinit(void) { /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); @@ -66,10 +68,12 @@ void __init mem_init(void) map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; - - /* this will put all low memory onto the freelists */ - memblock_free_all(); + min_low_pfn = PFN_UP(__pa(uml_reserved)); max_pfn = max_low_pfn; +} + +void __init mem_init(void) +{ kmalloc_ok = 1; } @@ -241,3 +245,11 @@ static const pgprot_t protection_map[16] = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED }; DECLARE_VM_GET_PAGE_PROT + +void mark_rodata_ro(void) +{ + unsigned long rodata_start = PFN_ALIGN(__start_rodata); + unsigned long rodata_end = PFN_ALIGN(__end_rodata); + + os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0); +} diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index a74f17b033c4..af02b5f9911d 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,18 +22,6 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -void __init mem_total_pages(unsigned long physmem, unsigned long iomem) -{ - unsigned long phys_pages, iomem_pages, total_pages; - - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - - total_pages = phys_pages + iomem_pages; - - max_mapnr = total_pages; -} - void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 5085a50c3b8c..4fc04742048a 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -8,32 +8,6 @@ #include <os.h> #include <sigio.h> -/* Protected by sigio_lock() called from write_sigio_workaround */ -static int sigio_irq_fd = -1; - -static irqreturn_t sigio_interrupt(int irq, void *data) -{ - char c; - - os_read_file(sigio_irq_fd, &c, sizeof(c)); - return IRQ_HANDLED; -} - -int write_sigio_irq(int fd) -{ - int err; - - err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, - 0, "write sigio", NULL); - if (err < 0) { - printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " - "err = %d\n", err); - return -1; - } - sigio_irq_fd = fd; - return 0; -} - /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ static DEFINE_MUTEX(sigio_mutex); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index b09e85279d2b..a5beaea2967e 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -31,6 +31,17 @@ void handle_syscall(struct uml_pt_regs *r) goto out; syscall = UPT_SYSCALL_NR(r); + + /* + * If no time passes, then sched_yield may not actually yield, causing + * broken spinlock implementations in userspace (ASAN) to hang for long + * periods of time. 
+ */ + if ((time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) && + syscall == __NR_sched_yield) + tt_extra_sched_jiffies += 1; + if (syscall >= 0 && syscall < __NR_syscalls) { unsigned long ret = EXECUTE_SYSCALL(syscall, regs); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index cdaee3e94273..ce073150dc20 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -16,6 +16,7 @@ #include <kern_util.h> #include <os.h> #include <skas.h> +#include <arch.h> /* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by @@ -175,12 +176,14 @@ void fatal_sigsegv(void) * @sig: the signal number * @unused_si: the signal info struct; unused in this handler * @regs: the ptrace register information + * @mc: the mcontext of the signal * * The handler first extracts the faultinfo from the UML ptrace regs struct. * If the userfault did not happen in an UML userspace process, bad_segv is called. * Otherwise the signal did happen in a cloned userspace process, handle it. */ -void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { struct faultinfo * fi = UPT_FAULTINFO(regs); @@ -189,7 +192,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) bad_segv(*fi, UPT_IP(regs)); return; } - segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc); } /* @@ -199,7 +202,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) * give us bad data! */ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, - struct uml_pt_regs *regs) + struct uml_pt_regs *regs, void *mc) { int si_code; int err; @@ -223,6 +226,19 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, goto out; } else if (current->mm == NULL) { + if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); + } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } @@ -274,7 +290,8 @@ out: return 0; } -void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) +void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs, + void *mc) { int code, err; if (!UPT_IS_USER(regs)) { @@ -302,7 +319,8 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } } -void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, + void *mc) { do_IRQ(WINCH_IRQ, regs); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8be91974e786..d4b3b6742ec8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -12,6 +12,7 @@ #include <linux/panic_notifier.h> #include <linux/seq_file.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/utsname.h> #include <linux/sched.h> #include <linux/sched/task.h> @@ -78,7 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "model name\t: UML\n"); seq_printf(m, "mode\t\t: skas\n"); seq_printf(m, "host\t\t: %s\n", host_info); - seq_printf(m, 
"fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no"); + seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU))); seq_printf(m, "flags\t\t:"); for (i = 0; i < 32*NCAPINTS; i++) if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL)) @@ -385,7 +386,6 @@ int __init linux_main(int argc, char **argv, char **envp) high_physmem = uml_physmem + physmem_size; end_iomem = high_physmem + iomem_size; - high_memory = (void *) end_iomem; start_vm = VMALLOC_START; @@ -419,7 +419,6 @@ void __init setup_arch(char **cmdline_p) stack_protections((unsigned long) init_task.stack); setup_physmem(uml_physmem, uml_reserved, physmem_size); - mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 3cb8ac63be6e..89c2ad2a4e3a 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -8,6 +8,7 @@ #include <unistd.h> #include <errno.h> #include <sched.h> +#include <pthread.h> #include <linux/limits.h> #include <sys/socket.h> #include <sys/wait.h> @@ -121,6 +122,10 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; + /* To share memory space, use os_run_helper_thread() instead. */ + if (flags & CLONE_VM) + return -EINVAL; + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; @@ -167,3 +172,65 @@ int helper_wait(int pid) } else return 0; } + +struct os_helper_thread { + pthread_t handle; +}; + +int os_run_helper_thread(struct os_helper_thread **td_out, + void *(*routine)(void *), void *arg) +{ + struct os_helper_thread *td; + sigset_t sigset, oset; + int err, flags; + + flags = __uml_cant_sleep() ? UM_GFP_ATOMIC : UM_GFP_KERNEL; + td = uml_kmalloc(sizeof(*td), flags); + if (!td) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = -errno; + kfree(td); + return err; + } + + err = pthread_create(&td->handle, NULL, routine, arg); + + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask: %d", errno); + + if (err != 0) + kfree(td); + else + *td_out = td; + + return -err; +} + +void os_kill_helper_thread(struct os_helper_thread *td) +{ + pthread_cancel(td->handle); + pthread_join(td->handle, NULL); + kfree(td); +} + +void os_fix_helper_thread_signals(void) +{ + sigset_t sigset; + + sigemptyset(&sigset); + + sigaddset(&sigset, SIGWINCH); + sigaddset(&sigset, SIGPIPE); + sigaddset(&sigset, SIGPROF); + sigaddset(&sigset, SIGINT); + sigaddset(&sigset, SIGTERM); + sigaddset(&sigset, SIGCHLD); + sigaddset(&sigset, SIGALRM); + sigaddset(&sigset, SIGIO); + sigaddset(&sigset, SIGUSR1); + + pthread_sigmask(SIG_SETMASK, &sigset, NULL); +} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 9f086f939420..184566edeee9 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -142,57 +142,6 @@ out: return ok; } -static int os_page_mincore(void *addr) -{ - char vec[2]; - int ret; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - if (errno == ENOMEM || errno == EINVAL) - return 0; - else - return -errno; - } - - return vec[0] & 1; -} - -int os_mincore(void *addr, unsigned long len) -{ - char *vec; - int ret, i; - - if (len <= UM_KERN_PAGE_SIZE) - return os_page_mincore(addr); - - vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); - if (!vec) - return -ENOMEM; - - ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); - if (ret < 0) { - 
if (errno == ENOMEM || errno == EINVAL) - ret = 0; - else - ret = -errno; - - goto out; - } - - for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { - if (!(vec[i] & 1)) { - ret = 0; - goto out; - } - } - - ret = 1; -out: - free(vec); - return ret; -} - void init_new_thread_signals(void) { set_handler(SIGSEGV); diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9aac8def4d63..a05a6ecee756 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -11,6 +11,7 @@ #include <sched.h> #include <signal.h> #include <string.h> +#include <sys/epoll.h> #include <kern_util.h> #include <init.h> #include <os.h> @@ -21,184 +22,51 @@ * Protected by sigio_lock(), also used by sigio_cleanup, which is an * exitcall. */ -static int write_sigio_pid = -1; -static unsigned long write_sigio_stack; +static struct os_helper_thread *write_sigio_td; -/* - * These arrays are initialized before the sigio thread is started, and - * the descriptors closed after it is killed. So, it can't see them change. - * On the UML side, they are changed under the sigio_lock. - */ -#define SIGIO_FDS_INIT {-1, -1} - -static int write_sigio_fds[2] = SIGIO_FDS_INIT; -static int sigio_private[2] = SIGIO_FDS_INIT; +static int epollfd = -1; -struct pollfds { - struct pollfd *poll; - int size; - int used; -}; +#define MAX_EPOLL_EVENTS 64 -/* - * Protected by sigio_lock(). Used by the sigio thread, but the UML thread - * synchronizes with it. - */ -static struct pollfds current_poll; -static struct pollfds next_poll; -static struct pollfds all_sigio_fds; -static int write_sigio_thread(void *unused) +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; + +static void *write_sigio_thread(void *unused) { - struct pollfds *fds, tmp; - struct pollfd *p; - int i, n, respond_fd; - char c; - - os_set_pdeathsig(); - os_fix_helper_signals(); - fds = &current_poll; + int pid = getpid(); + int r; + + os_fix_helper_thread_signals(); + while (1) { - n = poll(fds->poll, fds->used, -1); - if (n < 0) { + r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1); + if (r < 0) { if (errno == EINTR) continue; - printk(UM_KERN_ERR "write_sigio_thread : poll returned " - "%d, errno = %d\n", n, errno); + printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n", + __func__, errno); } - for (i = 0; i < fds->used; i++) { - p = &fds->poll[i]; - if (p->revents == 0) - continue; - if (p->fd == sigio_private[1]) { - CATCH_EINTR(n = read(sigio_private[1], &c, - sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR - "write_sigio_thread : " - "read on socket failed, " - "err = %d\n", errno); - tmp = current_poll; - current_poll = next_poll; - next_poll = tmp; - respond_fd = sigio_private[1]; - } - else { - respond_fd = write_sigio_fds[1]; - fds->used--; - memmove(&fds->poll[i], &fds->poll[i + 1], - (fds->used - i) * sizeof(*fds->poll)); - } - - CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); - if (n != sizeof(c)) - printk(UM_KERN_ERR "write_sigio_thread : " - "write on socket failed, err = %d\n", - errno); - } - } - return 0; -} - -static int need_poll(struct pollfds *polls, int n) -{ - struct pollfd *new; - - if (n <= polls->size) - return 0; - - new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); - if (new == NULL) { - printk(UM_KERN_ERR "need_poll : failed to allocate new " - "pollfds\n"); - return -ENOMEM; + CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + if (r < 0) + printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", + __func__, errno); } - memcpy(new, polls->poll, polls->used * sizeof(struct 
pollfd)); - kfree(polls->poll); - - polls->poll = new; - polls->size = n; - return 0; -} - -/* - * Must be called with sigio_lock held, because it's needed by the marked - * critical section. - */ -static void update_thread(void) -{ - unsigned long flags; - int n; - char c; - - flags = um_set_signals_trace(0); - CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", - errno); - goto fail; - } - - CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); - if (n != sizeof(c)) { - printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", - errno); - goto fail; - } - - um_set_signals_trace(flags); - return; - fail: - /* Critical section start */ - if (write_sigio_pid != -1) { - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); - } - write_sigio_pid = -1; - close(sigio_private[0]); - close(sigio_private[1]); - close(write_sigio_fds[0]); - close(write_sigio_fds[1]); - /* Critical section end */ - um_set_signals_trace(flags); + return NULL; } int __add_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n; - - for (i = 0; i < all_sigio_fds.used; i++) { - if (all_sigio_fds.poll[i].fd == fd) - break; - } - if (i == all_sigio_fds.used) - return -ENOSPC; - - p = &all_sigio_fds.poll[i]; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - return 0; - } - - n = current_poll.used; - err = need_poll(&next_poll, n + 1); - if (err) - return err; - - memcpy(next_poll.poll, current_poll.poll, - current_poll.used * sizeof(struct pollfd)); - next_poll.poll[n] = *p; - next_poll.used = n + 1; - update_thread(); - - return 0; + struct epoll_event event = { + .data.fd = fd, + .events = EPOLLIN | EPOLLET, + }; + int r; + + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event)); + return r < 0 ? -errno : 0; } - int add_sigio_fd(int fd) { int err; @@ -212,38 +80,11 @@ int add_sigio_fd(int fd) int __ignore_sigio_fd(int fd) { - struct pollfd *p; - int err, i, n = 0; - - /* - * This is called from exitcalls elsewhere in UML - if - * sigio_cleanup has already run, then update_thread will hang - * or fail because the thread is no longer running. - */ - if (write_sigio_pid == -1) - return -EIO; - - for (i = 0; i < current_poll.used; i++) { - if (current_poll.poll[i].fd == fd) - break; - } - if (i == current_poll.used) - return -ENOENT; - - err = need_poll(&next_poll, current_poll.used - 1); - if (err) - return err; - - for (i = 0; i < current_poll.used; i++) { - p = &current_poll.poll[i]; - if (p->fd != fd) - next_poll.poll[n++] = *p; - } - next_poll.used = current_poll.used - 1; - - update_thread(); + struct epoll_event event; + int r; - return 0; + CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event)); + return r < 0 ? -errno : 0; } int ignore_sigio_fd(int fd) @@ -257,125 +98,37 @@ int ignore_sigio_fd(int fd) return err; } -static struct pollfd *setup_initial_poll(int fd) -{ - struct pollfd *p; - - p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); - if (p == NULL) { - printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " - "poll\n"); - return NULL; - } - *p = ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); - return p; -} - static void write_sigio_workaround(void) { - struct pollfd *p; int err; - int l_write_sigio_fds[2]; - int l_sigio_private[2]; - int l_write_sigio_pid; - /* We call this *tons* of times - and most ones we must just fail. 
*/ sigio_lock(); - l_write_sigio_pid = write_sigio_pid; - sigio_unlock(); - - if (l_write_sigio_pid != -1) - return; + if (write_sigio_td) + goto out; - err = os_pipe(l_write_sigio_fds, 1, 1); - if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " - "err = %d\n", -err); - return; + epollfd = epoll_create(MAX_EPOLL_EVENTS); + if (epollfd < 0) { + printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n", + __func__, errno); + goto out; } - err = os_pipe(l_sigio_private, 1, 1); + + err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL); if (err < 0) { - printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " - "err = %d\n", -err); - goto out_close1; + printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n", + __func__, -err); + close(epollfd); + epollfd = -1; + goto out; } - p = setup_initial_poll(l_sigio_private[1]); - if (!p) - goto out_close2; - - sigio_lock(); - - /* - * Did we race? Don't try to optimize this, please, it's not so likely - * to happen, and no more than once at the boot. - */ - if (write_sigio_pid != -1) - goto out_free; - - current_poll = ((struct pollfds) { .poll = p, - .used = 1, - .size = 1 }); - - if (write_sigio_irq(l_write_sigio_fds[0])) - goto out_clear_poll; - - memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); - memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); - - write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, - CLONE_FILES | CLONE_VM, - &write_sigio_stack); - - if (write_sigio_pid < 0) - goto out_clear; - - sigio_unlock(); - return; - -out_clear: - write_sigio_pid = -1; - write_sigio_fds[0] = -1; - write_sigio_fds[1] = -1; - sigio_private[0] = -1; - sigio_private[1] = -1; -out_clear_poll: - current_poll = ((struct pollfds) { .poll = NULL, - .size = 0, - .used = 0 }); -out_free: +out: sigio_unlock(); - kfree(p); -out_close2: - close(l_sigio_private[0]); - close(l_sigio_private[1]); -out_close1: - close(l_write_sigio_fds[0]); - close(l_write_sigio_fds[1]); } -void sigio_broken(int fd) +void sigio_broken(void) { - int err; - write_sigio_workaround(); - - sigio_lock(); - err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); - if (err) { - printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " - "for descriptor %d\n", fd); - goto out; - } - - all_sigio_fds.poll[all_sigio_fds.used++] = - ((struct pollfd) { .fd = fd, - .events = POLLIN, - .revents = 0 }); -out: - sigio_unlock(); } /* Changed during early boot */ @@ -389,17 +142,16 @@ void maybe_sigio_broken(int fd) if (pty_output_sigio) return; - sigio_broken(fd); + sigio_broken(); } static void sigio_cleanup(void) { - if (write_sigio_pid == -1) + if (!write_sigio_td) return; - os_kill_process(write_sigio_pid, 1); - free_stack(write_sigio_stack, 0); - write_sigio_pid = -1; + os_kill_helper_thread(write_sigio_td); + write_sigio_td = NULL; } __uml_exitcall(sigio_cleanup); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 9ea7269ffb77..e71e5b4878d1 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -21,7 +21,7 @@ #include <sys/ucontext.h> #include <timetravel.h> -void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = { [SIGTRAP] = relay_signal, [SIGFPE] = relay_signal, [SIGILL] = relay_signal, @@ -47,7 +47,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) if ((sig != SIGIO) && (sig != SIGWINCH)) unblock_signals_trace(); - 
(*sig_info[sig])(sig, si, &r); + (*sig_info[sig])(sig, si, &r, mc); errno = save_errno; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index e2f8f156402f..ae2aea062f06 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -166,7 +166,7 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi) static void handle_segv(int pid, struct uml_pt_regs *regs) { get_skas_faultinfo(pid, &regs->faultinfo); - segv(regs->faultinfo, 0, 1, NULL); + segv(regs->faultinfo, 0, 1, NULL, NULL); } static void handle_trap(int pid, struct uml_pt_regs *regs) @@ -525,7 +525,7 @@ void userspace(struct uml_pt_regs *regs) get_skas_faultinfo(pid, &regs->faultinfo); (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, - regs); + regs, NULL); } else handle_segv(pid, regs); break; @@ -533,7 +533,7 @@ handle_trap(pid, regs); break; case SIGTRAP: - relay_signal(SIGTRAP, (struct siginfo *)&si, regs); + relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); break; case SIGALRM: break; @@ -543,7 +543,7 @@ case SIGFPE: case SIGWINCH: block_signals_trace(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); unblock_signals_trace(); break; default: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9427b5292ca2..85ba2e187571 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,6 +26,8 @@ config X86_64 depends on 64BIT # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_PTDUMP + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE @@ -148,6 +150,7 @@ config X86 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 + select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64 select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT @@ -176,7 +179,6 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY @@ -460,20 +462,28 @@ config SMP If you don't know what to do here, say N. config X86_X2APIC - bool "Support x2apic" + bool "x2APIC interrupt controller architecture support" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) + default y help - This enables x2apic support on CPUs that have this feature. + x2APIC is an interrupt controller architecture, a component of which + (the local APIC) is present in the CPU. It allows faster access to + the local APIC and supports a larger number of CPUs in the system + than its predecessors. - This allows 32-bit apic IDs (so it can support very large systems), - and accesses the local apic via MSRs not via mmio. + x2APIC was introduced in Intel CPUs around 2008 and in AMD EPYC CPUs + in 2019, but it can be disabled by the BIOS. It is also frequently + emulated in virtual machines, even when the host CPU does not support + it. Support in the CPU can be checked by executing + grep x2apic /proc/cpuinfo - Some Intel systems circa 2022 and later are locked into x2APIC mode - and can not fall back to the legacy APIC modes if SGX or TDX are - enabled in the BIOS. 
They will boot with very reduced functionality - without enabling this option. + If this configuration option is disabled, the kernel will boot with + very reduced functionality and performance on some platforms that + have x2APIC enabled. On the other hand, on hardware that does not + support x2APIC, a kernel with this option enabled will just fall back + to older APIC implementations. - If you don't know what to do here, say N. + If in doubt, say Y. config X86_POSTED_MSI bool "Enable MSI and MSI-x delivery by posted interrupts" @@ -544,16 +554,17 @@ CONFIG_64BIT. 32-bit platforms (CONFIG_64BIT=n): - Goldfish (Android emulator) - AMD Elan + Goldfish (mostly Android emulator) + Intel CE media processor (CE4100) SoC + Intel Quark RDC R-321x SoC - SGI 320/540 (Visual Workstation) 64-bit platforms (CONFIG_64BIT=y): Numascale NumaChip ScaleMP vSMP SGI Ultraviolet Merrifield/Moorefield MID devices + Goldfish (mostly Android emulator) If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. @@ -667,6 +678,17 @@ config X86_INTEL_QUARK Say Y here if you have a Quark based system such as the Arduino compatible Intel Galileo. +config X86_RDC321X + bool "RDC R-321x SoC" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + select M486 + select X86_REBOOTFIXUPS + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on X86 && ACPI && PCI @@ -720,17 +742,6 @@ config IOSF_MBI_DEBUG If you don't require the option or are in doubt, say N. -config X86_RDC321X - bool "RDC R-321x SoC" - depends on X86_32 - depends on X86_EXTENDED_PLATFORM - select M486 - select X86_REBOOTFIXUPS - help - This option is needed for RDC R-321x system-on-chip, also known - as R-8610-(G). - If you don't have one of these chips, you should say N here. - config X86_SUPPORTS_MEMORY_FAILURE def_bool y # MCE code calls memory_failure(): @@ -879,6 +890,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE @@ -1565,7 +1577,6 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || X86_32 select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -2212,7 +2223,7 @@ config HOTPLUG_CPU config COMPAT_VDSO def_bool n - prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" + prompt "Workaround for glibc 2.3.2 / 2.3.3 (released in 2003/2004)" depends on COMPAT_32 help Certain buggy versions of glibc will crash if they are @@ -2901,6 +2912,19 @@ config PCI_MMCONFIG default y depends on PCI && (ACPI || JAILHOUSE_GUEST) depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) + help + Add support for accessing the PCI configuration space as a memory + mapped area. It is the recommended method if the system supports + this (it must have PCI Express and ACPI for it to be available). + + In the unlikely case that enabling this configuration option causes + problems, the mechanism can be switched off with the 'pci=nommconf' + command line parameter. + + Say N only if you are sure that your platform does not support this + access method or you have problems caused by it. + + Say Y otherwise. 
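A note on the new X86_X2APIC help text above: the suggested 'grep x2apic /proc/cpuinfo' check works because the kernel derives that cpuinfo flag from CPUID leaf 1, ECX bit 21. For illustration only, here is a minimal userspace C sketch (not part of this patch) that performs the same check directly:

#include <stdio.h>
#include <cpuid.h>

/* CPUID.01H:ECX bit 21 advertises x2APIC support. */
#define X2APIC_FEATURE_BIT (1u << 21)

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if the requested leaf is unsupported. */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 1 not available\n");
		return 1;
	}

	printf("x2APIC %s\n",
	       (ecx & X2APIC_FEATURE_BIT) ? "supported" : "not supported");
	return 0;
}

Note that, as the help text says, firmware or a hypervisor can still disable or emulate x2APIC regardless of what CPUID reports, so this is a hint rather than a guarantee.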
config PCI_OLPC def_bool y @@ -2915,13 +2939,21 @@ config MMCONF_FAM10H depends on X86_64 && PCI_MMCONFIG && ACPI config PCI_CNB20LE_QUIRK - bool "Read CNB20LE Host Bridge Windows" if EXPERT - depends on PCI + bool "Read PCI host bridge windows from the CNB20LE chipset" if EXPERT + depends on X86_32 && PCI help Read the PCI windows out of the CNB20LE host bridge. This allows PCI hotplug to work on systems with the CNB20LE chipset which do not have ACPI. + The ServerWorks (later Broadcom) CNB20LE was a chipset most + probably designed only for Pentium III systems. + + To find out if you have such a chipset, search for a PCI device with + 1166:0009 PCI IDs, for example by executing + lspci -nn | grep '1166:0009' + The code is inactive if no such device is present. + There's no public spec for this chipset, and this functionality is known to be incomplete. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1eb4d23cdaae..c95c3aaadf97 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select PTDUMP_CORE + select PTDUMP help Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 0fc7e8fd1a2e..27efe2dc2aa8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -277,12 +277,11 @@ cpufeaturemasks.hdr := arch/x86/include/generated/asm/cpufeaturemasks.h cpufeaturemasks.awk := $(srctree)/arch/x86/tools/cpufeaturemasks.awk cpufeatures_hdr := $(srctree)/arch/x86/include/asm/cpufeatures.h targets += $(cpufeaturemasks.hdr) -quiet_cmd_gen_featuremasks = GEN $@ - cmd_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) > $@ + filechk_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) $(cpufeaturemasks.hdr): $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) FORCE $(shell mkdir -p $(dir $@)) - $(call if_changed,gen_featuremasks) + $(call filechk,gen_featuremasks) archprepare: $(cpufeaturemasks.hdr) ### diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index a46b1397ad01..c86cbd9cbba3 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -7,12 +7,13 @@ core-y += arch/x86/crypto/ # GCC versions < 11. 
See: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 # -ifeq ($(CONFIG_CC_IS_CLANG),y) -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json +ifeq ($(call gcc-min-version, 110000)$(CONFIG_CC_IS_CLANG),y) +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 endif +KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json + ifeq ($(CONFIG_X86_32),y) START := 0x8048000 diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 7772b01ab738..edab6d6049be 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <asm/paravirt_types.h> #include <asm/pgtable.h> #include <asm/set_memory.h> #include <asm/traps.h> @@ -392,13 +393,21 @@ static int handle_halt(struct ve_info *ve) { const bool irq_disabled = irqs_disabled(); + /* + * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a + * wake event may be consumed before requesting HLT emulation, leaving + * the vCPU blocking indefinitely. + */ + if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled")) + return -EIO; + if (__halt(irq_disabled)) return -EIO; return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false; @@ -409,6 +418,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_module_args args = { @@ -1110,6 +1129,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX-specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT-related #VEs + * in addition to making halt logic execution reliable. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled * there. 
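The tdx_early_init() hunk above avoids the unsafe "sti;hlt" sequence by retargeting the pv_ops.irq.safe_halt and pv_ops.irq.halt function pointers once at early init, so every later halt path goes through the TDX routines without touching the call sites. A minimal standalone C sketch of that override pattern (hypothetical names, printf stand-ins; not the kernel's pv_ops machinery):

#include <stdio.h>

/* Simplified stand-in for the kernel's pv_ops irq table. */
struct irq_ops {
	void (*safe_halt)(void);	/* halt expected to run with IRQs enabled */
	void (*halt)(void);		/* halt with IRQs disabled */
};

static void native_safe_halt_op(void) { puts("native: sti; hlt"); }
static void native_halt_op(void)      { puts("native: hlt"); }

/* TDX variants: request halt via TDCALL, then handle IRQ state explicitly. */
static void tdx_halt_op(void) { puts("tdx: TDCALL-based halt, IRQ state untouched"); }
static void tdx_safe_halt_op(void)
{
	tdx_halt_op();
	puts("tdx: re-enable IRQs only after the TDCALL returns");
}

static struct irq_ops pv_irq_ops = {
	.safe_halt = native_safe_halt_op,
	.halt      = native_halt_op,
};

static void tdx_early_init_sketch(void)
{
	/* Mirrors the patch: override both halt paths once, early. */
	pv_irq_ops.safe_halt = tdx_safe_halt_op;
	pv_irq_ops.halt      = tdx_halt_op;
}

int main(void)
{
	pv_irq_ops.safe_halt();		/* native path */
	tdx_early_init_sketch();
	pv_irq_ops.safe_halt();		/* TDX path after the override */
	return 0;
}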
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index cb0911c5dc5d..d83236b96f22 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -70,6 +70,8 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + /* We just clobbered the return address - use the IRET frame for unwinding: */ + UNWIND_HINT_IRET_REGS offset=3*8 .else pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 9518bf1ddf35..adb299d3b6a1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -162,7 +162,8 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) text_start, image->size, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_SEALED_SYSMAP, &vdso_mapping); if (IS_ERR(vma)) { @@ -181,7 +182,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) VDSO_VCLOCK_PAGES_START(addr), VDSO_NR_VCLOCK_PAGES * PAGE_SIZE, VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, + VM_PFNMAP|VM_SEALED_SYSMAP, &vvar_vclock_mapping); if (IS_ERR(vma)) { diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index b5982b94bdba..cbc6157f0b4b 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -16,7 +16,8 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm_inline (ALTERNATIVE("call __sw_hweight32", + asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "call __sw_hweight32", "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); @@ -45,7 +46,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm_inline (ALTERNATIVE("call __sw_hweight64", + asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "call __sw_hweight64", "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT : [val] REG_IN (w)); diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 731ee7cc40a5..585bdadba47d 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -69,9 +69,6 @@ extern unsigned long highstart_pfn, highend_pfn; arch_flush_lazy_mmu_mode(); \ } while (0) -extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 1a0dc2b2bf5b..e889c3bab5a2 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -170,7 +170,7 @@ extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 
af7541c11821..8ace6559d399 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -168,13 +168,6 @@ void iosf_mbi_unblock_punit_i2c_access(void); int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); /** - * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier - * - * @nb: notifier_block to unregister - */ -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); - -/** * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus * notifier, unlocked * diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index abb8374c9ff7..9a9b21b78905 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags) #endif +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static __always_inline void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static __always_inline void halt(void) +{ + native_halt(); +} +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else @@ -98,24 +120,6 @@ static __always_inline void arch_local_irq_enable(void) } /* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static __always_inline void arch_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static __always_inline void halt(void) -{ - native_halt(); -} - -/* * For spinlocks, etc: */ static __always_inline unsigned long arch_local_irq_save(void) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e4d11e3318f0..8a5cc8e70439 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -435,12 +435,8 @@ static inline void call_depth_return_thunk(void) {} * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. 
*/ -#ifdef CONFIG_MITIGATION_RETPOLINE #define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ "call __x86_indirect_thunk_%V[thunk_target]\n" -#else -#define CALL_NOSPEC "call *%[thunk_target]\n" -#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 5469d7a7c40f..53ba39ce010c 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -41,10 +41,6 @@ static inline int numa_cpu_node(int cpu) } #endif /* CONFIG_NUMA */ -#ifdef CONFIG_X86_32 -# include <asm/numa_32.h> -#endif - #ifdef CONFIG_NUMA extern void numa_set_node(int cpu, int node); extern void numa_clear_node(int cpu); diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h deleted file mode 100644 index 9c8e9e85be77..000000000000 --- a/arch/x86/include/asm/numa_32.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_NUMA_32_H -#define _ASM_X86_NUMA_32_H - -#ifdef CONFIG_HIGHMEM -extern void set_highmem_pages_init(void); -#else -static inline void set_highmem_pages_init(void) -{ -} -#endif - -#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index bed346bfac89..c4c23190925c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -102,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn, PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); } +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); +} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { @@ -165,16 +175,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static __always_inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - static inline u64 paravirt_read_msr(unsigned msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 62912023b46f..631c306ce1ff 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -120,10 +120,9 @@ struct pv_irq_ops { struct paravirt_callee_save save_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - +#endif void (*safe_halt)(void); void (*halt)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 105db2d33c7b..5fe314a2e73e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -63,10 +63,14 @@ unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \ \ tcp_ptr__ += (__force unsigned long)(_ptr); \ - (typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \ }) #else -#define arch_raw_cpu_ptr(_ptr) ({ BUILD_BUG(); (typeof(_ptr))0; }) +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + BUILD_BUG(); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \ +}) #endif #define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel @@ -81,9 +85,18 @@ #endif /* CONFIG_SMP */ -#define __my_cpu_type(var) typeof(var) __percpu_seg_override -#define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) -#define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL) 
+# define __my_cpu_type(var) typeof(var) +# define __my_cpu_ptr(ptr) (ptr) +# define __my_cpu_var(var) (var) + +# define __percpu_qual __percpu_seg_override +#else +# define __my_cpu_type(var) typeof(var) __percpu_seg_override +# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) +# define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#endif + #define __percpu_arg(x) __percpu_prefix "%" #x #define __force_percpu_arg(x) __force_percpu_prefix "%" #x @@ -150,7 +163,7 @@ do { \ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ \ if (0) { \ - typeof(_var) pto_tmp__; \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ @@ -191,7 +204,7 @@ do { \ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ \ if (0) { \ - typeof(_var) pto_tmp__; \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ pto_tmp__ = (_val); \ (void)pto_tmp__; \ } \ @@ -212,7 +225,7 @@ do { \ (val) == (typeof(val))-1)) ? (int)(val) : 0; \ \ if (0) { \ - typeof(var) pao_tmp__; \ + TYPEOF_UNQUAL(var) pao_tmp__; \ pao_tmp__ = (val); \ (void)pao_tmp__; \ } \ @@ -245,7 +258,7 @@ do { \ */ #define raw_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(_var) pxo_old__ = raw_cpu_read(_var); \ + TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \ \ raw_cpu_write(_var, _nval); \ \ @@ -259,7 +272,7 @@ do { \ */ #define this_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(_var) pxo_old__ = this_cpu_read(_var); \ + TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \ \ do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ \ diff --git a/arch/x86/include/asm/rqspinlock.h b/arch/x86/include/asm/rqspinlock.h new file mode 100644 index 000000000000..24a885449ee6 --- /dev/null +++ b/arch/x86/include/asm/rqspinlock.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RQSPINLOCK_H +#define _ASM_X86_RQSPINLOCK_H + +#include <asm/paravirt.h> + +#ifdef CONFIG_PARAVIRT +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled +static __always_inline bool resilient_virt_spin_lock_enabled(void) +{ + return static_branch_likely(&virt_spin_lock_key); +} + +#ifdef CONFIG_QUEUED_SPINLOCKS +typedef struct qspinlock rqspinlock_t; +#else +typedef struct rqspinlock rqspinlock_t; +#endif +extern int resilient_tas_spin_lock(rqspinlock_t *lock); + +#define resilient_virt_spin_lock resilient_virt_spin_lock +static inline int resilient_virt_spin_lock(rqspinlock_t *lock) +{ + return resilient_tas_spin_lock(lock); +} + +#endif /* CONFIG_PARAVIRT */ + +#include <asm-generic/rqspinlock.h> + +#endif /* _ASM_X86_RQSPINLOCK_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index daea94c2993c..55a5e656e4b9 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -16,23 +16,23 @@ #ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE "", "clac", X86_FEATURE_SMAP + ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", "stac", X86_FEATURE_SMAP + ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", "clac", X86_FEATURE_SMAP); + alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", "stac", X86_FEATURE_SMAP); + alternative(ANNOTATE_IGNORE_ALTERNATIVE 
"", "stac", X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) @@ -40,7 +40,8 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("", "pushf; pop %0; " "clac" "\n\t", + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "", "pushf; pop %0; clac", X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); @@ -50,16 +51,22 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" - ALTERNATIVE("", "push %0; popf\n\t", + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE + "", "push %0; popf", X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", "clac", X86_FEATURE_SMAP) + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", "stac", X86_FEATURE_SMAP) + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP) + +#define ASM_CLAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP) +#define ASM_STAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "stac", X86_FEATURE_SMAP) #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 65394aa9b49f..4a1922ec80cf 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_halt(void); bool tdx_early_handle_ve(struct pt_regs *regs); @@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls); #else static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { }; static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index a9af8759de34..e9b81876ebe4 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -348,8 +348,7 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) } static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm, - unsigned long uaddr) + struct mm_struct *mm, unsigned long start, unsigned long end) { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 97771b9d33af..59a62c3780a2 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -231,14 +231,12 @@ static __always_inline void __xen_stac(void) * Suppress objtool seeing the STAC/CLAC and getting confused about it * calling random code with AC=1. 
*/ - asm volatile(ANNOTATE_IGNORE_ALTERNATIVE - ASM_STAC ::: "memory", "flags"); + asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags"); } static __always_inline void __xen_clac(void) { - asm volatile(ANNOTATE_IGNORE_ALTERNATIVE - ASM_CLAC ::: "memory", "flags"); + asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags"); } static inline long diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a9088250770f..bd0fc69a10a7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -72,18 +72,10 @@ enum xen_lazy_mode { }; DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); -DECLARE_PER_CPU(unsigned int, xen_lazy_nesting); static inline void enter_lazy(enum xen_lazy_mode mode) { - enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode); - - if (mode == old_mode) { - this_cpu_inc(xen_lazy_nesting); - return; - } - - BUG_ON(old_mode != XEN_LAZY_NONE); + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); this_cpu_write(xen_lazy_mode, mode); } @@ -92,10 +84,7 @@ static inline void leave_lazy(enum xen_lazy_mode mode) { BUG_ON(this_cpu_read(xen_lazy_mode) != mode); - if (this_cpu_read(xen_lazy_nesting) == 0) - this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); - else - this_cpu_dec(xen_lazy_nesting); + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); } enum xen_lazy_mode xen_get_lazy_mode(void); diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 25ae54250112..d86d7d6e750c 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -98,11 +98,10 @@ static inline bool within_module_coretext(void *addr) #ifdef CONFIG_MODULES struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)addr); if (mod && within_module_core((unsigned long)addr, mod)) ret = true; - preempt_enable(); #endif return ret; } diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 97222efb4d2a..237faf7e700c 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -201,6 +201,26 @@ static void __split_lock_reenable(struct work_struct *work) static DEFINE_PER_CPU(struct delayed_work, sl_reenable); /* + * Per-CPU delayed_work can't be statically initialized properly because + * the struct address is unknown. Thus per-CPU delayed_work structures + * have to be initialized during kernel initialization and after calling + * setup_per_cpu_areas(). + */ +static int __init setup_split_lock_delayed_work(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu); + + INIT_DELAYED_WORK(work, __split_lock_reenable); + } + + return 0; +} +pure_initcall(setup_split_lock_delayed_work); + +/* * If a CPU goes offline with pending delayed work to re-enable split lock * detection then the delayed work will be executed on some other CPU. That * handles releasing the buslock_sem, but because it executes on a @@ -219,15 +239,16 @@ static int splitlock_cpu_offline(unsigned int cpu) static void split_lock_warn(unsigned long ip) { - struct delayed_work *work = NULL; + struct delayed_work *work; int cpu; + unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate); if (!current->reported_split_lock) pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", current->comm, current->pid, ip); current->reported_split_lock = 1; - if (sysctl_sld_mitigate) { + if (saved_sld_mitigate) { /* * misery factor #1: * sleep 10ms before trying to execute split lock. 
@@ -240,18 +261,10 @@ static void split_lock_warn(unsigned long ip) */ if (down_interruptible(&buslock_sem) == -EINTR) return; - work = &sl_reenable_unlock; } cpu = get_cpu(); - - if (!work) { - work = this_cpu_ptr(&sl_reenable); - /* Deferred initialization of per-CPU struct */ - if (!work->work.func) - INIT_DELAYED_WORK(work, __split_lock_reenable); - } - + work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu); schedule_delayed_work_on(cpu, work, 2); /* Disable split lock detection on this CPU to make progress */ diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dac4d64dfb2a..2235a7477436 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs) copy_user = is_copy_from_user(regs); instrumentation_end(); - switch (fixup_type) { - case EX_TYPE_UACCESS: - if (!copy_user) - return IN_KERNEL; - m->kflags |= MCE_IN_KERNEL_COPYIN; - fallthrough; + if (copy_user) { + m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV; + return IN_KERNEL_RECOV; + } + switch (fixup_type) { case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE: m->kflags |= MCE_IN_KERNEL_RECOV; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 138689b8e1d8..b61028cf5c8a 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -600,7 +600,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, unsigned long p_addr = (unsigned long)&mc->hdr.data_code; if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize)) - return -1; + return false; native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c44c5b496355..eaae99602b61 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -403,6 +403,11 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r extern struct mutex rdtgroup_mutex; +static inline const char *rdt_kn_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex)); +} + extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 01fa7890b43f..92ea1472bde9 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -52,7 +52,8 @@ static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) rdtgrp = dev_get_drvdata(dev); if (mode) *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); + guard(mutex)(&rdtgroup_mutex); + return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn)); } static const struct class pseudo_lock_class = { @@ -1298,6 +1299,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) struct task_struct *thread; unsigned int new_minor; struct device *dev; + char *kn_name __free(kfree) = NULL; int ret; ret = pseudo_lock_region_alloc(plr); @@ -1309,6 +1311,11 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) ret = -EINVAL; goto out_region; } + kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL); + if (!kn_name) { + ret = -ENOMEM; + goto out_cstates; + } plr->thread_done = 0; @@ -1353,8 +1360,7 @@ 
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) mutex_unlock(&rdtgroup_mutex); if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); + plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl); if (!IS_ERR_OR_NULL(plr->debugfs_dir)) debugfs_create_file("pseudo_lock_measure", 0200, plr->debugfs_dir, rdtgrp, @@ -1363,7 +1369,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) dev = device_create(&pseudo_lock_class, NULL, MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); + rdtgrp, "%s", kn_name); mutex_lock(&rdtgroup_mutex); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index c6274d40b217..93ec829015f1 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -944,14 +944,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, continue; seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", - rdtg->kn->name); + rdt_kn_name(rdtg->kn)); seq_puts(s, "mon:"); list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, mon.crdtgrp_list) { if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, crg->mon.rmid)) continue; - seq_printf(s, "%s", crg->kn->name); + seq_printf(s, "%s", rdt_kn_name(crg->kn)); break; } seq_putc(s, '\n'); @@ -984,10 +984,20 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } +static void *rdt_kn_parent_priv(struct kernfs_node *kn) +{ + /* + * The parent pointer is only valid within RCU section since it can be + * replaced. + */ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%u\n", s->num_closid); return 0; @@ -996,7 +1006,7 @@ static int rdt_num_closids_show(struct kernfs_open_file *of, static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); @@ -1006,7 +1016,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); @@ -1016,7 +1026,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); @@ -1040,7 +1050,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. 
@@ -1122,7 +1132,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); @@ -1132,7 +1142,7 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, static int rdt_num_rmids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%d\n", r->num_rmid); @@ -1142,7 +1152,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, static int rdt_mon_features_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; list_for_each_entry(mevt, &r->evt_list, list) { @@ -1157,7 +1167,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); @@ -1167,7 +1177,7 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); @@ -1185,7 +1195,7 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; switch (r->membw.throttle_mode) { @@ -1259,7 +1269,7 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); @@ -1670,7 +1680,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid static int mbm_total_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); @@ -1680,7 +1690,7 @@ static int mbm_total_bytes_config_show(struct kernfs_open_file *of, static int mbm_local_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); @@ -1787,7 +1797,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -1813,7 +1823,7 @@ static ssize_t 
mbm_local_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -2513,12 +2523,13 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) * resource. "info" and its subdirectories don't * have rdtgroup structures, so return NULL here. */ - if (kn == kn_info || kn->parent == kn_info) + if (kn == kn_info || + rcu_access_pointer(kn->__parent) == kn_info) return NULL; else return kn->priv; } else { - return kn->parent->priv; + return rdt_kn_parent_priv(kn); } } @@ -3752,7 +3763,7 @@ out_unlock: */ static bool is_mon_groups(struct kernfs_node *kn, const char *name) { - return (!strcmp(kn->name, "mon_groups") && + return (!strcmp(rdt_kn_name(kn), "mon_groups") && strcmp(name, "mon_groups")); } @@ -3867,9 +3878,18 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) return 0; } +static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) +{ + /* + * Valid within the RCU section it was obtained or while rdtgroup_mutex + * is held. + */ + return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); +} + static int rdtgroup_rmdir(struct kernfs_node *kn) { - struct kernfs_node *parent_kn = kn->parent; + struct kernfs_node *parent_kn; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; int ret = 0; @@ -3882,6 +3902,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = -EPERM; goto out; } + parent_kn = rdt_kn_parent(kn); /* * If the rdtgroup is a ctrl_mon group and parent directory @@ -3899,7 +3920,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && - is_mon_groups(parent_kn, kn->name)) { + is_mon_groups(parent_kn, rdt_kn_name(kn))) { ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; @@ -3950,6 +3971,7 @@ static void mongrp_reparent(struct rdtgroup *rdtgrp, static int rdtgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { + struct kernfs_node *kn_parent; struct rdtgroup *new_prdtgrp; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; @@ -3984,8 +4006,9 @@ static int rdtgroup_rename(struct kernfs_node *kn, goto out; } - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || - !is_mon_groups(kn->parent, kn->name)) { + kn_parent = rdt_kn_parent(kn); + if (rdtgrp->type != RDTMON_GROUP || !kn_parent || + !is_mon_groups(kn_parent, rdt_kn_name(kn))) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 91639d1e4ec2..c6fefd4585f8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -195,6 +195,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); + stack = stack ?: get_stack_pointer(task, regs); regs = unwind_get_entry_regs(&state, &partial); /* @@ -213,9 +214,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for (stack = stack ?: get_stack_pointer(task, regs); - stack; - stack = stack_info.next_sp) { + for (; stack; stack = stack_info.next_sp) { const char *stack_name; stack = PTR_ALIGN(stack, sizeof(long)); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 
fc1714bad045..611f27e3890c 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -190,7 +190,6 @@ static __init void early_serial_init(char *s) early_serial_hw_init(divisor); } -#ifdef CONFIG_PCI static __noendbr void mem32_serial_out(unsigned long addr, int offset, int value) { u32 __iomem *vaddr = (u32 __iomem *)addr; @@ -208,6 +207,45 @@ static __noendbr unsigned int mem32_serial_in(unsigned long addr, int offset) ANNOTATE_NOENDBR_SYM(mem32_serial_in); /* + * early_mmio_serial_init() - Initialize MMIO-based early serial console. + * @s: MMIO-based serial specification. + */ +static __init void early_mmio_serial_init(char *s) +{ + unsigned long baudrate; + unsigned long membase; + char *e; + + if (*s == ',') + s++; + + if (!strncmp(s, "0x", 2)) { + /* NB: only 32-bit addresses are supported. */ + membase = simple_strtoul(s, &e, 16); + early_serial_base = (unsigned long)early_ioremap(membase, PAGE_SIZE); + + static_call_update(serial_in, mem32_serial_in); + static_call_update(serial_out, mem32_serial_out); + + s += strcspn(s, ","); + if (*s == ',') + s++; + } + + if (!strncmp(s, "nocfg", 5)) { + baudrate = 0; + } else { + baudrate = simple_strtoul(s, &e, 0); + if (baudrate == 0 || s == e) + baudrate = DEFAULT_BAUD; + } + + if (baudrate) + early_serial_hw_init(115200 / baudrate); +} + +#ifdef CONFIG_PCI +/* * early_pci_serial_init() * * This function is invoked when the early_printk param starts with "pciserial" @@ -351,6 +389,11 @@ static int __init setup_early_printk(char *buf) keep = (strstr(buf, "keep") != NULL); while (*buf != '\0') { + if (!strncmp(buf, "mmio", 4)) { + early_mmio_serial_init(buf + 4); + early_console_register(&early_serial_console, keep); + buf += 4; + } if (!strncmp(buf, "serial", 6)) { buf += 6; early_serial_init(buf); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1b734a9ff088..91d6341f281f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -508,7 +508,7 @@ static inline void fpstate_init_fstate(struct fpstate *fpstate) /* * Used in two places: * 1) Early boot to setup init_fpstate for non XSAVE systems - * 2) fpu_init_fpstate_user() which is invoked from KVM + * 2) fpu_alloc_guest_fpstate() which is invoked from KVM */ void fpstate_init_user(struct fpstate *fpstate) { diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 9c9faa1634fb..102641fd2172 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -655,7 +655,7 @@ void kgdb_arch_late(void) if (breakinfo[i].pev) continue; breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); - if (IS_ERR((void * __force)breakinfo[i].pev)) { + if (IS_ERR_PCPU(breakinfo[i].pev)) { printk(KERN_ERR "kgdb: Could not allocate hw" "breakpoints\nDisabling the kernel debugger\n"); breakinfo[i].pev = NULL; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 97925632c28e..1ccd05d8999f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -75,6 +75,11 @@ void paravirt_set_sched_clock(u64 (*func)(void)) static_call_update(pv_sched_clock, func); } +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} + #ifdef CONFIG_PARAVIRT_XXL static noinstr void pv_native_write_cr2(unsigned long val) { @@ -100,11 +105,6 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val) { native_set_debugreg(regno, val); } - -static noinstr void pv_native_safe_halt(void) -{ - native_safe_halt(); -} #endif struct pv_info pv_info = { @@ -161,9 +161,11 @@ struct 
paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), +#endif /* CONFIG_PARAVIRT_XXL */ + + /* Irq HLT ops. */ .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, -#endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ .mmu.flush_tlb_user = native_flush_tlb_local, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 91f6ff618852..962c3ce39323 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -939,7 +939,7 @@ void __init select_idle_routine(void) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else { static_call_update(x86_idle, default_idle); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c7164a8de983..9d2a13b37833 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -578,14 +578,13 @@ static void __init memblock_x86_reserve_range_setup_data(void) static void __init arch_reserve_crashkernel(void) { unsigned long long crash_base, crash_size, low_size = 0; - char *cmdline = boot_command_line; bool high = false; int ret; if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; - ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, &low_size, &high); if (ret) @@ -596,8 +595,7 @@ static void __init arch_reserve_crashkernel(void) return; } - reserve_crashkernel_generic(cmdline, crash_size, crash_base, - low_size, high); + reserve_crashkernel_generic(crash_size, crash_base, low_size, high); } static struct resource standard_io_resources[] = { @@ -1031,8 +1029,6 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = e820__end_of_low_ram_pfn(); else max_low_pfn = max_pfn; - - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif /* Find and reserve MPTABLE area */ @@ -1166,8 +1162,10 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); - if (boot_cpu_has(X86_FEATURE_GBPAGES)) + if (boot_cpu_has(X86_FEATURE_GBPAGES)) { hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + hugetlb_bootmem_alloc(); + } /* * Reserve memory for crash kernel after SRAT is parsed so that it diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index d4705a348a80..977ee75e047c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -476,7 +476,7 @@ bool unwind_next_frame(struct unwind_state *state) return false; /* Don't let modules unload while we're reading their ORC data. 
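The unwinder hunk that resumes just below swaps a preempt_disable()/preempt_enable() pair for guard(rcu)(). A minimal kernel-style sketch of that scope-based pattern, assuming the <linux/cleanup.h> guard semantics; the helper below is hypothetical, not the real unwind_next_frame():

#include <linux/cleanup.h>
#include <linux/rcupdate.h>

/* Hypothetical helper: touch ORC data that may live in module memory. */
static bool orc_module_readable(unsigned long ip)
{
	guard(rcu)();		/* rcu_read_lock() here, dropped at any return */

	if (!ip)
		return false;	/* early exit: no manual unlock needed */

	/* Module ORC tables are safe to dereference for the rest of the scope. */
	return true;
}

With an explicit preempt_disable(), every return path would need a matching enable call; the guard removes that bookkeeping, which is why the hunk below can simply delete the calls at the err/the_end exits.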
*/ - preempt_disable(); + guard(rcu)(); /* End-of-stack check for user tasks: */ if (state->regs && user_mode(state->regs)) @@ -669,14 +669,12 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } - preempt_enable(); return true; err: state->error = true; the_end: - preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index aa8c341b2441..06296eb69fd4 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -77,6 +77,24 @@ SYM_FUNC_START(rep_movs_alternative) _ASM_EXTABLE_UA( 0b, 1b) .Llarge_movsq: + /* Do the first possibly unaligned word */ +0: movq (%rsi),%rax +1: movq %rax,(%rdi) + + _ASM_EXTABLE_UA( 0b, .Lcopy_user_tail) + _ASM_EXTABLE_UA( 1b, .Lcopy_user_tail) + + /* What would be the offset to the aligned destination? */ + leaq 8(%rdi),%rax + andq $-8,%rax + subq %rdi,%rax + + /* .. and update pointers and count to match */ + addq %rax,%rdi + addq %rax,%rsi + subq %rax,%rcx + + /* make %rcx contain the number of words, %rax the remainder */ movq %rcx,%rax shrq $3,%rcx andl $7,%eax diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 690fbf48e853..32035d5be5a0 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -39,11 +39,9 @@ CFLAGS_fault.o := -I $(src)/../include/asm/trace obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o -obj-$(CONFIG_HIGHMEM) += highmem_32.o - KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c deleted file mode 100644 index d9efa35711ee..000000000000 --- a/arch/x86/mm/highmem_32.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include <linux/highmem.h> -#include <linux/export.h> -#include <linux/swap.h> /* for totalram_pages */ -#include <linux/memblock.h> -#include <asm/numa.h> - -void __init set_highmem_pages_init(void) -{ - struct zone *zone; - int nid; - - /* - * Explicitly reset zone->managed_pages because set_highmem_pages_init() - * is invoked before memblock_free_all() - */ - reset_all_zones_managed_pages(); - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } -} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f288aad8dc74..ad662cc4605c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -394,23 +394,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = virt_to_kpte(vaddr); } - -void __init add_highpages_with_active_regions(int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - phys_addr_t start, end; - u64 i; - - for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { - unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), - start_pfn, end_pfn); - unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), - start_pfn, end_pfn); - for ( ; pfn < e_pfn; pfn++) - if (pfn_valid(pfn)) - free_highmem_page(pfn_to_page(pfn)); - } -} #else static inline 
void permanent_kmaps_init(pgd_t *pgd_base) { @@ -645,9 +628,6 @@ void __init initmem_init(void) memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); -#ifdef CONFIG_FLATMEM - max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn; -#endif __vmalloc_start_set = true; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", @@ -709,27 +689,17 @@ static void __init test_wp_bit(void) panic("Linux doesn't support CPUs with broken WP."); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { pci_iommu_alloc(); #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - /* - * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before memblock_free_all(). Memblock use free low memory for - * temporary data (see find_range_array()) and for this purpose can use - * pages that was already passed to the buddy allocator, hence marked as - * not accessible in the page tables when compiled with - * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not - * important here. - */ - set_highmem_pages_init(); - - /* this will put all low memory onto the freelists */ - memblock_free_all(); +} +void __init mem_init(void) +{ after_bootmem = 1; x86_init.hyper.init_after_bootmem(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 519aa53114fa..7c4f6f591f2b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -959,9 +959,18 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); - /* update max_pfn, max_low_pfn and high_memory */ - update_end_of_memory_vars(start_pfn << PAGE_SHIFT, - nr_pages << PAGE_SHIFT); + /* + * Special case: add_pages() is called by memremap_pages() for adding device + * private pages. Do not bump up max_pfn in the device private path, + * because max_pfn changes affect dma_addressing_limited(). 
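A simplified, self-contained model of the effect this new comment goes on to describe (toy userspace C with hypothetical page-frame numbers; this is only the shape of the kernel's dma_addressing_limited() check, not its implementation):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Toy model: a device is "addressing limited" if its DMA mask cannot
 * reach the highest physical address the kernel believes exists. */
static bool addressing_limited(uint64_t dma_mask, uint64_t max_pfn)
{
	return dma_mask < ((max_pfn << PAGE_SHIFT) - 1);
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* device with a 32-bit DMA mask */
	uint64_t ram_end_pfn = 0x100000;	/* 4 GiB of ordinary RAM */
	uint64_t dev_end_pfn = 0x4000000;	/* device-private range far above RAM */

	/* RAM only: the mask covers everything, no bounce buffers. */
	printf("ram only:   limited=%d\n", addressing_limited(mask32, ram_end_pfn));

	/* If add_pages() bumped max_pfn for device-private pages, the same
	 * device would suddenly look limited and get bounce-buffered. */
	printf("pgmap bump: limited=%d\n", addressing_limited(mask32, dev_end_pfn));
	return 0;
}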
+ * + * dma_addressing_limited() returning true when max_pfn is the device's + * addressable memory can force device drivers to use bounce buffers + * and impact their performance negatively: + */ + if (!params->pgmap) + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); return ret; } @@ -1340,14 +1349,15 @@ failed: panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { pci_iommu_alloc(); +} +void __init mem_init(void) +{ /* clear_bss() already clear the empty_zero_page */ - /* this will put all memory onto the freelists */ - memblock_free_all(); after_bootmem = 1; x86_init.hyper.init_after_bootmem(); @@ -1591,11 +1601,14 @@ void register_page_bootmem_memmap(unsigned long section_nr, } get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); - if (!boot_cpu_has(X86_FEATURE_PSE)) { + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + + if (!boot_cpu_has(X86_FEATURE_PSE) || !pmd_leaf(*pmd)) { next = (addr + PAGE_SIZE) & PAGE_MASK; - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - continue; get_page_bootmem(section_nr, pmd_page(*pmd), MIX_SECTION_INFO); @@ -1606,12 +1619,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, SECTION_INFO); } else { next = pmd_addr_end(addr, end); - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - continue; - - nr_pmd_pages = 1 << get_order(PMD_SIZE); + nr_pmd_pages = (next - addr) >> PAGE_SHIFT; page = pmd_page(*pmd); while (nr_pmd_pages--) get_page_bootmem(section_nr, page++, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 42c90b420773..331e101bf801 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -440,10 +440,10 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) EXPORT_SYMBOL(ioremap_cache); void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - unsigned long prot_val) + pgprot_t prot) { return __ioremap_caller(phys_addr, size, - pgprot2cachemode(__pgprot(prot_val)), + pgprot2cachemode(prot), __builtin_return_address(0), false); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index e40861c9cb90..72d8cbc61158 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -984,29 +984,42 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, return -EINVAL; } -/* - * track_pfn_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - * - * If the vma has a linear pfn mapping for the entire range, we get the prot - * from pte and reserve the entire vma range with single reserve_pfn_range call. - */ -int track_pfn_copy(struct vm_area_struct *vma) +int track_pfn_copy(struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, unsigned long *pfn) { + const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start; resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; + int rc; - if (vma->vm_flags & VM_PAT) { - if (get_pat_info(vma, &paddr, &pgprot)) - return -EINVAL; - /* reserve the whole chunk covered by vma. */ - return reserve_pfn_range(paddr, vma_size, &pgprot, 1); - } + if (!(src_vma->vm_flags & VM_PAT)) + return 0; + + /* + * Duplicate the PAT information for the dst VMA based on the src + * VMA. 
+ */ + if (get_pat_info(src_vma, &paddr, &pgprot)) + return -EINVAL; + rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1); + if (rc) + return rc; + /* Reservation for the destination VMA succeeded. */ + vm_flags_set(dst_vma, VM_PAT); + *pfn = PHYS_PFN(paddr); return 0; } +void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) +{ + untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true); + /* + * Reservation was freed, any copied page tables will get cleaned + * up later, but without getting PAT involved again. + */ +} + /* * prot is passed in as a parameter for the new mapping. If the vma has * a linear pfn mapping for the entire range, or no vma is provided, @@ -1095,15 +1108,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, } } -/* - * untrack_pfn_clear is called if the following situation fits: - * - * 1) while mremapping a pfnmap for a new region, with the old vma after - * its pfnmap page table has been removed. The new vma has a new pfnmap - * to the same pfn & cache type with VM_PAT set. - * 2) while duplicating vm area, the new vma fails to copy the pgtable from - * old vma. - */ void untrack_pfn_clear(struct vm_area_struct *vma) { vm_flags_clear(vma, VM_PAT); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 72405d315b41..def3d9284254 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2274,6 +2274,7 @@ int set_mce_nospec(unsigned long pfn) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; } +EXPORT_SYMBOL_GPL(set_mce_nospec); /* Restore full speculative operation to the pfn. */ int clear_mce_nospec(unsigned long pfn) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index cec321fb74f2..a05fcddfc811 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -20,7 +20,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { paravirt_release_pte(page_to_pfn(pte)); - tlb_remove_table(tlb, page_ptdesc(pte)); + tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -34,21 +34,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - tlb_remove_table(tlb, virt_to_ptdesc(pmd)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - tlb_remove_table(tlb, virt_to_ptdesc(pud)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); - tlb_remove_table(tlb, virt_to_ptdesc(p4d)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index 383c87300b0d..dddbefc0f439 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile @@ -6,5 +6,5 @@ ifeq ($(CONFIG_X86_32),y) obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o else - obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 72776dcb75aa..9e5fe2ba858f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1250,8 +1250,8 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm) emit_st_index(pprog, size, 
dst_reg, X86_REG_R12, off, imm); } -static int emit_atomic(u8 **pprog, u8 atomic_op, - u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) +static int emit_atomic_rmw(u8 **pprog, u32 atomic_op, + u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) { u8 *prog = *pprog; @@ -1291,8 +1291,9 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, return 0; } -static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, - u32 dst_reg, u32 src_reg, u32 index_reg, int off) +static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size, + u32 dst_reg, u32 src_reg, u32 index_reg, + int off) { u8 *prog = *pprog; @@ -1305,7 +1306,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg)); break; default: - pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n"); + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); return -EFAULT; } @@ -1339,6 +1340,49 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, return 0; } +static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg, + u32 src_reg, s16 off, u8 bpf_size) +{ + switch (atomic_op) { + case BPF_LOAD_ACQ: + /* dst_reg = smp_load_acquire(src_reg + off16) */ + emit_ldx(pprog, bpf_size, dst_reg, src_reg, off); + break; + case BPF_STORE_REL: + /* smp_store_release(dst_reg + off16, src_reg) */ + emit_stx(pprog, bpf_size, dst_reg, src_reg, off); + break; + default: + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", + atomic_op); + return -EFAULT; + } + + return 0; +} + +static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, + u32 dst_reg, u32 src_reg, u32 index_reg, + int off) +{ + switch (atomic_op) { + case BPF_LOAD_ACQ: + /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */ + emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off); + break; + case BPF_STORE_REL: + /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */ + emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off); + break; + default: + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", + atomic_op); + return -EFAULT; + } + + return 0; +} + #define DONT_CLEAR 1 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) @@ -2121,6 +2165,13 @@ populate_extable: } break; + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + if (!bpf_atomic_is_load_store(insn)) { + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); + return -EFAULT; + } + fallthrough; case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: if (insn->imm == (BPF_AND | BPF_FETCH) || @@ -2156,10 +2207,10 @@ populate_extable: EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ - err = emit_atomic(&prog, BPF_CMPXCHG, - real_dst_reg, AUX_REG, - insn->off, - BPF_SIZE(insn->code)); + err = emit_atomic_rmw(&prog, BPF_CMPXCHG, + real_dst_reg, AUX_REG, + insn->off, + BPF_SIZE(insn->code)); if (WARN_ON(err)) return err; /* @@ -2174,17 +2225,35 @@ populate_extable: break; } - err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, - insn->off, BPF_SIZE(insn->code)); + if (bpf_atomic_is_load_store(insn)) + err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); + else + err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); if (err) return err; break; + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: + if 
(!bpf_atomic_is_load_store(insn)) { + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); + return -EFAULT; + } + fallthrough; case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: start_of_ldx = prog; - err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code), - dst_reg, src_reg, X86_REG_R12, insn->off); + + if (bpf_atomic_is_load_store(insn)) + err = emit_atomic_ld_st_index(&prog, insn->imm, + BPF_SIZE(insn->code), dst_reg, + src_reg, X86_REG_R12, insn->off); + else + err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code), + dst_reg, src_reg, X86_REG_R12, + insn->off); if (err) return err; goto populate_extable; @@ -3801,3 +3870,8 @@ u64 bpf_arch_uaddress_limit(void) { return 0; } + +bool bpf_jit_supports_timed_may_goto(void) +{ + return true; +} diff --git a/arch/x86/net/bpf_timed_may_goto.S b/arch/x86/net/bpf_timed_may_goto.S new file mode 100644 index 000000000000..54c690cae190 --- /dev/null +++ b/arch/x86/net/bpf_timed_may_goto.S @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/export.h> +#include <linux/linkage.h> +#include <asm/nospec-branch.h> + + .code64 + .section .text, "ax" + +SYM_FUNC_START(arch_bpf_timed_may_goto) + ANNOTATE_NOENDBR + + /* + * r10 passes us stack depth, load the pointer to count and timestamp + * into r10 by adding it to BPF frame pointer. + */ + leaq (%rbp, %r10, 1), %r10 + + /* Setup frame. */ + pushq %rbp + movq %rsp, %rbp + + /* Save r0-r5. */ + pushq %rax + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + + /* + * r10 has the pointer to count and timestamp, pass it as first + * argument. + */ + movq %r10, %rdi + + /* Emit call depth accounting for call below. */ + CALL_DEPTH_ACCOUNT + call bpf_check_timed_may_goto + + /* BPF_REG_AX=r10 will be stored into count, so move return value to it. */ + movq %rax, %r10 + + /* Restore r5-r0. */ + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rax + + leave + RET +SYM_FUNC_END(arch_bpf_timed_may_goto) diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index c81cea208c2c..40ae94db20d8 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -422,19 +422,6 @@ int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( } EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked); -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) -{ - int ret; - - /* Wait for the bus to go inactive before unregistering */ - iosf_mbi_punit_acquire(); - ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb); - iosf_mbi_punit_release(); - - return ret; -} -EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier); - void iosf_mbi_assert_punit_acquired(void) { WARN_ON(iosf_mbi_pmic_punit_access_count == 0); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 63230ff8cf4f..08e76a5ca155 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -27,6 +27,7 @@ #include <asm/mmu_context.h> #include <asm/cpu_device_id.h> #include <asm/microcode.h> +#include <asm/fred.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -231,6 +232,19 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) */ #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); + + /* + * Reinitialize FRED to ensure the FRED MSRs contain the same values + * as before hibernation. 
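A condensed, kernel-style view of the ordering constraint this new comment goes on to spell out (the comment resumes below); this mirrors the hunk rather than replacing it:

static void restore_exception_state(struct saved_context *ctxt)
{
	/* Percpu accesses work only after GS base is back. */
	wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);

	if (ctxt->cr4 & X86_CR4_FRED) {
		cpu_init_fred_exceptions();	/* reprogram FRED entry MSRs */
		cpu_init_fred_rsps();		/* reads percpu stack pointers */
	}
}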
+ * + * Note, the setup of FRED RSPs requires access to percpu data + * structures. Therefore, FRED reinitialization can only occur after + * the percpu access pointer (i.e., MSR_GS_BASE) is restored. + */ + if (ctxt->cr4 & X86_CR4_FRED) { + cpu_init_fred_exceptions(); + cpu_init_fred_rsps(); + } #else loadsegment(fs, __KERNEL_PERCPU); #endif diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index 6c2986d2ad11..08cd913cbd4e 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -12,8 +12,6 @@ #include <stdarg.h> #include <linux/kallsyms.h> -#define unlikely(cond) (cond) - #include <asm/insn.h> #include <inat.c> #include <insn.c> diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 4da336965698..b51aefd6ec2b 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -12,9 +12,9 @@ */ #ifdef CONFIG_X86_32 -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#define mb() alternative("lock addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) #else /* CONFIG_X86_32 */ diff --git a/arch/x86/um/asm/module.h b/arch/x86/um/asm/module.h deleted file mode 100644 index a3b061d66082..000000000000 --- a/arch/x86/um/asm/module.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_MODULE_H -#define __UM_MODULE_H - -/* UML is simple */ -struct mod_arch_specific -{ -}; - -#ifdef CONFIG_X86_32 - -#define Elf_Shdr Elf32_Shdr -#define Elf_Sym Elf32_Sym -#define Elf_Ehdr Elf32_Ehdr - -#else - -#define Elf_Shdr Elf64_Shdr -#define Elf_Sym Elf64_Sym -#define Elf_Ehdr Elf64_Ehdr - -#endif - -#endif diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c index e80ab7d28117..37decaa74761 100644 --- a/arch/x86/um/os-Linux/mcontext.c +++ b/arch/x86/um/os-Linux/mcontext.c @@ -4,6 +4,7 @@ #include <asm/ptrace.h> #include <sysdep/ptrace.h> #include <sysdep/mcontext.h> +#include <arch.h> void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) { @@ -27,7 +28,17 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) COPY(RIP); COPY2(EFLAGS, EFL); COPY2(CS, CSGSFS); - regs->gp[CS / sizeof(unsigned long)] &= 0xffff; - regs->gp[CS / sizeof(unsigned long)] |= 3; + regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48; +#endif +} + +void mc_set_rip(void *_mc, void *target) +{ + mcontext_t *mc = _mc; + +#ifdef __i386__ + mc->gregs[REG_EIP] = (unsigned long)target; +#else + mc->gregs[REG_RIP] = (unsigned long)target; #endif } diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index b6f2437ec29c..ab5c8e47049c 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -29,4 +29,16 @@ struct faultinfo { #define PTRACE_FULL_FAULTINFO 0 +#define ___backtrack_faulted(_faulted) \ + asm volatile ( \ + "mov $0, %0\n" \ + "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "jmp _end_%=\n" \ + "__get_kernel_nofault_faulted_%=:\n" \ + "mov $1, %0;" \ + "_end_%=:" \ + : "=r" (_faulted), \ + "=m" (current->thread.segv_continue) :: \ + ) + #endif diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h 
b/arch/x86/um/shared/sysdep/faultinfo_64.h index ee88f88974ea..26fb4835d3e9 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -29,4 +29,16 @@ struct faultinfo { #define PTRACE_FULL_FAULTINFO 1 +#define ___backtrack_faulted(_faulted) \ + asm volatile ( \ + "mov $0, %0\n" \ + "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "jmp _end_%=\n" \ + "__get_kernel_nofault_faulted_%=:\n" \ + "mov $1, %0;" \ + "_end_%=:" \ + : "=r" (_faulted), \ + "=m" (current->thread.segv_continue) :: \ + ) + #endif diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index f238f7b33cdd..dc8dfb2abd80 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -12,33 +12,22 @@ static unsigned int __read_mostly vdso_enabled = 1; unsigned long um_vdso_addr; +static struct page *um_vdso; extern unsigned long task_size; extern char vdso_start[], vdso_end[]; -static struct page **vdsop; - static int __init init_vdso(void) { - struct page *um_vdso; - BUG_ON(vdso_end - vdso_start > PAGE_SIZE); um_vdso_addr = task_size - PAGE_SIZE; - vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL); - if (!vdsop) - goto oom; - um_vdso = alloc_page(GFP_KERNEL); - if (!um_vdso) { - kfree(vdsop); - + if (!um_vdso) goto oom; - } copy_page(page_address(um_vdso), vdso_start); - *vdsop = um_vdso; return 0; @@ -56,6 +45,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) struct mm_struct *mm = current->mm; static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", + .pages = &um_vdso, }; if (!vdso_enabled) @@ -64,7 +54,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (mmap_write_lock_killable(mm)) return -EINTR; - vdso_mapping.pages = vdsop; vma = _install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index dcc2041f8e61..846b5737d320 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -138,7 +138,6 @@ struct tls_descs { }; DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE; -DEFINE_PER_CPU(unsigned int, xen_lazy_nesting); enum xen_lazy_mode xen_get_lazy_mode(void) { diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index 934e58399c8c..7cdcc2deab3e 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -29,7 +29,7 @@ * I/O memory mapping functions. 
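Looping back to the UML fault-handling hunks above: ___backtrack_faulted() publishes a recovery address, and mc_set_rip() redirects a faulting context to it. A runnable userspace analogue of that scheme, assuming glibc's x86-64 ucontext layout; illustrative only, not the UML code:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static volatile unsigned long segv_continue;

static void on_segv(int sig, siginfo_t *si, void *uc_)
{
	ucontext_t *uc = uc_;

	/* Same idea as mc_set_rip(): resume at the published landing pad. */
	uc->uc_mcontext.gregs[REG_RIP] = (greg_t)segv_continue;
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = on_segv, .sa_flags = SA_SIGINFO };
	int faulted;

	sigaction(SIGSEGV, &sa, NULL);

	asm volatile ("xorl %0, %0\n\t"
		      "leaq 1f(%%rip), %%rax\n\t"
		      "movq %%rax, %1\n\t"	/* publish the landing pad */
		      "movq 0, %%rax\n\t"	/* read address 0: faults */
		      "jmp 2f\n"
		      "1:\tmovl $1, %0\n"	/* reached only via the handler */
		      "2:"
		      : "=&r" (faulted), "=m" (segv_continue)
		      :: "rax", "memory");

	printf("faulted=%d\n", faulted);	/* prints faulted=1 */
	return 0;
}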
*/ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot); + pgprot_t prot); #define ioremap_prot ioremap_prot #define iounmap iounmap @@ -40,7 +40,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_BYPASS_VADDR); else return ioremap_prot(offset, size, - pgprot_val(pgprot_noncached(PAGE_KERNEL))); + pgprot_noncached(PAGE_KERNEL)); } #define ioremap ioremap @@ -51,7 +51,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset, && offset - XCHAL_KIO_PADDR < XCHAL_KIO_SIZE) return (void*)(offset-XCHAL_KIO_PADDR+XCHAL_KIO_CACHED_VADDR); else - return ioremap_prot(offset, size, pgprot_val(PAGE_KERNEL)); + return ioremap_prot(offset, size, PAGE_KERNEL); } #define ioremap_cache ioremap_cache diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index b2587a1a7c46..cc52733a0649 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -66,59 +66,8 @@ void __init bootmem_init(void) memblock_dump_all(); } - -void __init zones_init(void) +static void __init print_vm_layout(void) { - /* All pages are DMA-able, so we put them all in the DMA zone. */ - unsigned long max_zone_pfn[MAX_NR_ZONES] = { - [ZONE_NORMAL] = max_low_pfn, -#ifdef CONFIG_HIGHMEM - [ZONE_HIGHMEM] = max_pfn, -#endif - }; - free_area_init(max_zone_pfn); -} - -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - -/* - * Initialize memory pages. - */ - -void __init mem_init(void) -{ - free_highpages(); - - max_mapnr = max_pfn - ARCH_PFN_OFFSET; - high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); - - memblock_free_all(); - pr_info("virtual kernel memory layout:\n" #ifdef CONFIG_KASAN " kasan : 0x%08lx - 0x%08lx (%5lu MB)\n" @@ -167,6 +116,19 @@ void __init mem_init(void) (unsigned long)(__bss_stop - __bss_start) >> 10); } +void __init zones_init(void) +{ + /* All pages are DMA-able, so we put them all in the DMA zone. */ + unsigned long max_zone_pfn[MAX_NR_ZONES] = { + [ZONE_NORMAL] = max_low_pfn, +#ifdef CONFIG_HIGHMEM + [ZONE_HIGHMEM] = max_pfn, +#endif + }; + free_area_init(max_zone_pfn); + print_vm_layout(); +} + static void __init parse_memmap_one(char *p) { char *oldp; diff --git a/arch/xtensa/mm/ioremap.c b/arch/xtensa/mm/ioremap.c index 8ca660b7ab49..26f238fa9d0d 100644 --- a/arch/xtensa/mm/ioremap.c +++ b/arch/xtensa/mm/ioremap.c @@ -11,12 +11,12 @@ #include <asm/io.h> void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { unsigned long pfn = __phys_to_pfn((phys_addr)); WARN_ON(pfn_valid(pfn)); - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot);
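Taken together with the io.h hunks above, the xtensa conversion shows the caller-side effect of the typed ioremap_prot(): the pgprot_val()/__pgprot() round-trip disappears and the pgprot_t flows straight through. A small kernel-style sketch of a driver-side caller after the change (hypothetical function):

void __iomem *map_device_window(phys_addr_t base, size_t len)
{
	/* old: ioremap_prot(base, len, pgprot_val(pgprot_noncached(PAGE_KERNEL))); */
	return ioremap_prot(base, len, pgprot_noncached(PAGE_KERNEL));
}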
