diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2024-12-05 11:48:58 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2024-12-05 11:50:14 -0800 |
| commit | 302cc446cbd92aadff72a647cebc13d5634f8342 (patch) | |
| tree | 759fe019d2e1c82ca8590f34c7ba46ab9ddada6e /arch | |
| parent | da4fa00abe5674d3d165cfd8032c740e8aab4d3b (diff) | |
| parent | 896d8946da97332d4dc80fa1937d8dd6b1c35ad4 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.13-rc2).
No conflicts or adjacent changes.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
202 files changed, 2345 insertions, 2416 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 832f68af7c77..6682b2a53e34 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -812,6 +812,45 @@ config LTO_CLANG_THIN If unsure, say Y. endchoice +config ARCH_SUPPORTS_AUTOFDO_CLANG + bool + +config AUTOFDO_CLANG + bool "Enable Clang's AutoFDO build (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_AUTOFDO_CLANG + depends on CC_IS_CLANG && CLANG_VERSION >= 170000 + help + This option enables Clang’s AutoFDO build. When + an AutoFDO profile is specified in variable + CLANG_AUTOFDO_PROFILE during the build process, + Clang uses the profile to optimize the kernel. + + If no profile is specified, AutoFDO options are + still passed to Clang to facilitate the collection + of perf data for creating an AutoFDO profile in + subsequent builds. + + If unsure, say N. + +config ARCH_SUPPORTS_PROPELLER_CLANG + bool + +config PROPELLER_CLANG + bool "Enable Clang's Propeller build" + depends on ARCH_SUPPORTS_PROPELLER_CLANG + depends on CC_IS_CLANG && CLANG_VERSION >= 190000 + help + This option enables Clang’s Propeller build. When the Propeller + profiles is specified in variable CLANG_PROPELLER_PROFILE_PREFIX + during the build process, Clang uses the profiles to optimize + the kernel. + + If no profile is specified, Propeller options are still passed + to Clang to facilitate the collection of perf data for creating + the Propeller profiles in subsequent builds. + + If unsure, say N. 
+ config ARCH_SUPPORTS_CFI_CLANG bool help diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 5808a66e2a81..3048758304b5 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -64,7 +64,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, * Return: %0 on success, negative error code otherwise */ static int pci_mmap_resource(struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma, int sparse) { struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); @@ -93,14 +93,14 @@ static int pci_mmap_resource(struct kobject *kobj, } static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 1); } static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 0); diff --git a/arch/arm/Makefile b/arch/arm/Makefile index aafebf145738..00ca7886b18e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -264,13 +264,13 @@ stack_protector_prepare: prepare0 -mstack-protector-guard=tls \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}'\ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) else stack_protector_prepare: prepare0 $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}'\ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 06b0e5fd54a6..cb6ef449b987 100644 --- a/arch/arm/common/locomo.c +++ 
b/arch/arm/common/locomo.c @@ -516,7 +516,7 @@ static void locomo_remove(struct platform_device *dev) */ static struct platform_driver locomo_device_driver = { .probe = locomo_probe, - .remove_new = locomo_remove, + .remove = locomo_remove, #ifdef CONFIG_PM .suspend = locomo_suspend, .resume = locomo_resume, diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 550978dc3c50..9846f30990f7 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1154,7 +1154,7 @@ static struct dev_pm_ops sa1111_pm_ops = { */ static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, - .remove_new = sa1111_remove, + .remove = sa1111_remove, .driver = { .name = "sa1111", .pm = &sa1111_pm_ops, diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 9018c7240166..0b08b6621878 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -250,7 +250,7 @@ static void scoop_remove(struct platform_device *pdev) static struct platform_driver scoop_driver = { .probe = scoop_probe, - .remove_new = scoop_remove, + .remove = scoop_remove, .suspend = scoop_suspend, .resume = scoop_resume, .driver = { diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 1dfae1af8e31..ef6a657c8d13 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -25,6 +25,7 @@ #include <asm/tls.h> #include <asm/system_info.h> #include <asm/uaccess-asm.h> +#include <asm/kasan_def.h> #include "entry-header.S" #include <asm/probes.h> @@ -561,6 +562,13 @@ ENTRY(__switch_to) @ entries covering the vmalloc region. 
@ ldr r2, [ip] +#ifdef CONFIG_KASAN_VMALLOC + @ Also dummy read from the KASAN shadow memory for the new stack if we + @ are using KASAN + mov_l r2, KASAN_SHADOW_OFFSET + add r2, r2, ip, lsr #KASAN_SHADOW_SCALE_SHIFT + ldr r2, [r2] +#endif #endif @ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index b68cb86dbe4c..e898f7c2733e 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -596,7 +596,7 @@ static struct platform_driver imx_mmdc_driver = { .of_match_table = imx_mmdc_dt_ids, }, .probe = imx_mmdc_probe, - .remove_new = imx_mmdc_remove, + .remove = imx_mmdc_remove, }; static int __init imx_mmdc_init(void) diff --git a/arch/arm/mach-omap1/omap-dma.c b/arch/arm/mach-omap1/omap-dma.c index f091f78631d0..aebe5e55ff60 100644 --- a/arch/arm/mach-omap1/omap-dma.c +++ b/arch/arm/mach-omap1/omap-dma.c @@ -832,7 +832,7 @@ static void omap_system_dma_remove(struct platform_device *pdev) static struct platform_driver omap_system_dma_driver = { .probe = omap_system_dma_probe, - .remove_new = omap_system_dma_remove, + .remove = omap_system_dma_remove, .driver = { .name = "omap_dma_system" }, diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index 72fa2e3fd353..0c8d9000df5a 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -919,7 +919,7 @@ static void sharpsl_pm_remove(struct platform_device *pdev) static struct platform_driver sharpsl_pm_driver = { .probe = sharpsl_pm_probe, - .remove_new = sharpsl_pm_remove, + .remove = sharpsl_pm_remove, .suspend = sharpsl_pm_suspend, .resume = sharpsl_pm_resume, .driver = { diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c index 1956b095e699..d94810217095 100644 --- a/arch/arm/mach-sa1100/jornada720_ssp.c +++ b/arch/arm/mach-sa1100/jornada720_ssp.c @@ -188,7 +188,7 @@ static void jornada_ssp_remove(struct platform_device *dev) struct 
platform_driver jornadassp_driver = { .probe = jornada_ssp_probe, - .remove_new = jornada_ssp_remove, + .remove = jornada_ssp_remove, .driver = { .name = "jornada_ssp", }, diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 0ef0ebbf31ac..88fe79f0a4ed 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -423,7 +423,7 @@ static const struct dev_pm_ops neponset_pm_ops = { static struct platform_driver neponset_device_driver = { .probe = neponset_probe, - .remove_new = neponset_remove, + .remove = neponset_remove, .driver = { .name = "neponset", .pm = PM_OPS, diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 794cfea9f9d4..89f1c97f3079 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -23,6 +23,7 @@ */ #include <linux/module.h> #include <linux/errno.h> +#include <linux/kasan.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/io.h> @@ -115,16 +116,40 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); +#ifdef CONFIG_KASAN +static unsigned long arm_kasan_mem_to_shadow(unsigned long addr) +{ + return (unsigned long)kasan_mem_to_shadow((void *)addr); +} +#else +static unsigned long arm_kasan_mem_to_shadow(unsigned long addr) +{ + return 0; +} +#endif + +static void memcpy_pgd(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + end = ALIGN(end, PGDIR_SIZE); + memcpy(pgd_offset(mm, start), pgd_offset_k(start), + sizeof(pgd_t) * (pgd_index(end) - pgd_index(start))); +} + void __check_vmalloc_seq(struct mm_struct *mm) { int seq; do { - seq = atomic_read(&init_mm.context.vmalloc_seq); - memcpy(pgd_offset(mm, VMALLOC_START), - pgd_offset_k(VMALLOC_START), - sizeof(pgd_t) * (pgd_index(VMALLOC_END) - - pgd_index(VMALLOC_START))); + seq = atomic_read_acquire(&init_mm.context.vmalloc_seq); + memcpy_pgd(mm, VMALLOC_START, VMALLOC_END); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + unsigned long start = + 
arm_kasan_mem_to_shadow(VMALLOC_START); + unsigned long end = + arm_kasan_mem_to_shadow(VMALLOC_END); + memcpy_pgd(mm, start, end); + } /* * Use a store-release so that other CPUs that observe the * counter's new value are guaranteed to see the results of the diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index b68efe643a12..d44867fc0c5e 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -56,6 +56,34 @@ extern unsigned int VFP_arch_feroceon __alias(VFP_arch); union vfp_state *vfp_current_hw_state[NR_CPUS]; /* + * Claim ownership of the VFP unit. + * + * The caller may change VFP registers until vfp_state_release() is called. + * + * local_bh_disable() is used to disable preemption and to disable VFP + * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is + * not sufficient because it only serializes soft interrupt related sections + * via a local lock, but stays preemptible. Disabling preemption is the right + * choice here as bottom half processing is always in thread context on RT + * kernels so it implicitly prevents bottom half processing as well. + */ +static void vfp_state_hold(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); +} + +static void vfp_state_release(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +/* * Is 'thread's most up to date state stored in this CPUs hardware? * Must be called from non-preemptible context. */ @@ -240,7 +268,7 @@ static void vfp_panic(char *reason, u32 inst) /* * Process bitmask of exception conditions. 
*/ -static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) +static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr) { int si_code = 0; @@ -248,8 +276,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ if (exceptions == VFP_EXCEPTION_ERROR) { vfp_panic("unhandled bounce", inst); - vfp_raise_sigfpe(FPE_FLTINV, regs); - return; + return FPE_FLTINV; } /* @@ -277,8 +304,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); - if (si_code) - vfp_raise_sigfpe(si_code, regs); + return si_code; } /* @@ -324,6 +350,8 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { u32 fpscr, orig_fpscr, fpsid, exceptions; + int si_code2 = 0; + int si_code = 0; pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); @@ -369,8 +397,8 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) * unallocated VFP instruction but with FPSCR.IXE set and not * on VFP subarch 1. */ - vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); - return; + si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr); + goto exit; } /* @@ -394,14 +422,14 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) */ exceptions = vfp_emulate_instruction(trigger, fpscr, regs); if (exceptions) - vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); /* * If there isn't a second FP instruction, exit now. Note that * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. 
*/ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) - return; + goto exit; /* * The barrier() here prevents fpinst2 being read @@ -413,7 +441,13 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) emulate: exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) - vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); +exit: + vfp_state_release(); + if (si_code2) + vfp_raise_sigfpe(si_code2, regs); + if (si_code) + vfp_raise_sigfpe(si_code, regs); } static void vfp_enable(void *unused) @@ -512,11 +546,9 @@ static inline void vfp_pm_init(void) { } */ void vfp_sync_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + vfp_state_hold(); - local_bh_disable(); - - if (vfp_state_in_hw(cpu, thread)) { + if (vfp_state_in_hw(raw_smp_processor_id(), thread)) { u32 fpexc = fmrx(FPEXC); /* @@ -527,8 +559,7 @@ void vfp_sync_hwstate(struct thread_info *thread) fmxr(FPEXC, fpexc); } - local_bh_enable(); - put_cpu(); + vfp_state_release(); } /* Ensure that the thread reloads the hardware VFP state on the next use. */ @@ -683,7 +714,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) if (!user_mode(regs)) return vfp_kmode_exception(regs, trigger); - local_bh_disable(); + vfp_state_hold(); fpexc = fmrx(FPEXC); /* @@ -748,6 +779,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) * replay the instruction that trapped. 
*/ fmxr(FPEXC, fpexc); + vfp_state_release(); } else { /* Check for synchronous or asynchronous exceptions */ if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { @@ -762,17 +794,17 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) if (!(fpscr & FPSCR_IXE)) { if (!(fpscr & FPSCR_LENGTH_MASK)) { pr_debug("not VFP\n"); - local_bh_enable(); + vfp_state_release(); return -ENOEXEC; } fpexc |= FPEXC_DEX; } } bounce: regs->ARM_pc += 4; + /* VFP_bounce() will invoke vfp_state_release() */ VFP_bounce(trigger, fpexc, regs); } - local_bh_enable(); return 0; } @@ -837,7 +869,7 @@ void kernel_neon_begin(void) unsigned int cpu; u32 fpexc; - local_bh_disable(); + vfp_state_hold(); /* * Kernel mode NEON is only allowed outside of hardirq context with @@ -868,7 +900,7 @@ void kernel_neon_end(void) { /* Disable the NEON/VFP unit. */ fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); - local_bh_enable(); + vfp_state_release(); } EXPORT_SYMBOL(kernel_neon_end); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d743737bf9ce..100570a048c5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -14,7 +14,6 @@ config ARM64 select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE - select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE @@ -1236,6 +1235,17 @@ config HISILICON_ERRATUM_161600802 If unsure, say Y. +config HISILICON_ERRATUM_162100801 + bool "Hip09 162100801 erratum support" + default y + help + When enabling GICv4.1 in hip09, VMAPP will fail to clear some caches + during unmapping operation, which will cause some vSGIs lost. + To fix the issue, invalidate related vPE cache through GICR_INVALLR + after VMOVP. + + If unsure, say Y. 
+ config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9efd3f37c2fd..358c68565bfd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -71,7 +71,7 @@ stack_protector_prepare: prepare0 -mstack-protector-guard-reg=sp_el0 \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) endif ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi index ae0379fd42a9..dfc5c2f0ddef 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi @@ -14,6 +14,7 @@ compatible = "melfas,mip4_ts"; reg = <0x34>; interrupts-extended = <&pio 88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; /* @@ -26,6 +27,7 @@ reg = <0x20>; hid-descr-addr = <0x0020>; interrupts-extended = <&pio 88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; /* Lenovo Ideapad C330 uses G2Touch touchscreen as a 2nd source touchscreen */ @@ -35,6 +37,7 @@ hid-descr-addr = <0x0001>; interrupt-parent = <&pio>; interrupts = <88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; }; @@ -47,6 +50,8 @@ trackpad2: trackpad@2c { compatible = "hid-over-i2c"; interrupts-extended = <&pio 117 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&trackpad_irq>; reg = <0x2c>; hid-descr-addr = <0x0020>; /* @@ -58,6 +63,7 @@ */ vdd-supply = <&mt6397_vgp6_reg>; wakeup-source; + status = "fail-needs-probe"; }; }; @@ -82,3 +88,11 @@ }; }; }; + +&touchscreen { + status = "fail-needs-probe"; +}; + +&trackpad { + status = "fail-needs-probe"; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi index b4d85147b77b..eee64461421f 100644 --- 
a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi @@ -358,12 +358,12 @@ &i2c4 { clock-frequency = <400000>; status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&trackpad_irq>; trackpad: trackpad@15 { compatible = "elan,ekth3000"; interrupts-extended = <&pio 117 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&trackpad_irq>; reg = <0x15>; vcc-supply = <&mt6397_vgp6_reg>; wakeup-source; diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index a523b519700f..a2b5d6f20f4d 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -18,7 +18,7 @@ #include "aes-ce-setkey.h" -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); static int num_rounds(struct crypto_aes_ctx *ctx) { diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index a147e847a5a1..b0150999743f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -1048,7 +1048,7 @@ unregister_ciphers: #ifdef USE_V8_CRYPTO_EXTENSIONS module_cpu_feature_match(AES, aes_init); -EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL); +EXPORT_SYMBOL_NS(ce_aes_mac_update, "CRYPTO_INTERNAL"); #else module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index aeaa6017ffd8..9b36218b48de 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -951,7 +951,7 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, return v; } -static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) { int i = sr - __SANITISED_REG_START__; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 8ad62284fa23..456102bc0b55 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -274,12 +274,23 @@ void 
kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) irq_work_sync(&vcpu->arch.pmu.overflow_work); } -bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu) { - unsigned int hpmn; + unsigned int hpmn, n; - if (!vcpu_has_nv(vcpu) || idx == ARMV8_PMU_CYCLE_IDX) - return false; + if (!vcpu_has_nv(vcpu)) + return 0; + + hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); + n = vcpu->kvm->arch.pmcr_n; + + /* + * Programming HPMN to a value greater than PMCR_EL0.N is + * CONSTRAINED UNPREDICTABLE. Make the implementation choice that an + * UNKNOWN number of counters (in our case, zero) are reserved for EL2. + */ + if (hpmn >= n) + return 0; /* * Programming HPMN=0 is CONSTRAINED UNPREDICTABLE if FEAT_HPMN0 isn't @@ -288,20 +299,22 @@ bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) * implementation choice that all counters are included in the second * range reserved for EL2/EL3. */ - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return idx >= hpmn; + return GENMASK(n - 1, hpmn); +} + +bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +{ + return kvm_pmu_hyp_counter_mask(vcpu) & BIT(idx); } u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - u64 hpmn; if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu)) return mask; - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return mask & ~GENMASK(vcpu->kvm->arch.pmcr_n - 1, hpmn); + return mask & ~kvm_pmu_hyp_counter_mask(vcpu); } u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) @@ -375,15 +388,30 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) } } -static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) +/* + * Returns the PMU overflow state, which is true if there exists an event + * counter where the values of the global enable control, PMOVSSET_EL0[n], and + * 
PMINTENSET_EL1[n] are all 1. + */ +static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { - u64 reg = 0; + u64 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { - reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - } + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); + + /* + * PMCR_EL0.E is the global enable control for event counters available + * to EL0 and EL1. + */ + if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) + reg &= kvm_pmu_hyp_counter_mask(vcpu); + + /* + * Otherwise, MDCR_EL2.HPME is the global enable control for event + * counters reserved for EL2. + */ + if (!(vcpu_read_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HPME)) + reg &= ~kvm_pmu_hyp_counter_mask(vcpu); return reg; } @@ -396,7 +424,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) if (!kvm_vcpu_has_pmu(vcpu)) return; - overflow = !!kvm_pmu_overflow_status(vcpu); + overflow = kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index e1397ab2072a..afb018528bc3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -287,7 +287,10 @@ static int vgic_debug_show(struct seq_file *s, void *v) * Expect this to succeed, as iter_mark_lpis() takes a reference on * every LPI to be visited. 
*/ - irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (iter->intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, iter->intid); + else + irq = vgic_get_irq(kvm, iter->intid); if (WARN_ON_ONCE(!irq)) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 48c952563e85..bc7e22ab5d81 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -322,7 +322,7 @@ int vgic_init(struct kvm *kvm) goto out; for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V3: diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 198296933e7e..f4c4494645c3 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -31,6 +31,41 @@ static int vgic_its_commit_v0(struct vgic_its *its); static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, struct kvm_vcpu *filter_vcpu, bool needs_inv); +#define vgic_its_read_entry_lock(i, g, valp, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(*(valp)) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(*(valp)), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = kvm_read_guest_lock(__k, (g), \ + valp, __sz); \ + __ret; \ + }) + +#define vgic_its_write_entry_lock(i, g, val, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + typeof(val) __v = (val); \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(__v) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(__v), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = vgic_write_guest_lock(__k, (g), \ + &__v, __sz); \ + __ret; \ + }) + /* * Creates a new (reference to a) struct vgic_irq for a given LPI. 
* If this LPI is already mapped on another ITS, we increase its refcount @@ -42,7 +77,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; unsigned long flags; int ret; @@ -419,7 +454,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) last_byte_offset = byte_offset; } - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) continue; @@ -794,7 +829,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, its_free_ite(kvm, ite); - return vgic_its_write_entry_lock(its, gpa, 0, ite_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, ite); } return E_ITS_DISCARD_UNMAPPED_INTERRUPT; @@ -1143,7 +1178,6 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, bool valid = its_cmd_get_validbit(its_cmd); u8 num_eventid_bits = its_cmd_get_size(its_cmd); gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); - int dte_esz = vgic_its_get_abi(its)->dte_esz; struct its_device *device; gpa_t gpa; @@ -1168,7 +1202,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * is an error, so we are done in any case. 
*/ if (!valid) - return vgic_its_write_entry_lock(its, gpa, 0, dte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, dte); device = vgic_its_alloc_device(its, device_id, itt_addr, num_eventid_bits); @@ -1288,7 +1322,7 @@ int vgic_its_invall(struct kvm_vcpu *vcpu) unsigned long intid; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; @@ -1354,7 +1388,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, return 0; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; @@ -2090,7 +2124,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, - struct its_ite *ite, gpa_t gpa, int ite_esz) + struct its_ite *ite, gpa_t gpa) { u32 next_offset; u64 val; @@ -2101,7 +2135,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ite->collection->collection_id; val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, ite_esz); + return vgic_its_write_entry_lock(its, gpa, val, ite); } /** @@ -2201,7 +2235,7 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1) return -EACCES; - ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + ret = vgic_its_save_ite(its, device, ite, gpa); if (ret) return ret; } @@ -2240,10 +2274,9 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA - * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, - gpa_t ptr, int dte_esz) + gpa_t ptr) { u64 val, itt_addr_field; u32 next_offset; @@ -2256,7 +2289,7 @@ static int 
vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, ptr, val, dte_esz); + return vgic_its_write_entry_lock(its, ptr, val, dte); } /** @@ -2332,10 +2365,8 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, */ static int vgic_its_save_device_tables(struct vgic_its *its) { - const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_device_table; struct its_device *dev; - int dte_esz = abi->dte_esz; if (!(baser & GITS_BASER_VALID)) return 0; @@ -2354,7 +2385,7 @@ static int vgic_its_save_device_tables(struct vgic_its *its) if (ret) return ret; - ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + ret = vgic_its_save_dte(its, dev, eaddr); if (ret) return ret; } @@ -2435,7 +2466,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) static int vgic_its_save_cte(struct vgic_its *its, struct its_collection *collection, - gpa_t gpa, int esz) + gpa_t gpa) { u64 val; @@ -2444,7 +2475,7 @@ static int vgic_its_save_cte(struct vgic_its *its, collection->collection_id); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, esz); + return vgic_its_write_entry_lock(its, gpa, val, cte); } /* @@ -2452,7 +2483,7 @@ static int vgic_its_save_cte(struct vgic_its *its, * Return +1 on success, 0 if the entry was invalid (which should be * interpreted as end-of-table), and a negative error value for generic errors. 
*/ -static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa) { struct its_collection *collection; struct kvm *kvm = its->dev->kvm; @@ -2460,7 +2491,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) u64 val; int ret; - ret = vgic_its_read_entry_lock(its, gpa, &val, esz); + ret = vgic_its_read_entry_lock(its, gpa, &val, cte); if (ret) return ret; val = le64_to_cpu(val); @@ -2507,7 +2538,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; list_for_each_entry(collection, &its->collection_list, coll_list) { - ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + ret = vgic_its_save_cte(its, collection, gpa); if (ret) return ret; gpa += cte_esz; @@ -2521,7 +2552,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) * table is not fully filled, add a last dummy element * with valid bit unset */ - return vgic_its_write_entry_lock(its, gpa, 0, cte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, cte); } /* @@ -2546,7 +2577,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; while (read < max_size) { - ret = vgic_its_restore_cte(its, gpa, cte_esz); + ret = vgic_its_restore_cte(its, gpa); if (ret <= 0) break; gpa += cte_esz; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index e070cda86e12..f25fccb1f8e6 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -148,7 +148,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, if (!(targets & (1U << c))) continue; - irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; @@ -167,7 +167,7 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, u64 val = 0; 
for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->targets << (i * 8); @@ -191,7 +191,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, return; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid + i); int target; raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -213,7 +213,7 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->source << (i * 8); @@ -231,7 +231,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -253,7 +253,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 9e50928f5d7d..ae4c0593d114 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid); unsigned long ret = 0; if (!irq) @@ -220,7 +220,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (addr 
& 4) return; - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) return; @@ -530,6 +530,7 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_irq *irq; + u32 intid; /* * If the guest wrote only to the upper 32bit part of the @@ -541,9 +542,13 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, if ((addr & 4) || !vgic_lpis_enabled(vcpu)) return; + intid = lower_32_bits(val); + if (intid < VGIC_MIN_LPI) + return; + vgic_set_rdist_busy(vcpu, true); - irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val)); + irq = vgic_get_irq(vcpu->kvm, intid); if (irq) { vgic_its_inv_lpi(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); @@ -1020,7 +1025,7 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, sgi); unsigned long flags; raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index cf76523a2194..e416e433baff 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -50,7 +50,7 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->group) value |= BIT(i); @@ -74,7 +74,7 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long flags; for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); @@ -102,7 +102,7 @@ unsigned long vgic_mmio_read_enable(struct 
kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->enabled) value |= (1U << i); @@ -122,7 +122,7 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { @@ -171,7 +171,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) @@ -193,7 +193,7 @@ int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; @@ -214,7 +214,7 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; @@ -236,7 +236,7 @@ static unsigned long __read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); unsigned long flags; bool val; @@ -309,7 +309,7 @@ static void __set_pending(struct 
kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -395,7 +395,7 @@ static void __clear_pending(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -494,7 +494,7 @@ static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Even for HW interrupts, don't evaluate the HW state as @@ -598,7 +598,7 @@ static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } @@ -635,7 +635,7 @@ static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } @@ -672,7 +672,7 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->priority << 
(i * 8); @@ -698,7 +698,7 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ @@ -719,7 +719,7 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, int i; for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_EDGE) value |= (2U << (i * 2)); @@ -750,7 +750,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, if (intid + i < VGIC_NR_PRIVATE_IRQS) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_irq(vcpu->kvm, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) @@ -775,7 +775,7 @@ u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) val |= (1U << i); @@ -799,7 +799,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Line level is set irrespective of irq type diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ae5a44d5702d..381673f03c39 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -72,7 +72,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); 
raw_spin_lock(&irq->irq_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b217b256853c..f267bc2486a1 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -65,7 +65,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ continue; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 74a67ad87f29..eedecbbbcf31 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -123,7 +123,7 @@ static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) * IRQ. The SGI code will do its magic. */ for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; @@ -160,7 +160,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) int i; for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f50274fd5581..cc8c6b9b5dd8 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -84,17 +84,11 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) * struct vgic_irq. It also increases the refcount, so any caller is expected * to call vgic_put_irq() once it's finished with this IRQ. 
*/ -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid) +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid) { - /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) { - intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); - return &vcpu->arch.vgic_cpu.private_irqs[intid]; - } - /* SPIs */ - if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (intid >= VGIC_NR_PRIVATE_IRQS && + intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; } @@ -106,6 +100,20 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, return NULL; } +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) +{ + if (WARN_ON(!vcpu)) + return NULL; + + /* SGIs and PPIs */ + if (intid < VGIC_NR_PRIVATE_IRQS) { + intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + return vgic_get_irq(vcpu->kvm, intid); +} + /* * We can't do anything in here, because we lack the kvm pointer to * lock and remove the item from the lpi_list. So we keep this function @@ -437,7 +445,10 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, trace_vgic_update_irq_pending(vcpu ? 
vcpu->vcpu_idx : 0, intid, level); - irq = vgic_get_irq(kvm, vcpu, intid); + if (intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, intid); + else + irq = vgic_get_irq(kvm, intid); if (!irq) return -EINVAL; @@ -499,7 +510,7 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret; @@ -524,7 +535,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, */ void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; if (!irq->hw) @@ -547,7 +558,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; - irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -560,7 +571,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret = -1; @@ -596,7 +607,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) return -EINVAL; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->owner && irq->owner != owner) ret = -EEXIST; @@ -1008,7 +1019,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return false; - irq = 
vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); raw_spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; raw_spin_unlock_irqrestore(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 309295f5e1b0..122d95b4e284 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -146,29 +146,6 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -static inline int vgic_its_read_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 *eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(*eval), kvm)) - return -EINVAL; - - return kvm_read_guest_lock(kvm, eaddr, eval, esize); - -} - -static inline int vgic_its_write_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(eval), kvm)) - return -EINVAL; - - return vgic_write_guest_lock(kvm, eaddr, &eval, esize); -} - /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -202,8 +179,8 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, const struct vgic_register_region * vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid); +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index b837c65a4894..c8e4057734d0 100644 --- 
a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -24,6 +24,16 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_t clear; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); +} + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 944482063f14..3089785ca97e 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -683,7 +683,17 @@ DEF_EMIT_REG2I16_FORMAT(blt, blt_op) DEF_EMIT_REG2I16_FORMAT(bge, bge_op) DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) -DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, + enum loongarch_gpr rj, + int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.immediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} #define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 2bf86aeda874..de21e72759ee 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -95,7 +95,7 @@ static void __init init_screen_info(void) memset(si, 0, sizeof(*si)); early_memunmap(si, sizeof(*si)); - memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); + memblock_reserve(__screen_info_lfb_base(&screen_info), screen_info.lfb_size); } void __init efi_init(void) diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 3050329556d1..14d7d700bcb9 100644 --- 
a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -332,7 +332,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) return INSN_BREAK; } - emit_jirl(&insn, rj, rd, imm >> 2); + emit_jirl(&insn, rd, rj, imm >> 2); return insn.word; } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5d59e9ce2772..fbf747447f13 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -82,7 +82,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10); seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); } } diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 69f3e3782cc9..a7893bd01e73 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -156,7 +156,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; unsigned long *val; u32 addr, rd, rj, opcode; @@ -167,7 +167,6 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) rj = inst.reg2_format.rj; opcode = inst.reg2_format.opcode; addr = vcpu->arch.gprs[rj]; - ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; val = &vcpu->arch.gprs[rd]; @@ -207,20 +206,28 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } if (run->iocsr_io.is_write) { - if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = 
EMULATE_DO_IOCSR; /* Save data and let user space to write it */ memcpy(run->iocsr_io.data, val, run->iocsr_io.len); - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { - if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save register id for iocsr read completion */ vcpu->arch.io_gpr = rd; - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -359,7 +366,7 @@ static int kvm_handle_gspr(struct kvm_vcpu *vcpu) int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int op8, opcode, rd; struct kvm_run *run = vcpu->run; @@ -464,8 +471,10 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, &vcpu->arch.gprs[rd]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { update_pc(&vcpu->arch); vcpu->mmio_needed = 0; @@ -531,7 +540,7 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int rd, op8, opcode; unsigned long curr_pc, rd_val = 0; struct kvm_run *run = vcpu->run; @@ -631,7 +640,9 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. 
*/ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) return EMULATE_DONE; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index a233a323e295..93f4acd44523 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -98,7 +98,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { - int i, ret; + int i, idx, ret; uint32_t val = 0, mask = 0; /* @@ -107,7 +107,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) */ if ((data >> 27) & 0xf) { /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); return ret; @@ -121,7 +123,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) val &= mask; } val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index cab1818be68d..d18a4a270415 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -240,7 +240,7 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu) */ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; /* * Check conditions before entering the guest @@ -249,7 +249,9 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) if (ret < 0) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_check_requests(vcpu); + 
srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index dd350cba1252..ea357a3edc09 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -181,13 +181,13 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) /* Set return value */ emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0); /* Return to the caller */ - emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); } else { /* * Call the next bpf prog and skip the first instruction * of TCC initialization. */ - emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1); } } @@ -904,7 +904,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; move_addr(ctx, t1, func_addr); - emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); break; diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index f8edc9082724..20d877cb4e30 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -503,6 +503,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=m CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m CONFIG_RTC_DRV_GENERIC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 71fc71bb660e..5e1c8d0d3da5 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -391,6 +391,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set diff --git 
a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 41072e68028e..5d1409e6a137 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -392,6 +392,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h index dbf88059e47a..6ad93bac06f9 100644 --- a/arch/m68k/include/asm/mvme147hw.h +++ b/arch/m68k/include/asm/mvme147hw.h @@ -4,24 +4,7 @@ #include <asm/irq.h> -typedef struct { - unsigned char - ctrl, - bcd_sec, - bcd_min, - bcd_hr, - bcd_dow, - bcd_dom, - bcd_mth, - bcd_year; -} MK48T02; - -#define RTC_WRITE 0x80 -#define RTC_READ 0x40 -#define RTC_STOP 0x20 - -#define m147_rtc ((MK48T02 * volatile)0xfffe07f8) - +#define MVME147_RTC_BASE 0xfffe0000 struct pcc_regs { volatile u_long dma_tadr; diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h index cc7f5ae1220f..ff1126a51fbe 100644 --- a/arch/m68k/include/asm/mvme16xhw.h +++ b/arch/m68k/include/asm/mvme16xhw.h @@ -24,23 +24,7 @@ typedef struct { #define mvmelp ((*(volatile MVMElpPtr)(MVME_LPR_BASE))) -typedef struct { - unsigned char - ctrl, - bcd_sec, - bcd_min, - bcd_hr, - bcd_dow, - bcd_dom, - bcd_mth, - bcd_year; -} MK48T08_t, *MK48T08ptr_t; - -#define RTC_WRITE 0x80 -#define RTC_READ 0x40 -#define RTC_STOP 0x20 - -#define MVME_RTC_BASE 0xfffc1ff8 +#define MVME_RTC_BASE 0xfffc0000 #define MVME_I596_BASE 0xfff46000 diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 824c42a302c6..3054d3857efa 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -19,8 +19,9 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/rtc.h> #include <linux/interrupt.h> +#include <linux/platform_device.h> +#include 
<linux/rtc/m48t59.h> #include <asm/bootinfo.h> #include <asm/bootinfo-vme.h> @@ -36,13 +37,9 @@ static void mvme147_get_model(char *model); static void __init mvme147_sched_init(void); -extern int mvme147_hwclk (int, struct rtc_time *); extern void mvme147_reset (void); -static int bcd2int (unsigned char b); - - int __init mvme147_parse_bootinfo(const struct bi_record *bi) { uint16_t tag = be16_to_cpu(bi->tag); @@ -80,7 +77,6 @@ void __init config_mvme147(void) { mach_sched_init = mvme147_sched_init; mach_init_IRQ = mvme147_init_IRQ; - mach_hwclk = mvme147_hwclk; mach_reset = mvme147_reset; mach_get_model = mvme147_get_model; @@ -89,6 +85,28 @@ void __init config_mvme147(void) vme_brdtype = VME_TYPE_MVME147; } +static struct resource m48t59_rsrc[] = { + DEFINE_RES_MEM(MVME147_RTC_BASE, 0x800), +}; + +static struct m48t59_plat_data m48t59_data = { + .type = M48T59RTC_TYPE_M48T02, + .yy_offset = 70, +}; + +static int __init mvme147_platform_init(void) +{ + if (!MACH_IS_MVME147) + return 0; + + platform_device_register_resndata(NULL, "rtc-m48t59", -1, + m48t59_rsrc, ARRAY_SIZE(m48t59_rsrc), + &m48t59_data, sizeof(m48t59_data)); + return 0; +} + +arch_initcall(mvme147_platform_init); + static u64 mvme147_read_clk(struct clocksource *cs); static struct clocksource mvme147_clk = { @@ -162,31 +180,6 @@ static u64 mvme147_read_clk(struct clocksource *cs) return ticks; } -static int bcd2int (unsigned char b) -{ - return ((b>>4)*10 + (b&15)); -} - -int mvme147_hwclk(int op, struct rtc_time *t) -{ - if (!op) { - m147_rtc->ctrl = RTC_READ; - t->tm_year = bcd2int (m147_rtc->bcd_year); - t->tm_mon = bcd2int(m147_rtc->bcd_mth) - 1; - t->tm_mday = bcd2int (m147_rtc->bcd_dom); - t->tm_hour = bcd2int (m147_rtc->bcd_hr); - t->tm_min = bcd2int (m147_rtc->bcd_min); - t->tm_sec = bcd2int (m147_rtc->bcd_sec); - m147_rtc->ctrl = 0; - if (t->tm_year < 70) - t->tm_year += 100; - } else { - /* FIXME Setting the time is not yet supported */ - return -EOPNOTSUPP; - } - return 0; -} - static 
void scc_delay(void) { __asm__ __volatile__ ("nop; nop;"); diff --git a/arch/m68k/mvme16x/Makefile b/arch/m68k/mvme16x/Makefile index a8a368c2cbea..02f9e4ad8209 100644 --- a/arch/m68k/mvme16x/Makefile +++ b/arch/m68k/mvme16x/Makefile @@ -3,4 +3,4 @@ # Makefile for Linux arch/m68k/mvme16x source directory # -obj-y := config.o rtc.o +obj-y := config.o diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index d1fbd1704d65..99768fe8da73 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -21,9 +21,10 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/rtc.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/rtc/m48t59.h> #include <asm/bootinfo.h> #include <asm/bootinfo-vme.h> @@ -39,16 +40,10 @@ extern t_bdid mvme_bdid; -static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE; - static void mvme16x_get_model(char *model); extern void mvme16x_sched_init(void); -extern int mvme16x_hwclk (int, struct rtc_time *); extern void mvme16x_reset (void); -int bcd2int (unsigned char b); - - unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); @@ -268,7 +263,6 @@ void __init config_mvme16x(void) mach_sched_init = mvme16x_sched_init; mach_init_IRQ = mvme16x_init_IRQ; - mach_hwclk = mvme16x_hwclk; mach_reset = mvme16x_reset; mach_get_model = mvme16x_get_model; mach_get_hardware_list = mvme16x_get_hardware_list; @@ -312,6 +306,28 @@ void __init config_mvme16x(void) } } +static struct resource m48t59_rsrc[] = { + DEFINE_RES_MEM(MVME_RTC_BASE, 0x2000), +}; + +static struct m48t59_plat_data m48t59_data = { + .type = M48T59RTC_TYPE_M48T08, + .yy_offset = 70, +}; + +static int __init mvme16x_platform_init(void) +{ + if (!MACH_IS_MVME16x) + return 0; + + platform_device_register_resndata(NULL, "rtc-m48t59", -1, + m48t59_rsrc, ARRAY_SIZE(m48t59_rsrc), + &m48t59_data, sizeof(m48t59_data)); + return 0; +} + 
+arch_initcall(mvme16x_platform_init); + static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) { unsigned long *new = (unsigned long *)vectors; @@ -426,28 +442,3 @@ static u64 mvme16x_read_clk(struct clocksource *cs) return ticks; } - -int bcd2int (unsigned char b) -{ - return ((b>>4)*10 + (b&15)); -} - -int mvme16x_hwclk(int op, struct rtc_time *t) -{ - if (!op) { - rtc->ctrl = RTC_READ; - t->tm_year = bcd2int (rtc->bcd_year); - t->tm_mon = bcd2int(rtc->bcd_mth) - 1; - t->tm_mday = bcd2int (rtc->bcd_dom); - t->tm_hour = bcd2int (rtc->bcd_hr); - t->tm_min = bcd2int (rtc->bcd_min); - t->tm_sec = bcd2int (rtc->bcd_sec); - rtc->ctrl = 0; - if (t->tm_year < 70) - t->tm_year += 100; - } else { - /* FIXME Setting the time is not yet supported */ - return -EOPNOTSUPP; - } - return 0; -} diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c deleted file mode 100644 index ccbaae1125e6..000000000000 --- a/arch/m68k/mvme16x/rtc.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Real Time Clock interface for Linux on the MVME16x - * - * Based on the PC driver by Paul Gortmaker. - */ - -#define RTC_VERSION "1.00" - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/ioport.h> -#include <linux/capability.h> -#include <linux/fcntl.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */ -#include <linux/bcd.h> -#include <asm/mvme16xhw.h> - -#include <asm/io.h> -#include <linux/uaccess.h> -#include <asm/setup.h> - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. 
- */ - -static const unsigned char days_in_mo[] = -{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -static atomic_t rtc_ready = ATOMIC_INIT(1); - -static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - volatile MK48T08ptr_t rtc = (MK48T08ptr_t)MVME_RTC_BASE; - unsigned long flags; - struct rtc_time wtime; - void __user *argp = (void __user *)arg; - - switch (cmd) { - case RTC_RD_TIME: /* Read the time/date from RTC */ - { - local_irq_save(flags); - /* Ensure clock and real-time-mode-register are accessible */ - rtc->ctrl = RTC_READ; - memset(&wtime, 0, sizeof(struct rtc_time)); - wtime.tm_sec = bcd2bin(rtc->bcd_sec); - wtime.tm_min = bcd2bin(rtc->bcd_min); - wtime.tm_hour = bcd2bin(rtc->bcd_hr); - wtime.tm_mday = bcd2bin(rtc->bcd_dom); - wtime.tm_mon = bcd2bin(rtc->bcd_mth)-1; - wtime.tm_year = bcd2bin(rtc->bcd_year); - if (wtime.tm_year < 70) - wtime.tm_year += 100; - wtime.tm_wday = bcd2bin(rtc->bcd_dow)-1; - rtc->ctrl = 0; - local_irq_restore(flags); - return copy_to_user(argp, &wtime, sizeof wtime) ? 
- -EFAULT : 0; - } - case RTC_SET_TIME: /* Set the RTC */ - { - struct rtc_time rtc_tm; - unsigned char mon, day, hrs, min, sec, leap_yr; - unsigned int yrs; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (copy_from_user(&rtc_tm, argp, sizeof(struct rtc_time))) - return -EFAULT; - - yrs = rtc_tm.tm_year; - if (yrs < 1900) - yrs += 1900; - mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ - day = rtc_tm.tm_mday; - hrs = rtc_tm.tm_hour; - min = rtc_tm.tm_min; - sec = rtc_tm.tm_sec; - - leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); - - if ((mon > 12) || (day == 0)) - return -EINVAL; - - if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) - return -EINVAL; - - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) - return -EINVAL; - - if (yrs >= 2070) - return -EINVAL; - - local_irq_save(flags); - rtc->ctrl = RTC_WRITE; - - rtc->bcd_sec = bin2bcd(sec); - rtc->bcd_min = bin2bcd(min); - rtc->bcd_hr = bin2bcd(hrs); - rtc->bcd_dom = bin2bcd(day); - rtc->bcd_mth = bin2bcd(mon); - rtc->bcd_year = bin2bcd(yrs%100); - - rtc->ctrl = 0; - local_irq_restore(flags); - return 0; - } - default: - return -EINVAL; - } -} - -/* - * We enforce only one user at a time here with the open/close. - */ -static int rtc_open(struct inode *inode, struct file *file) -{ - if( !atomic_dec_and_test(&rtc_ready) ) - { - atomic_inc( &rtc_ready ); - return -EBUSY; - } - return 0; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - atomic_inc( &rtc_ready ); - return 0; -} - -/* - * The various file operations we support. 
- */ - -static const struct file_operations rtc_fops = { - .unlocked_ioctl = rtc_ioctl, - .open = rtc_open, - .release = rtc_release, - .llseek = noop_llseek, -}; - -static struct miscdevice rtc_dev= -{ - .minor = RTC_MINOR, - .name = "rtc", - .fops = &rtc_fops -}; - -static int __init rtc_MK48T08_init(void) -{ - if (!MACH_IS_MVME16x) - return -ENODEV; - - pr_info("MK48T08 Real Time Clock Driver v%s\n", RTC_VERSION); - return misc_register(&rtc_dev); -} -device_initcall(rtc_MK48T08_init); diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index cce9428afc41..ee71045883e7 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -70,7 +70,6 @@ device_type = "pci"; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <2>; msi-parent = <&msi>; reg = <0 0x1a000000 0 0x02000000>, @@ -234,7 +233,7 @@ }; }; - pci_bridge@9,0 { + pcie@9,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -244,12 +243,16 @@ interrupts = <32 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 32 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@a,0 { + pcie@a,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -259,12 +262,16 @@ interrupts = <33 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 33 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@b,0 { + pcie@b,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -274,12 +281,16 @@ interrupts = <34 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 
0 0 0 &pic 34 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@c,0 { + pcie@c,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -289,12 +300,16 @@ interrupts = <35 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 35 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@d,0 { + pcie@d,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -304,12 +319,16 @@ interrupts = <36 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 36 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@e,0 { + pcie@e,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -319,12 +338,16 @@ interrupts = <37 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 37 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@f,0 { + pcie@f,0 { compatible = "pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -334,12 +357,16 @@ interrupts = <40 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 40 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@10,0 { + pcie@10,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -349,12 +376,16 @@ interrupts = <41 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 41 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@11,0 { + pcie@11,0 { compatible = 
"pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -364,12 +395,16 @@ interrupts = <42 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 42 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@12,0 { + pcie@12,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -379,12 +414,16 @@ interrupts = <43 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 43 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@13,0 { + pcie@13,0 { compatible = "pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -394,12 +433,16 @@ interrupts = <38 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 38 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@14,0 { + pcie@14,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -409,9 +452,13 @@ interrupts = <39 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 39 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; }; diff --git a/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi deleted file mode 100644 index 17a342cc744e..000000000000 --- a/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi +++ /dev/null @@ -1,270 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright 2023 Mobileye Vision Technologies Ltd. 
- */ - -#include <dt-bindings/clock/mobileye,eyeq5-clk.h> - -/ { - /* Fixed clock */ - xtal: xtal { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <30000000>; - }; - -/* PLL_CPU derivatives */ - occ_cpu: occ-cpu { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - si_css0_ref_clk: si-css0-ref-clk { /* gate ClkRstGen_si_css0_ref */ - compatible = "fixed-factor-clock"; - clocks = <&occ_cpu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - cpc_clk: cpc-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core0_clk: core0-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core1_clk: core1-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core2_clk: core2-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core3_clk: core3-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - cm_clk: cm-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - mem_clk: mem-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - occ_isram: occ-isram { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - }; - isram_clk: isram-clk { /* gate ClkRstGen_isram */ - compatible = "fixed-factor-clock"; - clocks = <&occ_isram>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = 
<1>; - }; - occ_dbu: occ-dbu { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - }; - si_dbu_tp_pclk: si-dbu-tp-pclk { /* gate ClkRstGen_dbu */ - compatible = "fixed-factor-clock"; - clocks = <&occ_dbu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; -/* PLL_VDI derivatives */ - occ_vdi: occ-vdi { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - }; - vdi_clk: vdi-clk { /* gate ClkRstGen_vdi */ - compatible = "fixed-factor-clock"; - clocks = <&occ_vdi>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - occ_can_ser: occ-can-ser { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <16>; - clock-mult = <1>; - }; - can_ser_clk: can-ser-clk { /* gate ClkRstGen_can_ser */ - compatible = "fixed-factor-clock"; - clocks = <&occ_can_ser>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - i2c_ser_clk: i2c-ser-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <20>; - clock-mult = <1>; - }; -/* PLL_PER derivatives */ - occ_periph: occ-periph { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <16>; - clock-mult = <1>; - }; - periph_clk: periph-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - can_clk: can-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - spi_clk: spi-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - uart_clk: uart-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; 
- clock-mult = <1>; - }; - i2c_clk: i2c-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "i2c_clk"; - }; - timer_clk: timer-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "timer_clk"; - }; - gpio_clk: gpio-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "gpio_clk"; - }; - emmc_sys_clk: emmc-sys-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - clock-output-names = "emmc_sys_clk"; - }; - ccf_ctrl_clk: ccf-ctrl-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <4>; - clock-mult = <1>; - clock-output-names = "ccf_ctrl_clk"; - }; - occ_mjpeg_core: occ-mjpeg-core { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - clock-output-names = "occ_mjpeg_core"; - }; - hsm_clk: hsm-clk { /* gate ClkRstGen_hsm */ - compatible = "fixed-factor-clock"; - clocks = <&occ_mjpeg_core>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "hsm_clk"; - }; - mjpeg_core_clk: mjpeg-core-clk { /* gate ClkRstGen_mjpeg_gen */ - compatible = "fixed-factor-clock"; - clocks = <&occ_mjpeg_core>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "mjpeg_core_clk"; - }; - fcmu_a_clk: fcmu-a-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <20>; - clock-mult = <1>; - clock-output-names = "fcmu_a_clk"; - }; - occ_pci_sys: occ-pci-sys { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <8>; 
- clock-mult = <1>; - clock-output-names = "occ_pci_sys"; - }; - pclk: pclk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <250000000>; /* 250MHz */ - }; - tsu_clk: tsu-clk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <125000000>; /* 125MHz */ - }; -}; diff --git a/arch/mips/boot/dts/mobileye/eyeq5.dtsi b/arch/mips/boot/dts/mobileye/eyeq5.dtsi index 0708771c193d..5d73e8320b8e 100644 --- a/arch/mips/boot/dts/mobileye/eyeq5.dtsi +++ b/arch/mips/boot/dts/mobileye/eyeq5.dtsi @@ -5,7 +5,7 @@ #include <dt-bindings/interrupt-controller/mips-gic.h> -#include "eyeq5-clocks.dtsi" +#include <dt-bindings/clock/mobileye,eyeq5-clk.h> / { #address-cells = <2>; @@ -17,7 +17,7 @@ device_type = "cpu"; compatible = "img,i6500"; reg = <0>; - clocks = <&core0_clk>; + clocks = <&olb EQ5C_CPU_CORE0>; }; }; @@ -64,6 +64,24 @@ #interrupt-cells = <1>; }; + xtal: xtal { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + }; + + pclk: pclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <250000000>; /* 250MHz */ + }; + + tsu_clk: tsu-clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; /* 125MHz */ + }; + soc: soc { #address-cells = <2>; #size-cells = <2>; @@ -76,7 +94,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 10>; pinctrl-names = "default"; @@ -89,7 +107,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 11>; pinctrl-names = "default"; @@ -102,7 +120,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 
IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 12>; pinctrl-names = "default"; @@ -135,7 +153,7 @@ timer { compatible = "mti,gic-timer"; interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; - clocks = <&core0_clk>; + clocks = <&olb EQ5C_CPU_CORE0>; }; }; }; diff --git a/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi deleted file mode 100644 index 5fa99e06fde7..000000000000 --- a/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright 2023 Mobileye Vision Technologies Ltd. - */ - -#include <dt-bindings/clock/mobileye,eyeq5-clk.h> - -/ { - xtal: clock-30000000 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <30000000>; - }; - - pll_west: clock-2000000000-west { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <2000000000>; - }; - - pll_cpu: clock-2000000000-cpu { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <2000000000>; - }; - - /* pll-cpu derivatives */ - occ_cpu: clock-2000000000-occ-cpu { - compatible = "fixed-factor-clock"; - clocks = <&pll_cpu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - - /* pll-west derivatives */ - occ_periph_w: clock-200000000 { - compatible = "fixed-factor-clock"; - clocks = <&pll_west>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - }; - uart_clk: clock-200000000-uart { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph_w>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - -}; diff --git a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi index 1db3c3cda2e3..4a1a43f351d3 100644 --- a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi +++ b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi @@ -5,7 +5,7 @@ 
#include <dt-bindings/interrupt-controller/mips-gic.h> -#include "eyeq6h-fixed-clocks.dtsi" +#include <dt-bindings/clock/mobileye,eyeq5-clk.h> / { #address-cells = <2>; @@ -17,7 +17,7 @@ device_type = "cpu"; compatible = "img,i6500"; reg = <0>; - clocks = <&occ_cpu>; + clocks = <&olb_central EQ6HC_CENTRAL_CPU_OCC>; }; }; @@ -32,19 +32,42 @@ #interrupt-cells = <1>; }; + xtal: clock-30000000 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + }; + soc: soc { compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; + olb_acc: system-controller@d2003000 { + compatible = "mobileye,eyeq6h-acc-olb", "syscon"; + reg = <0x0 0xd2003000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + + olb_central: system-controller@d3100000 { + compatible = "mobileye,eyeq6h-central-olb", "syscon"; + reg = <0x0 0xd3100000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + uart0: serial@d3331000 { compatible = "arm,pl011", "arm,primecell"; reg = <0 0xd3331000 0x0 0x1000>; reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 43 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&occ_periph_w>, <&occ_periph_w>; + clocks = <&olb_west EQ6HC_WEST_PER_UART>, <&olb_west EQ6HC_WEST_PER_OCC>; clock-names = "uartclk", "apb_pclk"; }; @@ -56,6 +79,15 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_west: system-controller@d3338000 { + compatible = "mobileye,eyeq6h-west-olb", "syscon"; + reg = <0x0 0xd3338000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + pinctrl_east: pinctrl@d3357000 { compatible = "pinctrl-single"; reg = <0x0 0xd3357000 0x0 0xb0>; @@ -64,6 +96,23 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_east: system-controller@d3358000 { + compatible = "mobileye,eyeq6h-east-olb", "syscon"; + reg = <0x0 0xd3358000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = 
<&xtal>; + clock-names = "ref"; + }; + + olb_south: system-controller@d8013000 { + compatible = "mobileye,eyeq6h-south-olb", "syscon"; + reg = <0x0 0xd8013000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + pinctrl_south: pinctrl@d8014000 { compatible = "pinctrl-single"; reg = <0x0 0xd8014000 0x0 0xf8>; @@ -72,6 +121,22 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_ddr0: system-controller@e4080000 { + compatible = "mobileye,eyeq6h-ddr0-olb", "syscon"; + reg = <0x0 0xe4080000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + + olb_ddr1: system-controller@e4081000 { + compatible = "mobileye,eyeq6h-ddr1-olb", "syscon"; + reg = <0x0 0xe4081000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + gic: interrupt-controller@f0920000 { compatible = "mti,gic"; reg = <0x0 0xf0920000 0x0 0x20000>; @@ -89,7 +154,7 @@ timer { compatible = "mti,gic-timer"; interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; - clocks = <&occ_cpu>; + clocks = <&olb_central EQ6HC_CENTRAL_CPU_OCC>; }; }; }; diff --git a/arch/mips/boot/dts/realtek/rtl930x.dtsi b/arch/mips/boot/dts/realtek/rtl930x.dtsi index 6a6f3f3fe389..17577457d159 100644 --- a/arch/mips/boot/dts/realtek/rtl930x.dtsi +++ b/arch/mips/boot/dts/realtek/rtl930x.dtsi @@ -61,6 +61,8 @@ }; &soc { + ranges = <0x0 0x18000000 0x20000>; + intc: interrupt-controller@3000 { compatible = "realtek,rtl9300-intc", "realtek,rtl-intc"; reg = <0x3000 0x18>, <0x3018 0x18>; @@ -88,6 +90,17 @@ interrupts = <7>, <8>, <9>, <10>, <11>; clocks = <&lx_clk>; }; + + snand: spi@1a400 { + compatible = "realtek,rtl9301-snand"; + reg = <0x1a400 0x44>; + interrupt-parent = <&intc>; + interrupts = <19>; + clocks = <&lx_clk>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; }; &uart0 { diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index b825ed4476c7..e3ff6179c99f 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S 
@@ -59,6 +59,7 @@ #endif .endm + __HEAD #ifndef CONFIG_NO_EXCEPT_FILL /* * Reserved space for exception handlers. diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 9ff55cb80a64..2b708fac8d2c 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -61,6 +61,7 @@ SECTIONS /* read-only */ _text = .; /* Text and read-only data */ .text : { + HEAD_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 45ddbaa6c123..dae856fb3e5b 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -749,7 +749,7 @@ static void bridge_remove(struct platform_device *pdev) static struct platform_driver bridge_driver = { .probe = bridge_probe, - .remove_new = bridge_remove, + .remove = bridge_remove, .driver = { .name = "xtalk-bridge", } diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 41489483a602..f3804103c56c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -403,10 +403,12 @@ PHONY += stack_protector_prepare stack_protector_prepare: prepare0 ifdef CONFIG_PPC64 $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \ - -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) else $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 \ - -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h)) + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) endif endif diff --git a/arch/powerpc/crypto/vmx.c b/arch/powerpc/crypto/vmx.c index 7eb713cc87c8..0b725e826388 100644 --- a/arch/powerpc/crypto/vmx.c +++ 
b/arch/powerpc/crypto/vmx.c @@ -74,4 +74,4 @@ MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 73210e5bcfa7..8e776ba39497 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2848,7 +2848,7 @@ static void __init fixup_device_tree_chrp(void) #endif #if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2888,7 +2888,31 @@ static void __init fixup_device_tree_pmac(void) &parent, sizeof(parent)); } #else -#define fixup_device_tree_pmac() +#define fixup_device_tree_pmac64() +#endif + +#ifdef CONFIG_PPC_PMAC +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} +#else +static inline void fixup_device_tree_pmac(void) { } #endif #ifdef CONFIG_PPC_EFIKA @@ -3111,6 +3135,7 @@ static void __init fixup_device_tree(void) { fixup_device_tree_chrp(); fixup_device_tree_pmac(); + fixup_device_tree_pmac64(); fixup_device_tree_efika(); fixup_device_tree_pasemi(); } diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index c5d0f92c7969..384c9dc1899a 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -10,7 +10,6 @@ #include <linux/memblock.h> #include <linux/mem_encrypt.h> #include 
<linux/cc_platform.h> -#include <linux/mem_encrypt.h> #include <asm/machdep.h> #include <asm/svm.h> #include <asm/swiotlb.h> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index cc63aef41e94..d4a7ca0388c0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,6 +32,7 @@ config RISCV select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_HW_PTE_YOUNG select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 9fe1ee740dda..13fbc0f94238 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -135,7 +135,7 @@ stack_protector_prepare: prepare0 -mstack-protector-guard-reg=tp \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) endif # arch specific predefines for sparse diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index fe5d4eb9adea..37bdea65bbd8 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -199,6 +199,7 @@ /* xENVCFG flags */ #define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_PBMTE (_AC(1, ULL) << 62) +#define ENVCFG_ADUE (_AC(1, ULL) << 61) #define ENVCFG_PMM (_AC(0x3, ULL) << 32) #define ENVCFG_PMM_PMLEN_0 (_AC(0x0, ULL) << 32) #define ENVCFG_PMM_PMLEN_7 (_AC(0x2, ULL) << 32) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 08d2a5697466..869da082252a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -98,6 +98,8 @@ #define RISCV_ISA_EXT_SSNPM 89 #define RISCV_ISA_EXT_ZABHA 90 #define RISCV_ISA_EXT_ZICCRSE 91 +#define RISCV_ISA_EXT_SVADE 92 +#define RISCV_ISA_EXT_SVADU 93 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 5d7f3e8c2e50..d4e99eef90ac 100644 
--- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -113,6 +113,7 @@ #include <asm/tlbflush.h> #include <linux/mm_types.h> #include <asm/compat.h> +#include <asm/cpufeature.h> #define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT) @@ -284,7 +285,6 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT -#include <asm/cpufeature.h> static __always_inline bool has_svnapot(void) { @@ -656,6 +656,17 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) } /* + * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By + * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in + * DT. + */ +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) +{ + return riscv_has_extension_unlikely(RISCV_ISA_EXT_SVADU); +} + +/* * THP functions */ static inline pmd_t pte_pmd(pte_t pte) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 4f24201376b1..3482c9a73d1b 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -177,6 +177,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZAWRS, KVM_RISCV_ISA_EXT_SMNPM, KVM_RISCV_ISA_EXT_SSNPM, + KVM_RISCV_ISA_EXT_SVADE, + KVM_RISCV_ISA_EXT_SVADU, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 467c5c735bf5..c0916ed318c2 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -136,6 +136,16 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} + static const unsigned 
int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -387,6 +397,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index dc3f76f6e46c..e048dcc6e65e 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -551,6 +551,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, ZICBOZ)) cfg->henvcfg |= ENVCFG_CBZE; + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; if (riscv_isa_extension_available(isa, SSAIA)) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 5b68490ad9b7..753f66c8b70a 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,6 +15,7 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> +#include <asm/pgtable.h> #include <asm/vector.h> #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) @@ -40,6 +41,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -112,6 +115,12 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SSCOFPMF: /* 
Sscofpmf depends on interrupt filtering defined in ssaia */ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Guest OS can use Svadu only when host OS enable Svadu. + */ + return arch_has_hw_pte_young(); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -185,6 +194,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade is not allowed to disable when the platform use Svade. + */ + return arch_has_hw_pte_young(); default: break; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c64b2987d108..0077969170e8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -87,6 +87,7 @@ config S390 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -218,6 +219,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RETHOOK diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 8cc02d6e0d0f..9c46b1b630b1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1168,4 +1168,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ccd4e148b5ed..a7f7bdc9e19c 100644 --- 
a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_format_proc_t) (debug_info_t *id, struct debug_view *view, char *out_buf, + size_t out_buf_size, const char *in_buf); typedef int (debug_prolog_proc_t) (debug_info_t *id, struct debug_view *view, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_input_proc_t) (debug_info_t *id, struct debug_view *view, struct file *file, @@ -81,7 +82,8 @@ typedef int (debug_input_proc_t) (debug_info_t *id, size_t in_buf_size, loff_t *offset); int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); + int area, debug_entry_t *entry, + char *out_buf, size_t out_buf_size); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 64761c78f774..13f51a6a5bb1 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -17,8 +17,8 @@ #define GMAP_NOTIFY_MPROT 0x1 /* Status bits only for huge segment entries */ -#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ -#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ +#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */ /** * struct gmap_struct - guest address space diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 6f815d4ba0ca..a40664b236e9 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -10,6 +10,8 @@ #define _ASM_S390_HUGETLB_H #include <linux/pgtable.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/page.h> #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -78,7 +80,7 @@ static inline int 
huge_pte_none(pte_t pte) #define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { - return huge_pte_none(pte); + return huge_pte_none(pte) || is_pte_marker(pte); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1cd8eaebd3c0..97c7c8127543 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -94,11 +94,16 @@ union ipte_control { }; }; +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. + */ union sca_utility { - __u16 val; + __u32 val; struct { - __u16 mtcr : 1; - __u16 reserved : 15; + __u32 mtcr : 1; + __u32 : 31; }; }; @@ -107,7 +112,7 @@ struct bsca_block { __u64 reserved[5]; __u64 mcn; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; }; @@ -115,7 +120,7 @@ struct esca_block { union ipte_control ipte_control; __u64 reserved1[6]; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; __u64 mcn[4]; __u64 reserved3[20]; struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 8b67036edb69..48268095b0a3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -277,7 +277,8 @@ static inline int is_module_addr(void *addr) #define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) #define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \ + _REGION3_ENTRY_PRESENT) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define 
_REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL @@ -285,18 +286,27 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_WRITE 0x0002 /* SW region write bit */ -#define _REGION3_ENTRY_READ 0x0001 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */ +#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */ #else #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +/* + * SW region present bit. For non-leaf region-third-table entries, bits 62-63 + * indicate the TABLE LENGTH and both must be set to 1. But such entries + * would always be considered as present, so it is safe to use bit 63 as + * PRESENT bit for PUD. 
+ */ +#define _REGION3_ENTRY_PRESENT 0x0001 + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL @@ -308,21 +318,29 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */ -#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ + +#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */ -#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */ +#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */ #else #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ #endif +#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */ + +/* Common bits in region and segment table entries, for swap entries */ +#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */ +#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */ + #define _CRST_ENTRIES 2048 /* number of region/segment table entries */ #define _PAGE_ENTRIES 256 /* number of page table entries */ @@ -454,17 +472,22 @@ static inline int is_module_addr(void *addr) /* * Segment entry (large page) protection definitions. 
*/ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ @@ -491,6 +514,7 @@ static inline int is_module_addr(void *addr) */ #define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -498,12 +522,14 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -746,7 +772,7 @@ static inline int pud_present(pud_t pud) { if (pud_folded(pud)) return 1; - return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; + return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0; } static inline int pud_none(pud_t pud) @@ -761,13 +787,18 @@ static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & 
_REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; - return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); + return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0); +} + +static inline int pmd_present(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0; } #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; + return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0); } static inline int pmd_bad(pmd_t pmd) @@ -799,11 +830,6 @@ static inline int p4d_bad(p4d_t p4d) return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; } -static inline int pmd_present(pmd_t pmd) -{ - return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; -} - static inline int pmd_none(pmd_t pmd) { return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; @@ -1851,7 +1877,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; + return pmd_leaf(pmd); } #define has_transparent_hugepage has_transparent_hugepage @@ -1911,6 +1937,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE) + * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste + * as invalid. + * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010 + * | offset |Xtype |11TT|S0| + * |0000000000111111111122222222223333333333444444444455|555555|5566|66| + * |0123456789012345678901234567890123456789012345678901|234567|8901|23| + * + * Bits 0-51 store the offset. + * Bits 53-57 store the type. + * Bit 62 (S) is used for softdirty tracking. + * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT. + * Bit 52 (X) is unused. 
+ */ + +#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT_RSTE 12 +#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT_RSTE 6 + +/* + * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3 + * bits 0x01. See also __set_huge_pte_at(). + */ +static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset) +{ + unsigned long rste; + + rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM; + rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE; + rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE; + return rste; +} + +static inline unsigned long __swp_type_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE; +} + +static inline unsigned long __swp_offset_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE; +} + +#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 0cde7e240373..2c29bdf12127 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -130,10 +130,24 @@ static __always_inline bool should_resched(int preempt_offset) #define init_idle_preempt_count(p, cpu) do { } while (0) #ifdef CONFIG_PREEMPTION -extern void preempt_schedule(void); -#define __preempt_schedule() preempt_schedule() -extern void preempt_schedule_notrace(void); -#define __preempt_schedule_notrace() preempt_schedule_notrace() + +void preempt_schedule(void); +void preempt_schedule_notrace(void); + +#ifdef CONFIG_PREEMPT_DYNAMIC + +void dynamic_preempt_schedule(void); +void dynamic_preempt_schedule_notrace(void); +#define __preempt_schedule() 
dynamic_preempt_schedule() +#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace() + +#else /* CONFIG_PREEMPT_DYNAMIC */ + +#define __preempt_schedule() preempt_schedule() +#define __preempt_schedule_notrace() preempt_schedule_notrace() + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + #endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index ac868a9bb0d1..f87dd0a84855 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -82,9 +82,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) kcsan_release(); asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ - " sth %1,%0\n" - : "=R" (((unsigned short *) &lp->lock)[1]) - : "d" (0) : "cc", "memory"); + " mvhhi %[lock],0\n" + : [lock] "=Q" (((unsigned short *)&lp->lock)[1]) + : + : "memory"); } /* diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 00ac01874a12..c33f7144d1b9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -61,44 +61,45 @@ void arch_setup_new_exec(void); /* * thread information flags bit numbers */ -/* _TIF_WORK bits */ #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_UPROBE 3 /* breakpointed or single-stepping */ -#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ +#define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ +#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests 
with isolated BP */ #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ - #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ - -/* _TIF_TRACE bits */ #define TIF_SYSCALL_TRACE 24 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ #define TIF_SECCOMP 26 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) -#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) -#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) +#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) +#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) - #define _TIF_31BIT BIT(TIF_31BIT) +#define _TIF_MEMDIE BIT(TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) - +#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP) +#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP) #define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP BIT(TIF_SECCOMP) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a6e2cd89b609..9dfd46dd03c6 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -46,11 +46,6 @@ static 
inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; - /* - * If the machine has IDTE we prefer to do a per mm flush - * on all cpus instead of doing a local flush if the mm - * only ran on the local cpu. - */ preempt_disable(); atomic_inc(&mm->context.flush_count); /* Reset TLB flush mask */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b3f2103694e4..de19fd8a6a95 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_prolog_pages_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); @@ -90,9 +92,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf); + char *out_buf, size_t out_buf_size, + const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf); + char *out_buf, size_t out_buf_size, + const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -391,8 
+395,10 @@ static int debug_format_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap, view, p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ @@ -402,12 +408,16 @@ static int debug_format_entry(file_private_info_t *p_info) if (act_entry->clock == 0LL) goto out; /* empty entry */ - if (view->header_proc) + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len, DEBUG_DATA(act_entry)); + } out: return len; } @@ -1292,9 +1302,9 @@ static inline int debug_get_uint(char *buf) */ static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* @@ -1341,14 +1351,14 @@ out: * prints out actual debug level */ static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { int rc = 0; if (id->level == DEBUG_OFF_LEVEL) - rc = sprintf(out_buf, "-\n"); + rc = scnprintf(out_buf, out_buf_size, "-\n"); else - rc = sprintf(out_buf, "%i\n", id->level); + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } @@ -1465,22 +1475,24 @@ out: * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) + char *out_buf, size_t out_buf_size, const char 
*in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) - rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); - rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } @@ -1488,7 +1500,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, * prints header for debug entry */ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { unsigned long sec, usec; unsigned long caller; @@ -1505,9 +1518,9 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", - area, sec, usec, level, except_str, - entry->cpu, (void *)caller); + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); @@ -1520,7 +1533,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf) + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, 
i, rc = 0; @@ -1533,8 +1546,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, goto out; /* bufsize of entry too small */ if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } @@ -1546,12 +1560,13 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: return rc; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1ff13239d4e5..960c08700cf6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -430,9 +430,13 @@ SYM_CODE_START(\name) SYM_CODE_END(\name) .endm + .section .irqentry.text, "ax" + INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + .section .kprobes.text, "ax" + /* * Machine check handler routines */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6295faf0987d..8b80ea57125f 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -489,6 +489,12 @@ int __init arch_init_kprobes(void) return 0; } +int __init 
arch_populate_kprobe_blacklist(void) +{ + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); +} + int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0cde42f8af6e..1e99514fb7ae 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -180,39 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long *sdbt, *curr; - - if (!sfb->sdbt) - return; + unsigned long *sdbt, *curr, *head; sdbt = sfb->sdbt; - curr = sdbt; - + if (!sdbt) + return; + sfb->sdbt = NULL; /* Free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* Process table-link entries */ + head = sdbt; + curr = sdbt; + do { if (is_link_entry(curr)) { + /* Process table-link entries */ curr = get_next_sdbt(curr); - if (sdbt) - free_page((unsigned long)sdbt); - - /* If the origin is reached, sampling buffer is freed */ - if (curr == sfb->sdbt) - break; - else - sdbt = curr; + free_page((unsigned long)sdbt); + sdbt = curr; } else { /* Process SDB pointer */ - if (*curr) { - free_page((unsigned long)phys_to_virt(*curr)); - curr++; - } + free_page((unsigned long)phys_to_virt(*curr)); + curr++; } - } - + } while (curr != head); memset(sfb, 0, sizeof(*sfb)); } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 9f59837d159e..40edfde25f5b 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -151,7 +151,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo break; } if (!store_ip(consume_entry, cookie, entry, perf, ip)) - return; + break; first = false; } pagefault_enable(); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a688351f4ab5..9816b0060fbe 100644 --- a/arch/s390/kvm/gaccess.c +++ 
b/arch/s390/kvm/gaccess.c @@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.k) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -138,7 +138,7 @@ retry: } new = old; new.k = 1; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); out: mutex_unlock(&kvm->arch.ipte_mutex); @@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm) goto out; read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); wake_up(&kvm->arch.ipte_wq); out: @@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.kg) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -182,7 +182,7 @@ retry: new = old; new.k = 1; new.kh++; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } @@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm) read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh) new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); if (!new.kh) wake_up(&kvm->arch.ipte_wq); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4f0e7f61edf7..ea8dce299954 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ 
-118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - int rc, expect; - if (!kvm_s390_use_sca_entries()) return; kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND); @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } read_unlock(&vcpu->kvm->arch.sca_lock); - WARN_ON(rc != expect); /* cannot clear? */ } int psw_extint_disabled(struct kvm_vcpu *vcpu) @@ -247,12 +238,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); if ((u64)gisa != word >> 32) return -EBUSY; _word = (word & ~0xffUL) | iam; - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); return 0; } @@ -270,10 +261,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); _word = word & ~(0xffUL << 24); - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); } /** @@ -291,14 +282,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) u8 pending_mask, alert_mask; u64 word, _word; + word = READ_ONCE(gi->origin->u64.word[0]); do { - word = 
READ_ONCE(gi->origin->u64.word[0]); alert_mask = READ_ONCE(gi->alert.mask); pending_mask = (u8)(word >> 24) & alert_mask; if (pending_mask) return pending_mask; _word = (word & ~0xffUL) | alert_mask; - } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word)); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 442d4a227c0e..d8080c27d45b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) read_lock(&kvm->arch.sca_lock); sca = kvm->arch.sca; + old = READ_ONCE(sca->utility); do { - old = READ_ONCE(sca->utility); new = old; new.mtcr = val; - } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index a61518b549f0..9b9e7fdd5380 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages) page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + cur_pages = atomic_long_read(&user->locked_vm); do { - cur_pages = atomic_long_read(&user->locked_vm); new_pages = cur_pages + nr_pages; if (new_pages > page_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); atomic64_add(nr_pages, ¤t->mm->pinned_vm); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 09d735010ee1..a81a01c44927 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -15,6 +15,7 @@ #include <linux/percpu.h> #include <linux/io.h> #include <asm/alternative.h> +#include <asm/asm.h> int spin_retry = -1; @@ -76,24 +77,43 @@ static inline int arch_load_niai4(int *lock) asm_inline volatile( 
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ - " l %0,%1\n" - : "=d" (owner) : "Q" (*lock) : "memory"); + " l %[owner],%[lock]\n" + : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory"); return owner; } -static inline int arch_cmpxchg_niai8(int *lock, int old, int new) +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) +{ + int cc; + + asm_inline volatile( + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +#else /* __HAVE_ASM_FLAG_OUTPUTS__ */ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ - " cs %0,%3,%1\n" - : "=d" (old), "=Q" (*lock) - : "0" (old), "d" (new), "Q" (*lock) + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock) + : [new] "d" (new) : "cc", "memory"); return expected == old; } +#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */ + static inline struct spin_wait *arch_spin_decode_tail(int lock) { int ix, cpu; @@ -226,7 +246,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. 
*/ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) { + if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ return; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 646326fa0fad..9b681f74dccc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -338,7 +338,8 @@ done: handle_fault_error_nolock(regs, 0); else do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | + VM_FAULT_HWPOISON_LARGE)) { if (!user_mode(regs)) handle_fault_error_nolock(regs, 0); else diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 643e47bfaddc..16b8a36c56de 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -587,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) - | _SEGMENT_ENTRY_GMAP_UC; + | _SEGMENT_ENTRY_GMAP_UC + | _SEGMENT_ENTRY; } else *table = pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS; @@ -2396,7 +2397,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (purge) __pmdp_csp(pmdp); set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); @@ -2450,7 +2452,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); @@ -2485,7 +2488,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) 
gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 7c79cf1bc7d7..d9ce199953de 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -24,6 +24,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) { + swp_entry_t arch_entry; unsigned long rste; /* @@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) */ if (pte_present(pte)) { rste = pte_val(pte) & PAGE_MASK; + rste |= _SEGMENT_ENTRY_PRESENT; rste |= move_set_bit(pte_val(pte), _PAGE_READ, _SEGMENT_ENTRY_READ); rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, @@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte) #endif rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, _SEGMENT_ENTRY_NOEXEC); + } else if (!pte_none(pte)) { + /* swap pte */ + arch_entry = __pte_to_swp_entry(pte); + rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)); } else rste = _SEGMENT_ENTRY_EMPTY; return rste; @@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + swp_entry_t arch_entry; unsigned long pteval; - int present; + int present, none; + pte_t pte; - if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { present = pud_present(__pud(rste)); - else + none = pud_none(__pud(rste)); + } else { present = pmd_present(__pmd(rste)); + none = pmd_none(__pmd(rste)); + } /* * Convert encoding pmd / pud bits pte bits @@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste) pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif pteval |= move_set_bit(rste, 
_SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); + } else if (!none) { + /* swap rste */ + arch_entry = __rste_to_swp_entry(rste); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + pteval = pte_val(pte); } else pteval = _PAGE_INVALID; return __pte(pteval); @@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long rste; rste = __pte_to_rste(pte); - if (!MACHINE_HAS_NX) - rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { @@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm, p4dp = p4d_offset(pgdp, addr); if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); - if (pud_present(*pudp)) { - if (pud_leaf(*pudp)) - return (pte_t *) pudp; + if (sz == PUD_SIZE) + return (pte_t *)pudp; + if (pud_present(*pudp)) pmdp = pmd_offset(pudp, addr); - } } } return (pte_t *) pmdp; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index cbff587dc4e3..88f72745fa59 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -779,8 +779,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) * @fh: Current Function Handle of the device to be created * @state: Initial state after creation either Standby or Configured * - * Creates a new zpci device and adds it to its, possibly newly created, zbus - * as well as zpci_list. + * Allocates a new struct zpci_dev and queries the platform for its details. + * If successful the device can subsequently be added to the zPCI subsystem + * using zpci_add_device(). 
* * Returns: the zdev on success or an error pointer otherwise */ @@ -803,7 +804,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) goto error; zdev->state = state; - kref_init(&zdev->kref); mutex_init(&zdev->state_lock); mutex_init(&zdev->fmb_lock); mutex_init(&zdev->kzdev_lock); @@ -816,6 +816,17 @@ error: return ERR_PTR(rc); } +/** + * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem + * @zdev: The zPCI device to be added + * + * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating + * a new one as necessary. A hotplug slot is created and events start to be handled. + * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used. + * If adding the struct zpci_dev fails the device was not added and should be freed. + * + * Return: 0 on success, or an error code otherwise + */ int zpci_add_device(struct zpci_dev *zdev) { int rc; @@ -829,6 +840,7 @@ int zpci_add_device(struct zpci_dev *zdev) if (rc) goto error_destroy_iommu; + kref_init(&zdev->kref); spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); @@ -928,10 +940,8 @@ void zpci_device_reserved(struct zpci_dev *zdev) void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - int ret; - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); + WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); @@ -939,28 +949,14 @@ void zpci_release_device(struct kref *kref) if (zdev_enabled(zdev)) zpci_disable_device(zdev); - switch (zdev->state) { - case ZPCI_FN_STATE_CONFIGURED: - ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); - fallthrough; - case ZPCI_FN_STATE_STANDBY: - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - zpci_dbg(3, 
"rsv fid:%x\n", zdev->fid); - fallthrough; - case ZPCI_FN_STATE_RESERVED: - if (zdev->has_resources) - zpci_cleanup_bus_resources(zdev); - zpci_bus_device_unregister(zdev); - zpci_destroy_iommu(zdev); - fallthrough; - default: - break; - } + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree_rcu(zdev, rcu); } @@ -1121,7 +1117,8 @@ static void zpci_add_devices(struct list_head *scan_list) list_sort(NULL, scan_list, &zpci_cmp_rid); list_for_each_entry_safe(zdev, tmp, scan_list, entry) { list_del_init(&zdev->entry); - zpci_add_device(zdev); + if (zpci_add_device(zdev)) + kfree(zdev); } } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 47f934f4e828..7f7b732b3f3e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -340,7 +340,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { /* the configuration request may be stale */ if (zdev->state != ZPCI_FN_STATE_STANDBY) @@ -354,7 +357,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { zpci_update_fh(zdev, ccdf->fh); } diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 1dea43381b5a..2b51ad9d5586 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -110,7 +110,7 @@ static void switch_drv_remove(struct platform_device *pdev) static struct platform_driver switch_driver = { .probe = switch_drv_probe, - .remove_new = switch_drv_remove, + 
.remove = switch_drv_remove, .driver = { .name = DRV_NAME, }, diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c index a306bcd6b341..5f6d0e827bae 100644 --- a/arch/sh/kernel/cpu/proc.c +++ b/arch/sh/kernel/cpu/proc.c @@ -132,7 +132,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + return *pos < nr_cpu_ids ? cpu_data + *pos : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 757451c3ea1d..0400078076e5 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -29,7 +29,7 @@ UTS_MACHINE := sparc # versions of gcc. Some gcc versions won't pass -Av8 to binutils when you # give -mcpu=v8. This silently worked with older bintutils versions but # does not any more. -KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu $(call cc-option,-fcall-used-g5) $(call cc-option,-fcall-used-g7) KBUILD_CFLAGS += -Wa,-Av8 KBUILD_AFLAGS += -m32 -Wa,-Av8 @@ -45,7 +45,7 @@ export BITS := 64 UTS_MACHINE := sparc64 KBUILD_CFLAGS += -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare +KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 $(call cc-option,-fcall-used-g7) -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs diff --git a/arch/sparc/include/asm/hvtramp.h b/arch/sparc/include/asm/hvtramp.h index 688ea43af0f5..ce2453ea4f2b 100644 --- a/arch/sparc/include/asm/hvtramp.h +++ b/arch/sparc/include/asm/hvtramp.h @@ -17,7 +17,7 @@ struct hvtramp_descr { __u64 fault_info_va; __u64 fault_info_pa; __u64 thread_reg; - struct hvtramp_mapping maps[1]; + struct hvtramp_mapping maps[]; }; void hv_cpu_startup(unsigned long hvdescr_pa); diff 
--git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h index 4f530a270760..3068809ef9ad 100644 --- a/arch/sparc/include/asm/parport_64.h +++ b/arch/sparc/include/asm/parport_64.h @@ -243,7 +243,7 @@ static struct platform_driver ecpp_driver = { .of_match_table = ecpp_match, }, .probe = ecpp_probe, - .remove_new = ecpp_remove, + .remove = ecpp_remove, }; static int parport_pc_find_nonpci_ports(int autoirq, int autodma) diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index e02074062001..d4c74d6b2e1b 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -814,7 +814,7 @@ static struct platform_driver us3mc_driver = { .of_match_table = us3mc_match, }, .probe = us3mc_probe, - .remove_new = us3mc_remove, + .remove = us3mc_remove, }; static inline bool us3mc_platform(void) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e40c395db202..5cbd6ed5ef6f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -297,9 +297,7 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, unsigned long hv_err; int i; - hdesc = kzalloc(sizeof(*hdesc) + - (sizeof(struct hvtramp_mapping) * - num_kernel_image_mappings - 1), + hdesc = kzalloc(struct_size(hdesc, maps, num_kernel_image_mappings), GFP_KERNEL); if (!hdesc) { printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 08bbdc458596..578fd0d49f30 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -255,6 +255,7 @@ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) static struct m48t59_plat_data m48t59_data = { .read_byte = mostek_read_byte, .write_byte = mostek_write_byte, + .yy_offset = 68, }; /* resource is set at runtime */ diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 60f1c8cc5363..b32f27f929d1 100644 --- a/arch/sparc/kernel/time_64.c +++ 
b/arch/sparc/kernel/time_64.c @@ -544,6 +544,7 @@ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) static struct m48t59_plat_data m48t59_data = { .read_byte = mostek_read_byte, .write_byte = mostek_write_byte, + .yy_offset = 68, }; static struct platform_device m48t59_rtc = { diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d317a843f7ea..f1b86eb30340 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -48,6 +48,11 @@ SECTIONS { _text = .; HEAD_TEXT + ALIGN_FUNCTION(); +#ifdef CONFIG_SPARC64 + /* Match text section symbols in head_64.S first */ + *head_64.o(.text) +#endif TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 243dbfc4609d..50ec2978cda5 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -46,7 +46,7 @@ CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 -fno-omit-frame-pointer -foptimize-sibling-calls \ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO -SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7 +SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 $(call cc-option,-fcall-used-g5) $(call cc-option,-fcall-used-g7) $(vobjs): KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index e794edde6755..79607804ea1b 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -86,6 +86,11 @@ notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, } #ifdef CONFIG_SPARC64 +notrace static __always_inline u64 __shr64(u64 val, int amt) +{ + return val >> amt; +} + notrace static __always_inline u64 vread_tick(void) { u64 ret; @@ -102,6 +107,21 @@ notrace static __always_inline u64 vread_tick_stick(void) return ret; } #else +notrace static __always_inline u64 __shr64(u64 val, int amt) 
+{ + u64 ret; + + __asm__ __volatile__("sllx %H1, 32, %%g1\n\t" + "srl %L1, 0, %L1\n\t" + "or %%g1, %L1, %%g1\n\t" + "srlx %%g1, %2, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret) + : "r" (val), "r" (amt) + : "g1"); + return ret; +} + notrace static __always_inline u64 vread_tick(void) { register unsigned long long ret asm("o4"); @@ -154,7 +174,7 @@ notrace static __always_inline int do_realtime(struct vvar_data *vvar, ts->tv_sec = vvar->wall_time_sec; ns = vvar->wall_time_snsec; ns += vgetsns(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -174,7 +194,7 @@ notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar, ts->tv_sec = vvar->wall_time_sec; ns = vvar->wall_time_snsec; ns += vgetsns_stick(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -194,7 +214,7 @@ notrace static __always_inline int do_monotonic(struct vvar_data *vvar, ts->tv_sec = vvar->monotonic_time_sec; ns = vvar->monotonic_time_snsec; ns += vgetsns(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -214,7 +234,7 @@ notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar, ts->tv_sec = vvar->monotonic_time_sec; ns = vvar->monotonic_time_snsec; ns += vgetsns_stick(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); diff --git a/arch/um/Kconfig b/arch/um/Kconfig index c89575d05021..18051b1cfce0 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select 
ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -32,6 +33,8 @@ config UML select HAVE_ARCH_VMAP_STACK select HAVE_RUST select ARCH_HAS_UBSAN + select HAVE_ARCH_TRACEHOOK + select THREAD_INFO_IN_TASK config MMU bool @@ -94,7 +97,7 @@ config MAY_HAVE_RUNTIME_DEPS config STATIC_LINK bool "Force a static link" - depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS + depends on !MAY_HAVE_RUNTIME_DEPS help This option gives you the ability to force a static link of UML. Normally, UML is linked as a shared binary. This is inconvenient for @@ -209,8 +212,8 @@ config MMAPPER config PGTABLE_LEVELS int - default 3 if 3_LEVEL_PGTABLES - default 2 + default 4 if 64BIT + default 2 if !64BIT config UML_TIME_TRAVEL_SUPPORT bool @@ -227,6 +230,21 @@ config UML_TIME_TRAVEL_SUPPORT It is safe to say Y, but you probably don't need this. +config UML_MAX_USERSPACE_ITERATIONS + int + prompt "Maximum number of unscheduled userspace iterations" + default 10000 + depends on UML_TIME_TRAVEL_SUPPORT + help + In UML inf-cpu and ext time-travel mode userspace can run without being + interrupted. This will eventually overwhelm the kernel and create OOM + situations (mainly RCU not running). This setting specifies the number + of kernel/userspace switches (minor/major page fault, signal or syscall) + for the same userspace thread before the sched_clock is advanced by a + jiffie to trigger scheduling. + + Setting it to zero disables the feature. 
+ config KASAN_SHADOW_OFFSET hex depends on KASAN diff --git a/arch/um/Makefile b/arch/um/Makefile index 00b63bac5eff..1d36a613aad8 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ + -D__close_range=kernel__close_range KBUILD_RUSTFLAGS += -Crelocation-model=pie @@ -70,7 +71,9 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ - -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ + -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ \ + -include $(srctree)/include/linux/compiler-version.h \ + -include $(srctree)/include/linux/kconfig.h #This will adjust *FLAGS accordingly to the platform. 
include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas index 67323b028999..1a27e65bcb9c 100644 --- a/arch/um/Makefile-skas +++ b/arch/um/Makefile-skas @@ -3,15 +3,15 @@ # Licensed under the GPL # -GPROF_OPT += -pg +export UM_GPROF_OPT += -pg ifdef CONFIG_CC_IS_CLANG -GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping +export UM_GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping else -GCOV_OPT += -fprofile-arcs -ftest-coverage +export UM_GCOV_OPT += -fprofile-arcs -ftest-coverage endif -CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) -CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) -LINK-$(CONFIG_GCOV) += $(GCOV_OPT) -LINK-$(CONFIG_GPROF) += $(GPROF_OPT) +CFLAGS-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +CFLAGS-$(CONFIG_GPROF) += $(UM_GPROF_OPT) +LINK-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +LINK-$(CONFIG_GPROF) += $(UM_GPROF_OPT) diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 9c9c77f1255a..1ffa088739f4 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -1,4 +1,3 @@ -CONFIG_3_LEVEL_PGTABLES=y # CONFIG_COMPACTION is not set CONFIG_BINFMT_MISC=m CONFIG_HOSTFS=y diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index a66e556012c4..35f9beeb19b3 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -161,6 +161,8 @@ static __noreturn int winch_thread(void *arg) int count; char c = 1; + os_set_pdeathsig(); + pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; count = write(pipe_fd, &c, sizeof(c)); diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 9d228878cea2..0ac149de1ac0 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); #ifndef MODULE static int set_dsp(char *name, int *add) { + *add = 0; dsp = name; return 0; } @@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); static int 
set_mixer(char *name, int *add) { + *add = 0; mixer = name; return 0; } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 77c4afb8ab90..75d04fb4994a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -336,7 +336,7 @@ static struct platform_driver uml_net_driver = { static void net_device_release(struct device *dev) { - struct uml_net *device = dev_get_drvdata(dev); + struct uml_net *device = container_of(dev, struct uml_net, pdev.dev); struct net_device *netdev = device->dev; struct uml_net_private *lp = netdev_priv(netdev); diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 3a1582219c4b..134a58f93c85 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -176,7 +176,7 @@ static void uml_rtc_remove(struct platform_device *pdev) static struct platform_driver uml_rtc_driver = { .probe = uml_rtc_probe, - .remove_new = uml_rtc_remove, + .remove = uml_rtc_remove, .driver = { .name = "uml-rtc", }, diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 7f28ec1929dc..66c1a8835e36 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -779,7 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) static void ubd_device_release(struct device *dev) { - struct ubd *ubd_dev = dev_get_drvdata(dev); + struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); @@ -898,6 +898,8 @@ static int ubd_add(int n, char **error_out) if (err) goto out_cleanup_disk; + ubd_dev->disk = disk; + return 0; out_cleanup_disk: @@ -1499,6 +1501,7 @@ int io_thread(void *arg) { int n, count, written, res; + os_set_pdeathsig(); os_fix_helper_signals(); while(1){ diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index c992da83268d..64c09db392c1 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -815,7 +815,8 @@ static struct 
platform_driver uml_net_driver = { static void vector_device_release(struct device *dev) { - struct vector_device *device = dev_get_drvdata(dev); + struct vector_device *device = + container_of(dev, struct vector_device, pdev.dev); struct net_device *netdev = device->dev; list_del(&device->list); diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 6f147cd3c9f7..fcfa3b7e021b 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,6 +10,7 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CONFIG 9 @@ -23,7 +24,8 @@ /* Supported transport features */ #define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) /* Supported protocol features */ -#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 2b6e701776b6..65df43fa9be5 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -56,6 +56,7 @@ struct virtio_uml_device { int sock, req_fd, irq; u64 features; u64 protocol_features; + u64 max_vqs; u8 status; u8 registered:1; u8 suspended:1; @@ -72,8 +73,6 @@ struct virtio_uml_vq_info { bool suspended; }; -extern unsigned long long physmem_size, highmem; - #define vu_err(vu_dev, ...) 
dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) /* Vhost-user protocol */ @@ -343,6 +342,17 @@ static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, protocol_features); } +static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev, + u64 *queue_num) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_QUEUE_NUM); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, queue_num); +} + static void vhost_user_reply(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, int response) { @@ -516,6 +526,15 @@ static int vhost_user_init(struct virtio_uml_device *vu_dev) return rc; } + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) { + rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs); + if (rc) + return rc; + } else { + vu_dev->max_vqs = U64_MAX; + } + return 0; } @@ -625,7 +644,7 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_MEM_TABLE, - .header.size = sizeof(msg.payload.mem_regions), + .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]), .payload.mem_regions.num = 1, }; unsigned long reserved = uml_reserved - uml_physmem; @@ -673,13 +692,6 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) if (rc < 0) return rc; - if (highmem) { - msg.payload.mem_regions.num++; - rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, - &fds[1], &msg.payload.mem_regions.regions[1]); - if (rc < 0) - return rc; - } return vhost_user_send(vu_dev, false, &msg, fds, msg.payload.mem_regions.num); @@ -897,7 +909,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, { struct virtio_uml_vq_info *info = vq->priv; int call_fds[2]; - int rc; + int rc, irq; /* no call FD needed/desired in this case */ if (vu_dev->protocol_features & @@ -914,19 +926,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, return rc; info->call_fd = call_fds[0]; - rc = 
um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, - vu_interrupt, IRQF_SHARED, info->name, vq); - if (rc < 0) + irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (irq < 0) { + rc = irq; goto close_both; + } rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); if (rc) goto release_irq; + vu_dev->irq = irq; + goto out; release_irq: - um_free_irq(vu_dev->irq, vq); + um_free_irq(irq, vq); close_both: os_close_file(call_fds[0]); out: @@ -1023,7 +1039,9 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vq; /* not supported for now */ - if (WARN_ON(nvqs > 64)) + if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs, + "%d VQs requested, only up to 64 or %lld supported\n", + nvqs, vu_dev->max_vqs)) return -EINVAL; rc = vhost_user_set_mem_table(vu_dev); @@ -1210,6 +1228,7 @@ static int virtio_uml_probe(struct platform_device *pdev) vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; vu_dev->pdev = pdev; vu_dev->req_fd = -1; + vu_dev->irq = UM_IRQ_ALLOC; time_travel_propagate_time(); @@ -1446,7 +1465,7 @@ static int virtio_uml_resume(struct platform_device *pdev) static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, - .remove_new = virtio_uml_remove, + .remove = virtio_uml_remove, .driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 18f902da8e99..428f2c5158c2 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += bug.h generic-y += compat.h -generic-y += current.h generic-y += device.h generic-y += dma-mapping.h generic-y += emergency-restart.h diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h new file mode 100644 index 000000000000..de64e032d66c --- /dev/null +++ b/arch/um/include/asm/current.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef 
__ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include <linux/compiler.h> +#include <linux/threads.h> + +#ifndef __ASSEMBLY__ + +struct task_struct; +extern struct task_struct *cpu_tasks[NR_CPUS]; + +static __always_inline struct task_struct *get_current(void) +{ + return cpu_tasks[0]; +} + + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 834313ecd3d6..3d516f3ca9c7 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -29,51 +29,35 @@ struct page; #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) - typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; -#define pte_val(p) ((p).pte) -#define pte_get_bits(p, bits) ((p).pte & (bits)) -#define pte_set_bits(p, bits) ((p).pte |= (bits)) -#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) -#define pte_copy(to, from) ({ (to).pte = (from).pte; }) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) -#define pte_set_val(p, phys, prot) \ - ({ (p).pte = (phys) | pgprot_val(prot); }) +#if CONFIG_PGTABLE_LEVELS > 2 +typedef struct { unsigned long pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) -typedef unsigned long long phys_t; +#if CONFIG_PGTABLE_LEVELS > 3 -#else - -typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) } ) -#ifdef CONFIG_3_LEVEL_PGTABLES -typedef struct { unsigned long pmd; } pmd_t; -#define pmd_val(x) ((x).pmd) -#define __pmd(x) ((pmd_t) { (x) } ) -#endif +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ #define pte_val(x) ((x).pte) - #define 
pte_get_bits(p, bits) ((p).pte & (bits)) #define pte_set_bits(p, bits) ((p).pte |= (bits)) #define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) #define pte_copy(to, from) ((to).pte = (from).pte) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEEDSYNC)) #define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) typedef unsigned long phys_t; -#endif - typedef struct { unsigned long pgprot; } pgprot_t; typedef struct page *pgtable_t; diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index de5e31c64793..04fb4e6969a4 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -31,7 +31,7 @@ do { \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 #define __pmd_free_tlb(tlb, pmd, address) \ do { \ @@ -39,6 +39,15 @@ do { \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ } while (0) +#if CONFIG_PGTABLE_LEVELS > 3 + +#define __pud_free_tlb(tlb, pud, address) \ +do { \ + pagetable_pud_dtor(virt_to_ptdesc(pud)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ +} while (0) + +#endif #endif #endif diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 8256ecc5b919..ab0c8dd86564 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -31,7 +31,7 @@ printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -static inline int pgd_newpage(pgd_t pgd) { return 0; } +static inline int pgd_needsync(pgd_t pgd) { return 0; } static inline void pgd_mkuptodate(pgd_t pgd) { } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-4level.h index 8a5032ec231f..0d279caee93c 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-4level.h @@ -4,21 +4,25 @@ * Derived from 
include/asm-i386/pgtable.h */ -#ifndef __UM_PGTABLE_3LEVEL_H -#define __UM_PGTABLE_3LEVEL_H +#ifndef __UM_PGTABLE_4LEVEL_H +#define __UM_PGTABLE_4LEVEL_H -#include <asm-generic/pgtable-nopud.h> +#include <asm-generic/pgtable-nop4d.h> -/* PGDIR_SHIFT determines what a third-level page table entry can map */ +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#ifdef CONFIG_64BIT -#define PGDIR_SHIFT 30 -#else -#define PGDIR_SHIFT 31 -#endif +#define PGDIR_SHIFT 39 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) +/* PUD_SHIFT determines the size of the area a third-level page table can + * map + */ + +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + /* PMD_SHIFT determines the size of the area a second-level page table can * map */ @@ -32,13 +36,9 @@ */ #define PTRS_PER_PTE 512 -#ifdef CONFIG_64BIT #define PTRS_PER_PMD 512 +#define PTRS_PER_PUD 512 #define PTRS_PER_PGD 512 -#else -#define PTRS_PER_PMD 1024 -#define PTRS_PER_PGD 1024 -#endif #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) @@ -48,11 +48,14 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pud_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE)) +#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEEDSYNC)) #define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pud_present(x) (pud_val(x) & _PAGE_PRESENT) #define pud_populate(mm, pud, pmd) \ @@ -60,23 +63,40 @@ #define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) -static inline int pgd_newpage(pgd_t pgd) +#define p4d_none(x) (!(p4d_val(x) & ~_PAGE_NEEDSYNC)) +#define p4d_bad(x) ((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define 
p4d_present(x) (p4d_val(x) & _PAGE_PRESENT) +#define p4d_populate(mm, p4d, pud) \ + set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud))) + +#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval)) + + +static inline int pgd_needsync(pgd_t pgd) { - return(pgd_val(pgd) & _PAGE_NEWPAGE); + return pgd_val(pgd) & _PAGE_NEEDSYNC; } -static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } +static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEEDSYNC; } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) static inline void pud_clear (pud_t *pud) { - set_pud(pud, __pud(_PAGE_NEWPAGE)); + set_pud(pud, __pud(_PAGE_NEEDSYNC)); +} + +static inline void p4d_clear (p4d_t *p4d) +{ + set_p4d(p4d, __p4d(_PAGE_NEEDSYNC)); } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) #define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) +#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK) +#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK)) + static inline unsigned long pte_pfn(pte_t pte) { return phys_to_pfn(pte_val(pte)); @@ -97,4 +117,3 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) } #endif - diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index faab5a2a4b06..0bd60afcc37d 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -11,8 +11,7 @@ #include <asm/fixmap.h> #define _PAGE_PRESENT 0x001 -#define _PAGE_NEWPAGE 0x002 -#define _PAGE_NEWPROT 0x004 +#define _PAGE_NEEDSYNC 0x002 #define _PAGE_RW 0x020 #define _PAGE_USER 0x040 #define _PAGE_ACCESSED 0x080 @@ -24,10 +23,12 @@ /* We borrow bit 10 to store the exclusive marker in swap PTEs. 
*/ #define _PAGE_SWP_EXCLUSIVE 0x400 -#ifdef CONFIG_3_LEVEL_PGTABLES -#include <asm/pgtable-3level.h> -#else +#if CONFIG_PGTABLE_LEVELS == 4 +#include <asm/pgtable-4level.h> +#elif CONFIG_PGTABLE_LEVELS == 2 #include <asm/pgtable-2level.h> +#else +#error "Unsupported number of page table levels" #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; @@ -78,22 +79,22 @@ extern unsigned long end_iomem; */ #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) +#define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC)) -#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEEDSYNC; } while (0) -#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) +#define pmd_needsync(x) (pmd_val(x) & _PAGE_NEEDSYNC) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEEDSYNC) -#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) -#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) +#define pud_needsync(x) (pud_val(x) & _PAGE_NEEDSYNC) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEEDSYNC) -#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) -#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) +#define p4d_needsync(x) (p4d_val(x) & _PAGE_NEEDSYNC) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEEDSYNC) #define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) @@ -144,14 +145,9 @@ static inline int pte_young(pte_t pte) return pte_get_bits(pte, _PAGE_ACCESSED); } -static inline int pte_newpage(pte_t pte) +static inline 
int pte_needsync(pte_t pte) { - return pte_get_bits(pte, _PAGE_NEWPAGE); -} - -static inline int pte_newprot(pte_t pte) -{ - return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); + return pte_get_bits(pte, _PAGE_NEEDSYNC); } /* @@ -160,12 +156,6 @@ static inline int pte_newprot(pte_t pte) * ================================= */ -static inline pte_t pte_mknewprot(pte_t pte) -{ - pte_set_bits(pte, _PAGE_NEWPROT); - return(pte); -} - static inline pte_t pte_mkclean(pte_t pte) { pte_clear_bits(pte, _PAGE_DIRTY); @@ -180,19 +170,14 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - if (likely(pte_get_bits(pte, _PAGE_RW))) - pte_clear_bits(pte, _PAGE_RW); - else - return pte; - return(pte_mknewprot(pte)); + pte_clear_bits(pte, _PAGE_RW); + return pte; } static inline pte_t pte_mkread(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_USER))) - return pte; pte_set_bits(pte, _PAGE_USER); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkdirty(pte_t pte) @@ -209,23 +194,19 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite_novma(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_RW))) - return pte; pte_set_bits(pte, _PAGE_RW); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkuptodate(pte_t pte) { - pte_clear_bits(pte, _PAGE_NEWPAGE); - if(pte_present(pte)) - pte_clear_bits(pte, _PAGE_NEWPROT); - return(pte); + pte_clear_bits(pte, _PAGE_NEEDSYNC); + return pte; } -static inline pte_t pte_mknewpage(pte_t pte) +static inline pte_t pte_mkneedsync(pte_t pte) { - pte_set_bits(pte, _PAGE_NEWPAGE); + pte_set_bits(pte, _PAGE_NEEDSYNC); return(pte); } @@ -233,13 +214,11 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); - /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so - * fix_range knows to unmap it. _PAGE_NEWPROT is specific to - * mapped pages. 
+ /* If it's a swap entry, it needs to be marked _PAGE_NEEDSYNC so + * update_pte_range knows to unmap it. */ - *pteptr = pte_mknewpage(*pteptr); - if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); + *pteptr = pte_mkneedsync(*pteptr); } #define PFN_PTE_SHIFT PAGE_SHIFT @@ -279,7 +258,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { - return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEEDSYNC); } /* @@ -294,8 +273,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) ({ pte_t pte; \ \ pte_set_val(pte, page_to_phys(page), (pgprot)); \ - if (pte_present(pte)) \ - pte_mknewprot(pte_mknewpage(pte)); \ pte;}) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -329,7 +306,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); * <--------------- offset ----------------> E < type -> 0 0 0 1 0 * * E is the exclusive marker that is not stored in swap entries. - * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte(). + * _PAGE_NEEDSYNC (bit 1) is always set to 1 in set_pte(). 
*/ #define __swp_type(x) (((x).val >> 5) & 0x1f) #define __swp_offset(x) ((x).val >> 11) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index bce4595798da..5d6356eafffe 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,10 +20,7 @@ struct task_struct; struct mm_struct; struct thread_struct { - struct pt_regs regs; struct pt_regs *segv_regs; - void *fault_addr; - jmp_buf *fault_catcher; struct task_struct *prev_sched; struct arch_thread arch; jmp_buf switch_buf; @@ -33,12 +30,14 @@ struct thread_struct { void *arg; } thread; } request; + + /* Contains variable sized FP registers */ + struct pt_regs regs; }; #define INIT_THREAD \ { \ .regs = EMPTY_REGS, \ - .fault_addr = NULL, \ .prev_sched = NULL, \ .arch = INIT_ARCH_THREAD, \ .request = { } \ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index c7b4b49826a2..f9ad06fcc991 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -17,35 +17,17 @@ #include <sysdep/ptrace_user.h> struct thread_info { - struct task_struct *task; /* main task structure */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct thread_info *real_thread; /* Points to non-IRQ stack */ - unsigned long aux_fp_regs[FP_SIZE]; /* auxiliary fp_regs to save/restore - them out-of-band */ }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .real_thread = NULL, \ -} - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - unsigned long mask = THREAD_SIZE - 1; - void *p; - - asm volatile ("" : "=r" (p) : "0" (&ti)); - ti = (struct thread_info *) (((unsigned long)p) & ~mask); - return ti; } #endif diff --git a/arch/um/include/asm/tlbflush.h 
b/arch/um/include/asm/tlbflush.h index db997976b6ea..13a3009942be 100644 --- a/arch/um/include/asm/tlbflush.h +++ b/arch/um/include/asm/tlbflush.h @@ -9,8 +9,8 @@ #include <linux/mm.h> /* - * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls - * from the process handling the MM (which can be the kernel itself). + * In UML, we need to sync the TLB over by using mmap/munmap syscalls from + * the process handling the MM (which can be the kernel itself). * * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes * we catch all PTE transitions where memory that was unusable becomes usable. diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 06292fca5a4d..ea65f151bf48 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -30,25 +30,23 @@ #include <sysdep/ptrace.h> -struct cpu_task { - void *task; -}; +struct task_struct; +extern struct task_struct *cpu_tasks[]; -extern struct cpu_task cpu_tasks[]; +extern unsigned long long physmem_size; extern unsigned long high_physmem; extern unsigned long uml_physmem; extern unsigned long uml_reserved; extern unsigned long end_vm; extern unsigned long start_vm; -extern unsigned long long highmem; extern unsigned long brk_start; extern unsigned long host_task_size; extern unsigned long stub_start; -extern int linux_main(int argc, char **argv); +extern int linux_main(int argc, char **argv, char **envp); extern void uml_finishsetup(void); struct siginfo; diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 579ed946a3a9..73f3a4792ed8 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -6,7 +6,6 @@ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); -DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_GFP_KERNEL, 
GFP_KERNEL); DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); @@ -15,17 +14,3 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); - -#ifdef CONFIG_PRINTK -DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); -#endif -#ifdef CONFIG_UML_X86 -DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); -#endif -#ifdef CONFIG_64BIT -DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); -#endif -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); -#endif - diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8ffd2db168e..f21dc8517538 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -60,7 +60,6 @@ extern unsigned long from_irq_stack(int nested); extern int singlestepping(void); extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); -extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs); extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index 11a723a58545..adfa08062f88 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,10 +47,9 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long highmem); +extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, - unsigned long len, unsigned long long highmem); + unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x); diff --git a/arch/um/include/shared/os.h 
b/arch/um/include/shared/os.h index 9a039d6f1f74..5babad8c5f75 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -145,7 +145,6 @@ extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); extern int os_get_ifname(int fd, char *namebuf); extern int os_set_slip(int fd); extern int os_mode_fd(int fd, int mode); -extern int os_fsync_file(int fd); extern int os_seek_file(int fd, unsigned long long offset); extern int os_open_file(const char *file, struct openflags flags, int mode); @@ -199,15 +198,11 @@ extern int create_mem_file(unsigned long long len); extern void report_enomem(void); /* process.c */ -extern unsigned long os_process_pc(int pid); -extern int os_process_parent(int pid); extern void os_alarm_process(int pid); -extern void os_stop_process(int pid); extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); extern int os_getpid(void); -extern int os_getpgrp(void); extern void init_new_thread_signals(void); @@ -220,6 +215,8 @@ extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); extern int os_mincore(void *addr, unsigned long len); +void os_set_pdeathsig(void); + /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); /* helper.c */ @@ -244,7 +241,6 @@ extern void block_signals(void); extern void unblock_signals(void); extern int um_set_signals(int enable); extern int um_set_signals_trace(int enable); -extern int os_is_signal_stack(void); extern void deliver_alarm(void); extern void register_pm_wake_signal(void); extern void block_signals_hard(void); @@ -283,13 +279,11 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset); int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); -int protect(struct mm_id *mm_idp, unsigned long addr, - unsigned long len, unsigned int prot); /* skas/process.c */ extern int 
is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); -extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); +extern void userspace(struct uml_pt_regs *regs); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); extern int start_idle_thread(void *stack, jmp_buf *switch_buf); @@ -329,9 +323,6 @@ extern int __ignore_sigio_fd(int fd); /* tty.c */ extern int get_pty(void); -/* sys-$ARCH/task_size.c */ -extern unsigned long os_get_top_address(void); - long syscall(long number, ...); /* irqflags tracing */ diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index a0450326521c..7d81b2339a48 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -8,12 +8,6 @@ #include <sysdep/ptrace.h> -extern int save_i387_registers(int pid, unsigned long *fp_regs); -extern int restore_i387_registers(int pid, unsigned long *fp_regs); -extern int save_fp_registers(int pid, unsigned long *fp_regs); -extern int restore_fp_registers(int pid, unsigned long *fp_regs); -extern int save_fpx_registers(int pid, unsigned long *fp_regs); -extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 2b6b44759dfa..81a4cace032c 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -12,6 +12,17 @@ #include <as-layout.h> #include <sysdep/tls.h> +struct stub_init_data { + unsigned long stub_start; + + int stub_code_fd; + unsigned long stub_code_offset; + int stub_data_fd; + unsigned long stub_data_offset; + + unsigned long segv_handler; +}; + #define STUB_NEXT_SYSCALL(s) \ 
((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len)) @@ -19,7 +30,6 @@ enum stub_syscall_type { STUB_SYSCALL_UNSET = 0, STUB_SYSCALL_MMAP, STUB_SYSCALL_MUNMAP, - STUB_SYSCALL_MPROTECT, }; struct stub_syscall { diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h index c8db2f213dba..7c2b277b7eb0 100644 --- a/arch/um/include/shared/timetravel.h +++ b/arch/um/include/shared/timetravel.h @@ -12,14 +12,13 @@ enum time_travel_mode { TT_MODE_EXTERNAL, }; -#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ - defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) extern enum time_travel_mode time_travel_mode; extern int time_travel_should_print_bc_msg; #else #define time_travel_mode TT_MODE_OFF #define time_travel_should_print_bc_msg 0 -#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ void _time_travel_print_bc_msg(void); static inline void time_travel_print_bc_msg(void) diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index bbab79c0c074..139eb78a4767 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -38,7 +38,7 @@ extern void panic(const char *fmt, ...) #define UM_KERN_DEBUG KERN_DEBUG #define UM_KERN_CONT KERN_CONT -#ifdef UML_CONFIG_PRINTK +#if IS_ENABLED(CONFIG_PRINTK) #define printk(...) _printk(__VA_ARGS__) extern int _printk(const char *fmt, ...) 
__attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c index 8d78ced9e08f..15c342426489 100644 --- a/arch/um/kernel/dtb.c +++ b/arch/um/kernel/dtb.c @@ -31,6 +31,7 @@ void uml_dtb_init(void) static int __init uml_dtb_setup(char *line, int *add) { + *add = 0; dtb = line; return 0; } diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 3385d653ebd0..a36b7918a011 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -116,8 +116,6 @@ SECTIONS .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . = ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) @@ -178,3 +176,6 @@ SECTIONS DISCARDS } + +ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE, + "STUB code must not be larger than one page"); diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 47b8cb1a1156..99dba827461c 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -34,6 +34,7 @@ int __init read_initrd(void) static int __init uml_initrd_setup(char *line, int *add) { + *add = 0; initrd = line; return 0; } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 534e91797f89..338450741aac 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -674,115 +674,3 @@ void __init init_IRQ(void) /* Initialize EPOLL Loop */ os_setup_epoll(); } - -/* - * IRQ stack entry and exit: - * - * Unlike i386, UML doesn't receive IRQs on the normal kernel stack - * and switch over to the IRQ stack after some preparation. We use - * sigaltstack to receive signals on a separate stack from the start. - * These two functions make sure the rest of the kernel won't be too - * upset by being on a different stack. The IRQ stack has a - * thread_info structure at the bottom so that current et al continue - * to work. 
- * - * to_irq_stack copies the current task's thread_info to the IRQ stack - * thread_info and sets the tasks's stack to point to the IRQ stack. - * - * from_irq_stack copies the thread_info struct back (flags may have - * been modified) and resets the task's stack pointer. - * - * Tricky bits - - * - * What happens when two signals race each other? UML doesn't block - * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal - * could arrive while a previous one is still setting up the - * thread_info. - * - * There are three cases - - * The first interrupt on the stack - sets up the thread_info and - * handles the interrupt - * A nested interrupt interrupting the copying of the thread_info - - * can't handle the interrupt, as the stack is in an unknown state - * A nested interrupt not interrupting the copying of the - * thread_info - doesn't do any setup, just handles the interrupt - * - * The first job is to figure out whether we interrupted stack setup. - * This is done by xchging the signal mask with thread_info->pending. - * If the value that comes back is zero, then there is no setup in - * progress, and the interrupt can be handled. If the value is - * non-zero, then there is stack setup in progress. In order to have - * the interrupt handled, we leave our signal in the mask, and it will - * be handled by the upper handler after it has set up the stack. - * - * Next is to figure out whether we are the outer handler or a nested - * one. As part of setting up the stack, thread_info->real_thread is - * set to non-NULL (and is reset to NULL on exit). This is the - * nesting indicator. If it is non-NULL, then the stack is already - * set up and the handler can run. 
- */ - -static unsigned long pending_mask; - -unsigned long to_irq_stack(unsigned long *mask_out) -{ - struct thread_info *ti; - unsigned long mask, old; - int nested; - - mask = xchg(&pending_mask, *mask_out); - if (mask != 0) { - /* - * If any interrupts come in at this point, we want to - * make sure that their bits aren't lost by our - * putting our bit in. So, this loop accumulates bits - * until xchg returns the same value that we put in. - * When that happens, there were no new interrupts, - * and pending_mask contains a bit for each interrupt - * that came in. - */ - old = *mask_out; - do { - old |= mask; - mask = xchg(&pending_mask, old); - } while (mask != old); - return 1; - } - - ti = current_thread_info(); - nested = (ti->real_thread != NULL); - if (!nested) { - struct task_struct *task; - struct thread_info *tti; - - task = cpu_tasks[ti->cpu].task; - tti = task_thread_info(task); - - *ti = *tti; - ti->real_thread = tti; - task->stack = ti; - } - - mask = xchg(&pending_mask, 0); - *mask_out |= mask | nested; - return 0; -} - -unsigned long from_irq_stack(int nested) -{ - struct thread_info *ti, *to; - unsigned long mask; - - ti = current_thread_info(); - - pending_mask = 1; - - to = ti->real_thread; - current->stack = to; - ti->real_thread = NULL; - *to = *ti; - - mask = xchg(&pending_mask, 0); - return mask & ~1; -} - diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index a5b4fe2ad931..53248ed04771 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -6,7 +6,6 @@ #include <linux/stddef.h> #include <linux/module.h> #include <linux/memblock.h> -#include <linux/highmem.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> @@ -51,8 +50,6 @@ EXPORT_SYMBOL(empty_zero_page); pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* Initialized at boot time, and readonly after that */ -unsigned long long highmem; -EXPORT_SYMBOL(highmem); int kmalloc_ok = 0; /* Used during early boot */ @@ -98,7 +95,7 @@ static void __init 
one_page_table_init(pmd_t *pmd) static void __init one_md_table_init(pud_t *pud) { -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); if (!pmd_table) panic("%s: Failed to allocate %lu bytes align=%lx\n", @@ -109,6 +106,19 @@ static void __init one_md_table_init(pud_t *pud) #endif } +static void __init one_ud_table_init(p4d_t *p4d) +{ +#if CONFIG_PGTABLE_LEVELS > 3 + pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pud_table) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table))); + BUG_ON(pud_table != pud_offset(p4d, 0)); +#endif +} + static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { @@ -126,6 +136,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end, for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { p4d = p4d_offset(pgd, vaddr); + if (p4d_none(*p4d)) + one_ud_table_init(p4d); pud = pud_offset(p4d, vaddr); if (pud_none(*pud)) one_md_table_init(pud); diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index fb2adfb49945..a74f17b033c4 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,19 +22,14 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -extern unsigned long long physmem_size; - -void __init mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long highmem) +void __init mem_total_pages(unsigned long physmem, unsigned long iomem) { - unsigned long phys_pages, highmem_pages; - unsigned long iomem_pages, total_pages; + unsigned long phys_pages, iomem_pages, total_pages; - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - highmem_pages = highmem >> PAGE_SHIFT; + phys_pages = physmem >> PAGE_SHIFT; + iomem_pages = iomem >> PAGE_SHIFT; - total_pages = 
phys_pages + iomem_pages + highmem_pages; + total_pages = phys_pages + iomem_pages; max_mapnr = total_pages; } @@ -64,13 +59,12 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, * @reserve_end: end address of the physical kernel memory. * @len: Length of total physical memory that should be mapped/made * available, in bytes. - * @highmem: Number of highmem bytes that should be mapped/made available. * - * Creates an unlinked temporary file of size (len + highmem) and memory maps + * Creates an unlinked temporary file of size (len) and memory maps * it on the last executable image address (uml_reserved). * * The offset is needed as the length of the total physical memory - * (len + highmem) includes the size of the memory used be the executable image, + * (len) includes the size of the memory used be the executable image, * but the mapped-to address is the last address of the executable image * (uml_reserved == end address of executable image). * @@ -78,24 +72,24 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, * of all user space processes/kernel tasks. */ void __init setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len, unsigned long long highmem) + unsigned long len) { unsigned long reserve = reserve_end - start; - long map_size = len - reserve; + unsigned long map_size = len - reserve; int err; - if(map_size <= 0) { + if (len <= reserve) { os_warn("Too few physical memory! 
Needed=%lu, given=%lu\n", reserve, len); exit(1); } - physmem_fd = create_mem_file(len + highmem); + physmem_fd = create_mem_file(len); err = os_map_memory((void *) reserve_end, physmem_fd, reserve, map_size, 1, 1, 1); if (err < 0) { - os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p " + os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p " "failed - errno = %d\n", map_size, (void *) reserve_end, err); exit(1); @@ -107,9 +101,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, */ os_seek_file(physmem_fd, __pa(__syscall_stub_start)); os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE); - os_fsync_file(physmem_fd); - memblock_add(__pa(start), len + highmem); + memblock_add(__pa(start), len); memblock_reserve(__pa(start), reserve); min_low_pfn = PFN_UP(__pa(reserve_end)); @@ -137,10 +130,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) region = region->next; } } - else if (phys < __pa(end_iomem) + highmem) { - fd = physmem_fd; - *offset_out = phys - iomem_size; - } return fd; } @@ -149,6 +138,8 @@ EXPORT_SYMBOL(phys_mapping); static int __init uml_mem_setup(char *line, int *add) { char *retptr; + + *add = 0; physmem_size = memparse(line,&retptr); return 0; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index be2856af6d4c..30bdc0a87dc8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,7 +43,8 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... 
NR_CPUS - 1] = { NULL } }; +struct task_struct *cpu_tasks[NR_CPUS]; +EXPORT_SYMBOL(cpu_tasks); void free_stack(unsigned long stack, int order) { @@ -64,7 +65,7 @@ unsigned long alloc_stack(int order, int atomic) static inline void set_current(struct task_struct *task) { - cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task }); + cpu_tasks[task_thread_info(task)->cpu] = task; } struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) @@ -116,7 +117,7 @@ void new_thread_handler(void) * callback returns only if the kernel thread execs a process */ fn(arg); - userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(&current->thread.regs.regs); } /* Called magically, see new_thread_handler above */ @@ -133,7 +134,7 @@ static void fork_handler(void) current->thread.prev_sched = NULL; - userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(&current->thread.regs.regs); } int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) @@ -187,6 +188,13 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src) +{ + memcpy(dst, src, arch_task_struct_size); + return 0; +} + void um_idle_sleep(void) { if (time_travel_mode != TT_MODE_OFF) @@ -287,11 +295,3 @@ unsigned long __get_wchan(struct task_struct *p) return 0; } - -int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) -{ - int cpu = current_thread_info()->cpu; - - return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); -} - diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore new file mode 100644 index 000000000000..c3409ced0f38 --- /dev/null +++ b/arch/um/kernel/skas/.gitignore @@ -0,0 +1,2 @@ +stub_exe +stub_exe.dbg diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index 6f86d53e3d69..3384be42691f 100644 --- a/arch/um/kernel/skas/Makefile 
+++ b/arch/um/kernel/skas/Makefile @@ -3,14 +3,48 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := stub.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o \ + stub_exe_embed.o + +# Stub executable + +stub_exe_objs-y := stub_exe.o + +stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F) + +# Object file containing the ELF executable +$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe + +$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE + $(call if_changed,stub_exe) + +$(obj)/stub_exe: OBJCOPYFLAGS := -S +$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE + $(call if_changed,objcopy) + +quiet_cmd_stub_exe = STUB_EXE $@ + cmd_stub_exe = $(CC) -nostdlib -o $@ \ + $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \ + $(filter %.o,$^) + +STUB_EXE_LDFLAGS = -Wl,-n -static + +targets += stub_exe.dbg stub_exe $(stub_exe_objs-y) + +# end # stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := stub.o +CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING) + +# Clang will call memset() from __builtin_alloca() when stack variable +# initialization is enabled, which is used in stub_exe.c. +CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized) + +UNPROFILE_OBJS := stub.o stub_exe.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 886ed5e65674..0eb5a1d3ba70 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -40,35 +40,13 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) goto out_free; } - /* - * Ensure the new MM is clean and nothing unwanted is mapped. - * - * TODO: We should clear the memory up to STUB_START to ensure there is - * nothing mapped there, i.e. 
we (currently) have: - * - * |- user memory -|- unused -|- stub -|- unused -| - * ^ TASK_SIZE ^ STUB_START - * - * Meaning we have two unused areas where we may still have valid - * mappings from our internal clone(). That isn't really a problem as - * userspace is not going to access them, but it is definitely not - * correct. - * - * However, we are "lucky" and if rseq is configured, then on 32 bit - * it will fall into the first empty range while on 64 bit it is going - * to use an anonymous mapping in the second range. As such, things - * continue to work for now as long as we don't start unmapping these - * areas. - * - * Change this to STUB_START once we have a clean userspace. - */ - unmap(new_id, 0, TASK_SIZE); + /* Ensure the new MM is clean and nothing unwanted is mapped */ + unmap(new_id, 0, STUB_START); return 0; out_free: - if (new_id->stack != 0) - free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); + free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); out: return ret; } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 68657988c8d1..05dcdc057af9 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -22,15 +22,13 @@ static int __init start_kernel_proc(void *unused) { block_signals_trace(); - cpu_tasks[0].task = current; - start_kernel(); return 0; } extern int userspace_pid[]; -extern char cpu0_irqstack[]; +static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); int __init start_uml(void) { diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 5d52ffa682dc..796fc266d3bb 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct stub_data *d) return -1; } break; - case STUB_SYSCALL_MPROTECT: - res = stub_syscall3(__NR_mprotect, - sc->mem.addr, sc->mem.length, - sc->mem.prot); - if (res) { - d->err = res; - d->syscall_data_len = i; - return -1; - } - break; default: d->err = -95; /* 
EOPNOTSUPP */ d->syscall_data_len = i; diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c new file mode 100644 index 000000000000..23c99b285e82 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe.c @@ -0,0 +1,95 @@ +#include <sys/ptrace.h> +#include <sys/prctl.h> +#include <asm/unistd.h> +#include <sysdep/stub.h> +#include <stub-data.h> + +void _start(void); + +noinline static void real_init(void) +{ + struct stub_init_data init_data; + unsigned long res; + struct { + void *ss_sp; + int ss_flags; + size_t ss_size; + } stack = { + .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + }; + struct { + void *sa_handler_; + unsigned long sa_flags; + void *sa_restorer; + unsigned long long sa_mask; + } sa = { + /* Need to set SA_RESTORER (but the handler never returns) */ + .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, + /* no need to mask any signals */ + .sa_mask = 0, + }; + + /* set a nice name */ + stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace"); + + /* Make sure this process dies if the kernel dies */ + stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); + + /* read information from STDIN and close it */ + res = stub_syscall3(__NR_read, 0, + (unsigned long)&init_data, sizeof(init_data)); + if (res != sizeof(init_data)) + stub_syscall1(__NR_exit, 10); + + stub_syscall1(__NR_close, 0); + + /* map stub code + data */ + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED, + init_data.stub_code_fd, init_data.stub_code_offset); + if (res != init_data.stub_start) + stub_syscall1(__NR_exit, 11); + + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start + UM_KERN_PAGE_SIZE, + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + init_data.stub_data_fd, init_data.stub_data_offset); + if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) + stub_syscall1(__NR_exit, 12); + + /* setup signal stack inside stub 
data */ + stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; + stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); + + /* register SIGSEGV handler */ + sa.sa_handler_ = (void *) init_data.segv_handler; + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, + sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 13); + + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + + stub_syscall1(__NR_exit, 14); + + __builtin_unreachable(); +} + +__attribute__((naked)) void _start(void) +{ + /* + * Since the stack after exec() starts at the top-most address, + * but that's exactly where we also want to map the stub data + * and code, this must: + * - push the stack by 1 code and STUB_DATA_PAGES data pages + * - call real_init() + * This way, real_init() can use the stack normally, while the + * original stack further down (higher address) will become + * inaccessible after the mmap() calls above. 
+ */ + stub_start(real_init); +} diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S new file mode 100644 index 000000000000..6d8914fbe8f1 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe_embed.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/init.h> +#include <linux/linkage.h> + +__INITDATA + +SYM_DATA_START(stub_exe_start) + .incbin "arch/um/kernel/skas/stub_exe" +SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end) + +__FINIT diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 4bb8622dc512..13ee5666668d 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -32,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack, struct pt_regs *segv_regs = current->thread.segv_regs; int i; - if (!segv_regs && os_is_signal_stack()) { - pr_err("Received SIGSEGV in SIGSEGV handler," - " aborting stack trace!\n"); - return; - } - if (!stack) stack = get_stack_pointer(task, segv_regs); @@ -52,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack, } printk("%sCall Trace:\n", loglvl); - dump_trace(current, &stackops, (void *)loglvl); + dump_trace(task ?: current, &stackops, (void *)loglvl); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 29b27b90581f..1394568c0210 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -25,6 +25,8 @@ #include <shared/init.h> #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#include <linux/sched/clock.h> + enum time_travel_mode time_travel_mode; EXPORT_SYMBOL_GPL(time_travel_mode); @@ -47,6 +49,15 @@ static u16 time_travel_shm_id; static struct um_timetravel_schedshm *time_travel_shm; static union um_timetravel_schedshm_client *time_travel_shm_client; +unsigned long tt_extra_sched_jiffies; + +notrace unsigned long long sched_clock(void) +{ + return (unsigned long long)(jiffies - INITIAL_JIFFIES + + tt_extra_sched_jiffies) + * (NSEC_PER_SEC / HZ); +} + static void 
time_travel_set_time(unsigned long long ns) { if (unlikely(ns < time_travel_time)) @@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e) { time_travel_add_event(&time_travel_timer_event, time_travel_time + time_travel_timer_interval); + + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } @@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event); static void time_travel_oneshot_timer(struct time_travel_event *e) { + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 548af31d4111..cf7e0d4407f2 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -23,9 +23,6 @@ struct vm_ops { int phys_fd, unsigned long long offset); int (*unmap)(struct mm_id *mm_idp, unsigned long virt, unsigned long len); - int (*mprotect)(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot); }; static int kern_map(struct mm_id *mm_idp, @@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp, return os_unmap_memory((void *)virt, len); } -static int kern_mprotect(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot) -{ - return os_protect_memory((void *)virt, len, - prot & UM_PROT_READ, prot & UM_PROT_WRITE, - 1); -} - void report_enomem(void) { printk(KERN_ERR "UML ran out of memory on the host side! " @@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, struct vm_ops *ops) { pte_t *pte; - int r, w, x, prot, ret = 0; + int ret = 0; pte = pte_offset_kernel(pmd, addr); do { - r = pte_read(*pte); - w = pte_write(*pte); - x = pte_exec(*pte); - if (!pte_young(*pte)) { - r = 0; - w = 0; - } else if (!pte_dirty(*pte)) - w = 0; - - prot = ((r ? UM_PROT_READ : 0) | (w ? 
UM_PROT_WRITE : 0) | - (x ? UM_PROT_EXEC : 0)); - if (pte_newpage(*pte)) { - if (pte_present(*pte)) { - __u64 offset; - unsigned long phys = pte_val(*pte) & PAGE_MASK; - int fd = phys_mapping(phys, &offset); - - ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, - prot, fd, offset); - } else - ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); - } else if (pte_newprot(*pte)) - ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); + if (!pte_needsync(*pte)) + continue; + + if (pte_present(*pte)) { + __u64 offset; + unsigned long phys = pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + int r, w, x, prot; + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) + w = 0; + + prot = (r ? UM_PROT_READ : 0) | + (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0); + + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, + prot, fd, offset); + } else + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); + *pte = pte_mkuptodate(*pte); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; @@ -109,7 +101,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) { - if (pmd_newpage(*pmd)) { + if (pmd_needsync(*pmd)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pmd_mkuptodate(*pmd); @@ -132,7 +124,7 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { - if (pud_newpage(*pud)) { + if (pud_needsync(*pud)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pud_mkuptodate(*pud); @@ -155,7 +147,7 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end); if (!p4d_present(*p4d)) { - if (p4d_newpage(*p4d)) { + if (p4d_needsync(*p4d)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); p4d_mkuptodate(*p4d); @@ -180,18 +172,16 @@ int um_tlb_sync(struct mm_struct *mm) if (mm 
== &init_mm) { ops.mmap = kern_map; ops.unmap = kern_unmap; - ops.mprotect = kern_mprotect; } else { ops.mmap = map; ops.unmap = unmap; - ops.mprotect = protect; } pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); if (!pgd_present(*pgd)) { - if (pgd_newpage(*pgd)) { + if (pgd_needsync(*pgd)) { ret = ops.unmap(ops.mm_idp, addr, next - addr); pgd_mkuptodate(*pgd); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 97c8df9c4401..cdaee3e94273 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -201,7 +201,6 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, struct uml_pt_regs *regs) { - jmp_buf *catcher; int si_code; int err; int is_write = FAULT_WRITE(fi); @@ -246,15 +245,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, address = 0; } - catcher = current->thread.fault_catcher; if (!err) goto out; - else if (catcher != NULL) { - current->thread.fault_addr = (void *) address; - UML_LONGJMP(catcher, 1); - } - else if (current->thread.fault_addr != NULL) - panic("fault_addr set but no fault catcher"); else if (!is_user && arch_fixup(ip, regs)) goto out; @@ -310,14 +302,6 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } } -void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) -{ - if (current->thread.fault_catcher != NULL) - UML_LONGJMP(current->thread.fault_catcher, 1); - else - relay_signal(sig, si, regs); -} - void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8e8b54b3037..8037a967225d 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -65,9 +65,6 @@ struct cpuinfo_um boot_cpu_data = { EXPORT_SYMBOL(boot_cpu_data); -union thread_union cpu0_irqstack - __section(".data..init_irqstack") = - { 
.thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; @@ -131,7 +128,7 @@ static int have_root __initdata; static int have_console __initdata; /* Set in uml_mem_setup and modified in linux_main */ -long long physmem_size = 64 * 1024 * 1024; +unsigned long long physmem_size = 64 * 1024 * 1024; EXPORT_SYMBOL(physmem_size); static const char *usage_string = @@ -167,19 +164,6 @@ __uml_setup("root=", uml_root_setup, " root=/dev/ubd5\n\n" ); -static int __init no_skas_debug_setup(char *line, int *add) -{ - os_warn("'debug' is not necessary to gdb UML in skas mode - run\n"); - os_warn("'gdb linux'\n"); - - return 0; -} - -__uml_setup("debug", no_skas_debug_setup, -"debug\n" -" this flag is not needed to run gdb on UML in skas mode\n\n" -); - static int __init uml_console_setup(char *line, int *add) { have_console = 1; @@ -257,6 +241,8 @@ static struct notifier_block panic_exit_notifier = { void uml_finishsetup(void) { + cpu_tasks[0] = &init_task; + atomic_notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); @@ -302,7 +288,24 @@ static void parse_cache_line(char *line) } } -int __init linux_main(int argc, char **argv) +static unsigned long get_top_address(char **envp) +{ + unsigned long top_addr = (unsigned long) &top_addr; + int i; + + /* The earliest variable should be after the program name in ELF */ + for (i = 0; envp[i]; i++) { + if ((unsigned long) envp[i] > top_addr) + top_addr = (unsigned long) envp[i]; + } + + top_addr &= ~(UM_KERN_PAGE_SIZE - 1); + top_addr += UM_KERN_PAGE_SIZE; + + return top_addr; +} + +int __init linux_main(int argc, char **argv, char **envp) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; @@ -324,20 +327,23 @@ int __init linux_main(int argc, char **argv) if (have_console == 0) add_arg(DEFAULT_COMMAND_LINE_CONSOLE); - host_task_size = os_get_top_address(); - /* reserve a few pages for the stubs (taking care 
of data alignment) */ - /* align the data portion */ - BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES)); - stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1); + host_task_size = get_top_address(envp); + /* reserve a few pages for the stubs */ + stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE; /* another page for the code portion */ stub_start -= PAGE_SIZE; host_task_size = stub_start; + /* Limit TASK_SIZE to what is addressable by the page table */ + task_size = host_task_size; + if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE) + task_size = PTRS_PER_PGD * PGDIR_SIZE; + /* * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps * out */ - task_size = host_task_size & PGDIR_MASK; + task_size = task_size & PGDIR_MASK; /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); @@ -366,18 +372,15 @@ int __init linux_main(int argc, char **argv) setup_machinename(init_utsname()->machine); - highmem = 0; + physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK; iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - /* - * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary, - * so this makes sure that's true for highmem - */ - max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1); if (physmem_size + iomem_size > max_physmem) { - highmem = physmem_size + iomem_size - max_physmem; - physmem_size -= highmem; + physmem_size = max_physmem - iomem_size; + os_info("Physical memory size shrunk to %llu bytes\n", + physmem_size); } high_physmem = uml_physmem + physmem_size; @@ -398,6 +401,8 @@ int __init linux_main(int argc, char **argv) os_info("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); + arch_task_struct_size = sizeof(struct task_struct) + host_fp_size; + os_flush_stdout(); return start_uml(); @@ -412,9 +417,9 @@ void __init setup_arch(char **cmdline_p) { u8 rng_seed[32]; - 
stack_protections((unsigned long) &init_thread_info); - setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); - mem_total_pages(physmem_size, iomem_size, highmem); + stack_protections((unsigned long) init_task.stack); + setup_physmem(uml_physmem, uml_reserved, physmem_size); + mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 5c92d58a78e8..a409d4b66114 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -77,8 +77,6 @@ SECTIONS .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . = ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.gnu.linkonce.d*) CONSTRUCTORS diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 544e0b344c75..049dfa5bc9c6 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -12,6 +12,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ CFLAGS_signal.o += -Wframe-larger-than=4096 +CFLAGS_main.o += -Wno-frame-larger-than + obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f1d03cf3957f..a0d01c68ce3e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -255,12 +255,6 @@ void os_close_file(int fd) { close(fd); } -int os_fsync_file(int fd) -{ - if (fsync(fd) < 0) - return -errno; - return 0; -} int os_seek_file(int fd, unsigned long long offset) { diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index f98ff79cdbf7..0afcdeb8995b 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -11,6 +11,7 @@ #include <signal.h> #include <string.h> #include <sys/resource.h> +#include <sys/personality.h> #include <as-layout.h> #include <init.h> #include <kern_util.h> @@ -108,6 +109,21 @@ int __init main(int argc, char **argv, char **envp) char **new_argv; int ret, i, err; + /* Disable 
randomization and re-exec if it was changed successfully */ + ret = personality(PER_LINUX | ADDR_NO_RANDOMIZE); + if (ret >= 0 && (ret & (PER_LINUX | ADDR_NO_RANDOMIZE)) != + (PER_LINUX | ADDR_NO_RANDOMIZE)) { + char buf[4096] = {}; + ssize_t ret; + + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + if (ret < 0 || ret >= sizeof(buf)) { + perror("readlink failure"); + exit(1); + } + execve(buf, argv, envp); + } + set_stklim(); setup_env_path(); @@ -140,7 +156,7 @@ int __init main(int argc, char **argv, char **envp) #endif change_sig(SIGPIPE, 0); - ret = linux_main(argc, argv); + ret = linux_main(argc, argv, envp); /* * Disable SIGPROF - I have no idea why libc doesn't do this or turn @@ -182,6 +198,7 @@ int __init main(int argc, char **argv, char **envp) } extern void *__real_malloc(int); +extern void __real_free(void *); /* workaround for -Wmissing-prototypes warnings */ void *__wrap_malloc(int size); @@ -219,10 +236,6 @@ void *__wrap_calloc(int n, int size) return ptr; } -extern void __real_free(void *); - -extern unsigned long high_physmem; - void __wrap_free(void *ptr) { unsigned long addr = (unsigned long) ptr; diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index cf44d386f23c..72f302f4d197 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -39,10 +39,22 @@ void kasan_map_memory(void *start, size_t len) strerror(errno)); exit(1); } + + if (madvise(start, len, MADV_DONTDUMP)) { + os_info("Couldn't set MAD_DONTDUMP on shadow memory: %s\n.", + strerror(errno)); + exit(1); + } + + if (madvise(start, len, MADV_DONTFORK)) { + os_info("Couldn't set MADV_DONTFORK on shadow memory: %s\n.", + strerror(errno)); + exit(1); + } } /* Set by make_tempfile() during early boot. */ -static char *tempdir = NULL; +char *tempdir = NULL; /* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. 
*/ static int __init check_tmpfs(const char *dir) diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index e52dd37ddadc..9f086f939420 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -12,94 +12,18 @@ #include <fcntl.h> #include <sys/mman.h> #include <sys/ptrace.h> +#include <sys/prctl.h> #include <sys/wait.h> #include <asm/unistd.h> #include <init.h> #include <longjmp.h> #include <os.h> -#define ARBITRARY_ADDR -1 -#define FAILURE_PID -1 - -#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") -#define COMM_SCANF "%*[^)])" - -unsigned long os_process_pc(int pid) -{ - char proc_stat[STAT_PATH_LEN], buf[256]; - unsigned long pc = ARBITRARY_ADDR; - int fd, err; - - sprintf(proc_stat, "/proc/%d/stat", pid); - fd = open(proc_stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " - "errno = %d\n", proc_stat, errno); - goto out; - } - CATCH_EINTR(err = read(fd, buf, sizeof(buf))); - if (err < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " - "err = %d\n", proc_stat, errno); - goto out_close; - } - os_close_file(fd); - pc = ARBITRARY_ADDR; - if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %lu", &pc) != 1) - printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", - buf); - out_close: - close(fd); - out: - return pc; -} - -int os_process_parent(int pid) -{ - char stat[STAT_PATH_LEN]; - char data[256]; - int parent = FAILURE_PID, n, fd; - - if (pid == -1) - return parent; - - snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); - fd = open(stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, - errno); - return parent; - } - - CATCH_EINTR(n = read(fd, data, sizeof(data))); - close(fd); - - if (n < 0) { - printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, - errno); - return parent; - } - - parent = FAILURE_PID; 
- n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); - if (n != 1) - printk(UM_KERN_ERR "Failed to scan '%s'\n", data); - - return parent; -} - void os_alarm_process(int pid) { kill(pid, SIGALRM); } -void os_stop_process(int pid) -{ - kill(pid, SIGSTOP); -} - void os_kill_process(int pid, int reap_child) { kill(pid, SIGKILL); @@ -130,11 +54,6 @@ int os_getpid(void) return syscall(__NR_getpid); } -int os_getpgrp(void) -{ - return getpgrp(); -} - int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { @@ -285,3 +204,8 @@ void init_new_thread_signals(void) set_handler(SIGIO); signal(SIGWINCH, SIG_IGN); } + +void os_set_pdeathsig(void) +{ + prctl(PR_SET_PDEATHSIG, SIGKILL); +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index bd80b921add0..d7ca148807b2 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -10,11 +10,12 @@ #include <sysdep/ptrace.h> #include <sysdep/ptrace_user.h> #include <registers.h> +#include <stdlib.h> /* This is set once at boot time and not changed thereafter */ static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long exec_fp_regs[FP_SIZE]; +static unsigned long *exec_fp_regs; int init_pid_registers(int pid) { @@ -24,7 +25,11 @@ int init_pid_registers(int pid) if (err < 0) return -errno; - arch_init_registers(pid); + err = arch_init_registers(pid); + if (err < 0) + return err; + + exec_fp_regs = malloc(host_fp_size); get_fp_registers(pid, exec_fp_regs); return 0; } @@ -34,5 +39,5 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) memcpy(regs, exec_regs, sizeof(exec_regs)); if (fp_regs) - memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); + memcpy(fp_regs, exec_fp_regs, host_fp_size); } diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9e71794839e8..9aac8def4d63 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -55,6 +55,7 @@ static int write_sigio_thread(void 
*unused) int i, n, respond_fd; char c; + os_set_pdeathsig(); os_fix_helper_signals(); fds = &current_poll; while (1) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index b11ed66c8bb0..9ea7269ffb77 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -26,7 +26,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGFPE] = relay_signal, [SIGILL] = relay_signal, [SIGWINCH] = winch, - [SIGBUS] = bus_handler, + [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, }; @@ -65,7 +65,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_MASK (1 << SIGALRM_BIT) int signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif static unsigned int signals_pending; @@ -75,7 +75,7 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled = signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) if ((signals_blocked || __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) && (sig == SIGIO)) { @@ -190,43 +190,8 @@ static void hard_handler(int sig, siginfo_t *si, void *p) { ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; - unsigned long pending = 1UL << sig; - do { - int nested, bail; - - /* - * pending comes back with one bit set for each - * interrupt that arrived while setting up the stack, - * plus a bit for this interrupt, plus the zero bit is - * set if this is a nested interrupt. - * If bail is true, then we interrupted another - * handler setting up the stack. In this case, we - * have to return, and the upper handler will deal - * with this interrupt. 
- */ - bail = to_irq_stack(&pending); - if (bail) - return; - - nested = pending & 1; - pending &= ~1; - - while ((sig = ffs(pending)) != 0){ - sig--; - pending &= ~(1 << sig); - (*handlers[sig])(sig, (struct siginfo *)si, mc); - } - - /* - * Again, pending comes back with a mask of signals - * that arrived while tearing down the stack. If this - * is non-zero, we just go back, set up the stack - * again, and handle the new interrupts. - */ - if (!nested) - pending = from_irq_stack(nested); - } while (pending); + (*handlers[sig])(sig, (struct siginfo *)si, mc); } void set_handler(int sig) @@ -297,7 +262,7 @@ void unblock_signals(void) return; signals_enabled = 1; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -389,7 +354,7 @@ int um_set_signals_trace(int enable) return ret; } -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) void mark_sigio_pending(void) { /* @@ -487,11 +452,3 @@ void unblock_signals_hard(void) unblocking = false; } #endif - -int os_is_signal_stack(void) -{ - stack_t ss; - sigaltstack(NULL, &ss); - - return ss.ss_flags & SS_ONSTACK; -} diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 9a13ac23c606..d7f1814b0e5a 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) return 0; } - -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - unsigned int prot) -{ - struct stub_syscall *sc; - - /* Compress with previous syscall if that is possible */ - sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); - if (sc && sc->mem.prot == prot) { - sc->mem.length += len; - return 0; - } - - sc = syscall_stub_alloc(mm_idp); - sc->syscall = STUB_SYSCALL_MPROTECT; - sc->mem.addr = addr; - sc->mem.length = len; - sc->mem.prot = prot; - - return 0; -} diff --git 
a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index b6f656bcffb1..f683cfc9e51a 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -10,8 +10,11 @@ #include <sched.h> #include <errno.h> #include <string.h> +#include <fcntl.h> +#include <mem_user.h> #include <sys/mman.h> #include <sys/wait.h> +#include <sys/stat.h> #include <asm/unistd.h> #include <as-layout.h> #include <init.h> @@ -141,16 +144,10 @@ bad_wait: extern unsigned long current_stub_stack(void); -static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs) +static void get_skas_faultinfo(int pid, struct faultinfo *fi) { int err; - err = get_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "save_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); if (err) { printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " @@ -164,18 +161,11 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux * the stub stack page. We just have to copy it. */ memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); - - err = put_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "put_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } } -static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +static void handle_segv(int pid, struct uml_pt_regs *regs) { - get_skas_faultinfo(pid, &regs->faultinfo, aux_fp_regs); + get_skas_faultinfo(pid, &regs->faultinfo); segv(regs->faultinfo, 0, 1, NULL); } @@ -189,69 +179,131 @@ static void handle_trap(int pid, struct uml_pt_regs *regs) extern char __syscall_stub_start[]; -/** - * userspace_tramp() - userspace trampoline - * @stack: pointer to the new userspace stack page - * - * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. - * This function will run on a temporary stack page.
- * It ptrace()'es itself, then - * Two pages are mapped into the userspace address space: - * - STUB_CODE (with EXEC), which contains the skas stub code - * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. - * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. - * And last the process stops itself to give control to the UML kernel for this userspace process. - * - * Return: Always zero, otherwise the current userspace process is ended with non null exit() call - */ +static int stub_exe_fd; + static int userspace_tramp(void *stack) { - struct sigaction sa; - void *addr; - int fd; + char *const argv[] = { "uml-userspace", NULL }; + int pipe_fds[2]; unsigned long long offset; - unsigned long segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start; - - ptrace(PTRACE_TRACEME, 0, 0, 0); - - signal(SIGTERM, SIG_DFL); - signal(SIGWINCH, SIG_IGN); - - fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); - addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping mmap stub at 0x%lx failed, errno = %d\n", - STUB_CODE, errno); - exit(1); + struct stub_init_data init_data = { + .stub_start = STUB_START, + .segv_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start, + }; + struct iomem_region *iomem; + int ret; + + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), + &offset); + init_data.stub_code_offset = MMAP_OFFSET(offset); + + init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_offset = MMAP_OFFSET(offset); + + /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ + close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + + fcntl(init_data.stub_data_fd, F_SETFD, 0); + 
for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + + /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ + if (pipe(pipe_fds)) + exit(2); + + if (dup2(pipe_fds[0], 0) < 0) + exit(3); + close(pipe_fds[0]); + + /* Write init_data and close write side */ + ret = write(pipe_fds[1], &init_data, sizeof(init_data)); + close(pipe_fds[1]); + + if (ret != sizeof(init_data)) + exit(4); + + execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + + exit(5); +} + +extern char stub_exe_start[]; +extern char stub_exe_end[]; + +extern char *tempdir; + +#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX" + +#ifndef MFD_EXEC +#define MFD_EXEC 0x0010U +#endif + +static int __init init_stub_exe_fd(void) +{ + size_t written = 0; + char *tmpfile = NULL; + + stub_exe_fd = memfd_create("uml-userspace", + MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING); + + if (stub_exe_fd < 0) { + printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!"); + + tmpfile = malloc(strlen(tempdir) + + strlen(STUB_EXE_NAME_TEMPLATE) + 1); + if (tmpfile == NULL) + panic("Failed to allocate memory for stub binary name"); + + strcpy(tmpfile, tempdir); + strcat(tmpfile, STUB_EXE_NAME_TEMPLATE); + + stub_exe_fd = mkstemp(tmpfile); + if (stub_exe_fd < 0) + panic("Could not create temporary file for stub binary: %d", + -errno); } - fd = phys_mapping(uml_to_phys(stack), &offset); - addr = mmap((void *) STUB_DATA, - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_SHARED, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping segfault stack at 0x%lx failed, errno = %d\n", - STUB_DATA, errno); - exit(1); + while (written < stub_exe_end - stub_exe_start) { + ssize_t res = write(stub_exe_fd, stub_exe_start + written, + stub_exe_end - stub_exe_start - written); + if (res < 0) { + if (errno == EINTR) + continue; + + if (tmpfile) + unlink(tmpfile); + panic("Failed write stub binary: %d", -errno); + } + + written += res; } 
- set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; - sa.sa_sigaction = (void *) segv_handler; - sa.sa_restorer = NULL; - if (sigaction(SIGSEGV, &sa, NULL) < 0) { - os_info("%s - setting SIGSEGV handler failed - errno = %d\n", - __func__, errno); - exit(1); + if (!tmpfile) { + fcntl(stub_exe_fd, F_ADD_SEALS, + F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL); + } else { + if (fchmod(stub_exe_fd, 00500) < 0) { + unlink(tmpfile); + panic("Could not make stub binary executable: %d", + -errno); + } + + close(stub_exe_fd); + stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); + if (stub_exe_fd < 0) { + unlink(tmpfile); + panic("Could not reopen stub binary: %d", -errno); + } + + unlink(tmpfile); + free(tmpfile); } - kill(os_getpid(), SIGSTOP); return 0; } +__initcall(init_stub_exe_fd); int userspace_pid[NR_CPUS]; @@ -270,7 +322,7 @@ int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n, flags, err; + int pid, status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -286,10 +338,10 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - flags = CLONE_FILES | SIGCHLD; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + pid = clone(userspace_tramp, (void *) sp, + CLONE_VFORK | CLONE_VM | SIGCHLD, + (void *)stub_stack); if (pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", @@ -336,7 +388,10 @@ int start_userspace(unsigned long stub_stack) return err; } -void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +int unscheduled_userspace_iterations; +extern unsigned long tt_extra_sched_jiffies; + +void userspace(struct uml_pt_regs *regs) { int err, 
status, op, pid = userspace_pid[0]; siginfo_t si; @@ -345,6 +400,29 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { + /* + * When we are in time-travel mode, userspace can theoretically + * do a *lot* of work without being scheduled. The problem with + * this is that it will prevent kernel bookkeeping (primarily + * the RCU) from running and this can for example cause OOM + * situations. + * + * This code accounts a jiffie against the scheduling clock + * after the defined userspace iterations in the same thread. + * By doing so the situation is effectively prevented. + */ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { +#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS + if (CONFIG_UML_MAX_USERSPACE_ITERATIONS && + unscheduled_userspace_iterations++ > + CONFIG_UML_MAX_USERSPACE_ITERATIONS) { + tt_extra_sched_jiffies += 1; + unscheduled_userspace_iterations = 0; + } +#endif + } + time_travel_print_bc_msg(); current_mm_sync(); @@ -435,11 +513,11 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) case SIGSEGV: if (PTRACE_FULL_FAULTINFO) { get_skas_faultinfo(pid, - &regs->faultinfo, aux_fp_regs); + &regs->faultinfo); (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); } - else handle_segv(pid, regs, aux_fp_regs); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); @@ -487,6 +565,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) void switch_threads(jmp_buf *me, jmp_buf *you) { + unscheduled_userspace_iterations = 0; + if (UML_SETJMP(me) == 0) UML_LONGJMP(you, 1); } @@ -570,6 +650,7 @@ static bool noreboot; static int __init noreboot_cmd_param(char *str, int *add) { + *add = 0; noreboot = true; return 0; } diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index e09d65b05d1c..eb523ab1e218 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -358,6 +358,8 @@ char *get_umid(void) static
int __init set_uml_dir(char *name, int *add) { + *add = 0; + if (*name == '\0') { os_warn("uml_dir can't be an empty string\n"); return 0; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 1dca4ffbd572..4193e04d7e4a 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -52,8 +52,8 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); -#ifdef UML_CONFIG_UML_X86 -# ifndef UML_CONFIG_64BIT +#if IS_ENABLED(CONFIG_UML_X86) +# if !IS_ENABLED(CONFIG_64BIT) if (!strcmp(host.machine, "x86_64")) { strcpy(machine_out, "i686"); return; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6c633d93c639..9d7bd0ae48c4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -128,6 +128,8 @@ config X86 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_RT + select ARCH_SUPPORTS_AUTOFDO_CLANG + select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64 select ARCH_USE_MEMTEST diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 580636cdc257..4d3c9d00d6b6 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -34,4 +34,8 @@ static inline void __tlb_remove_table(void *table) free_page_and_swap_cache(table); } +static inline void invlpg(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 823f44f7bc94..d8408aafeed9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -798,6 +798,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static const struct x86_cpu_desc erratum_1386_microcode[] = { AMD_CPU_DESC(0x17, 0x1, 0x2, 0x0800126e), AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052), + {}, }; static void fix_erratum_1386(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 
ca327cfa42ae..a5c28975c608 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -169,7 +169,7 @@ static void ppin_init(struct cpuinfo_x86 *c) } clear_ppin: - clear_cpu_cap(c, info->feature); + setup_clear_cpu_cap(info->feature); } static void default_init(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 31a73715d755..fb5d0c67fbab 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -34,6 +34,7 @@ #include <asm/setup.h> #include <asm/cpu.h> #include <asm/msr.h> +#include <asm/tlb.h> #include "internal.h" @@ -483,11 +484,25 @@ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) } } -static int __apply_microcode_amd(struct microcode_amd *mc) +static int __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) { + unsigned long p_addr = (unsigned long)&mc->hdr.data_code; u32 rev, dummy; - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code); + native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); + + if (x86_family(bsp_cpuid_1_eax) == 0x17) { + unsigned long p_addr_end = p_addr + psize - 1; + + invlpg(p_addr); + + /* + * Flush next page too if patch image is crossing a page + * boundary. 
+ */ + if (p_addr >> PAGE_SHIFT != p_addr_end >> PAGE_SHIFT) + invlpg(p_addr_end); + } /* verify patch application was successful */ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); @@ -529,7 +544,7 @@ static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size) if (old_rev > mc->hdr.patch_id) return ret; - return !__apply_microcode_amd(mc); + return !__apply_microcode_amd(mc, desc.psize); } static bool get_builtin_microcode(struct cpio_data *cp) @@ -745,7 +760,7 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) + if (!__apply_microcode_amd(mc, p->size)) pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -798,7 +813,7 @@ static enum ucode_state apply_microcode_amd(int cpu) goto out; } - if (__apply_microcode_amd(mc_amd)) { + if (__apply_microcode_amd(mc_amd, p->size)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 68efd8cd8bf1..fab3ac9a4574 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -420,6 +420,10 @@ SECTIONS STABS_DEBUG DWARF_DEBUG +#ifdef CONFIG_PROPELLER_CLANG + .llvm_bb_addr_map : { *(.llvm_bb_addr_map) } +#endif + ELF_DETAILS DISCARDS diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 069e421c2247..95bc50a8541c 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -354,7 +354,7 @@ bool cpu_cache_has_invalidate_memregion(void) { return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR); } -EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); int cpu_cache_invalidate_memregion(int res_desc) { @@ -363,7 +363,7 @@ int cpu_cache_invalidate_memregion(int res_desc) wbinvd_on_all_cpus(); return 0; } 
-EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM"); #endif static void __cpa_flush_all(void *arg) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index b0d5a644fc84..a2becb85bea7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -20,6 +20,7 @@ #include <asm/cacheflush.h> #include <asm/apic.h> #include <asm/perf_event.h> +#include <asm/tlb.h> #include "mm_internal.h" @@ -1140,7 +1141,7 @@ STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr) bool cpu_pcide; /* Flush 'addr' from the kernel PCID: */ - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + invlpg(addr); /* If PTI is off there is no user PCID and nothing to flush. */ if (!static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 186f13268401..986045d5e638 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -10,6 +10,7 @@ config UML_X86 def_bool y select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 select DCACHE_WORD_ACCESS + select HAVE_EFFICIENT_UNALIGNED_ACCESS config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" @@ -28,17 +29,6 @@ config X86_64 def_bool 64BIT select MODULES_USE_ELF_RELA -config 3_LEVEL_PGTABLES - bool "Three-level pagetables" if !64BIT - default 64BIT - help - Three-level pagetables will let UML have more than 4G of physical - memory. All the memory that can't be mapped directly will be treated - as high memory. - - However, this it experimental on 32-bit architectures, so if unsure say - N (on x86-64 it's automatically enabled, instead, as it's safe there). 
- config ARCH_HAS_SC_SIGNALS def_bool !64BIT diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 36e67fc97c22..b42c31cd2390 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -10,7 +10,7 @@ else endif obj-y = bugs_$(BITS).o delay.o fault.o \ - ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ + ptrace.o ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ mem_$(BITS).o subarch.o os-Linux/ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 6052200fe925..62ed5d68a978 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -8,6 +8,8 @@ #include <asm/user.h> #include <skas.h> +#define CORE_DUMP_USE_REGSET + #ifdef CONFIG_X86_32 #define R_386_NONE 0 diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index 2fef3da55533..2641d28d115c 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -2,6 +2,16 @@ #ifndef __UM_X86_PTRACE_H #define __UM_X86_PTRACE_H +/* This is here because signal.c needs the REGSET_FP_LEGACY definition */ +enum { + REGSET_GENERAL, +#ifdef CONFIG_X86_32 + REGSET_FP_LEGACY, +#endif + REGSET_FP, + REGSET_XSTATE, +}; + #include <linux/compiler.h> #ifndef CONFIG_X86_32 #define __FRAME_OFFSETS /* Needed to get the R* macros */ diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile index 5249bbc30dcd..77a308aaa5ec 100644 --- a/arch/x86/um/os-Linux/Makefile +++ b/arch/x86/um/os-Linux/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-y = registers.o task_size.o mcontext.o +obj-y = registers.o mcontext.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index f3638dd09cec..76eaeb93928c 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -16,133 +16,58 @@ #include <asm/sigcontext.h> #include <linux/elf.h> #include <registers.h> +#include <sys/mman.h> -static int 
have_xstate_support; +unsigned long host_fp_size; -int save_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} - -int save_fp_registers(int pid, unsigned long *fp_regs) +int get_fp_registers(int pid, unsigned long *regs) { -#ifdef PTRACE_GETREGSET - struct iovec iov; + struct iovec iov = { + .iov_base = regs, + .iov_len = host_fp_size, + }; - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return save_i387_registers(pid, fp_regs); -} - -int restore_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; } -int restore_fp_registers(int pid, unsigned long *fp_regs) -{ -#ifdef PTRACE_SETREGSET - struct iovec iov; - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return restore_i387_registers(pid, fp_regs); -} - -#ifdef __i386__ -int have_fpx_regs = 1; -int save_fpx_registers(int pid, unsigned long *fp_regs) +int put_fp_registers(int pid, unsigned long *regs) { - if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} + struct iovec iov = { + .iov_base = regs, + .iov_len = host_fp_size, + }; -int restore_fpx_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; } -int get_fp_registers(int pid, unsigned long *regs) -{ - if (have_fpx_regs) - return save_fpx_registers(pid, regs); - else - return save_fp_registers(pid, regs); -} - -int put_fp_registers(int pid, unsigned long 
*regs) -{ - if (have_fpx_regs) - return restore_fpx_registers(pid, regs); - else - return restore_fp_registers(pid, regs); -} - -void arch_init_registers(int pid) -{ - struct user_fpxregs_struct fpx_regs; - int err; - - err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); - if (!err) - return; - - if (errno != EIO) - panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", - errno); - - have_fpx_regs = 0; -} -#else - -int get_fp_registers(int pid, unsigned long *regs) +int arch_init_registers(int pid) { - return save_fp_registers(pid, regs); + struct iovec iov = { + /* Just use plenty of space, it does not cost us anything */ + .iov_len = 2 * 1024 * 1024, + }; + int ret; + + iov.iov_base = mmap(NULL, iov.iov_len, PROT_WRITE | PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (iov.iov_base == MAP_FAILED) + return -ENOMEM; + + /* GDB has x86_xsave_length, which uses x86_cpuid_count */ + ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); + if (ret) + ret = -errno; + munmap(iov.iov_base, 2 * 1024 * 1024); + + host_fp_size = iov.iov_len; + + return ret; } -int put_fp_registers(int pid, unsigned long *regs) -{ - return restore_fp_registers(pid, regs); -} - -void arch_init_registers(int pid) -{ -#ifdef PTRACE_GETREGSET - void * fp_regs; - struct iovec iov; - - fp_regs = malloc(FP_SIZE * sizeof(unsigned long)); - if(fp_regs == NULL) - return; - - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) - have_xstate_support = 1; - - free(fp_regs); -#endif -} -#endif - unsigned long get_thread_reg(int reg, jmp_buf *buf) { switch (reg) { diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c deleted file mode 100644 index 1dc9adc20b1c..000000000000 --- a/arch/x86/um/os-Linux/task_size.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <signal.h> -#include <sys/mman.h> -#include 
<longjmp.h> - -#ifdef __i386__ - -static jmp_buf buf; - -static void segfault(int sig) -{ - longjmp(buf, 1); -} - -static int page_ok(unsigned long page) -{ - unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); - unsigned long n = ~0UL; - void *mapped = NULL; - int ok = 0; - - /* - * First see if the page is readable. If it is, it may still - * be a VDSO, so we go on to see if it's writable. If not - * then try mapping memory there. If that fails, then we're - * still in the kernel area. As a sanity check, we'll fail if - * the mmap succeeds, but gives us an address different from - * what we wanted. - */ - if (setjmp(buf) == 0) - n = *address; - else { - mapped = mmap(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mapped == MAP_FAILED) - return 0; - if (mapped != address) - goto out; - } - - /* - * Now, is it writeable? If so, then we're in user address - * space. If not, then try mprotecting it and try the write - * again. - */ - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - goto out; - } else if (mprotect(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE) != 0) - goto out; - - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - } - - out: - if (mapped != NULL) - munmap(mapped, UM_KERN_PAGE_SIZE); - return ok; -} - -unsigned long os_get_top_address(void) -{ - struct sigaction sa, old; - unsigned long bottom = 0; - /* - * A 32-bit UML on a 64-bit host gets confused about the VDSO at - * 0xffffe000. It is mapped, is readable, can be reprotected writeable - * and written. However, exec discovers later that it can't be - * unmapped. So, just set the highest address to be checked to just - * below it. This might waste some address space on 4G/4G 32-bit - * hosts, but shouldn't hurt otherwise. - */ - unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; - unsigned long test, original; - - printf("Locating the bottom of the address space ... 
"); - fflush(stdout); - - /* - * We're going to be longjmping out of the signal handler, so - * SA_DEFER needs to be set. - */ - sa.sa_handler = segfault; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_NODEFER; - if (sigaction(SIGSEGV, &sa, &old)) { - perror("os_get_top_address"); - exit(1); - } - - /* Manually scan the address space, bottom-up, until we find - * the first valid page (or run out of them). - */ - for (bottom = 0; bottom < top; bottom++) { - if (page_ok(bottom)) - break; - } - - /* If we've got this far, we ran out of pages. */ - if (bottom == top) { - fprintf(stderr, "Unable to determine bottom of address " - "space.\n"); - exit(1); - } - - printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT); - printf("Locating the top of the address space ... "); - fflush(stdout); - - original = bottom; - - /* This could happen with a 4G/4G split */ - if (page_ok(top)) - goto out; - - do { - test = bottom + (top - bottom) / 2; - if (page_ok(test)) - bottom = test; - else - top = test; - } while (top - bottom > 1); - -out: - /* Restore the old SIGSEGV handling */ - if (sigaction(SIGSEGV, &old, NULL)) { - perror("os_get_top_address"); - exit(1); - } - top <<= UM_KERN_PAGE_SHIFT; - printf("0x%lx\n", top); - - return top; -} - -#else - -unsigned long os_get_top_address(void) -{ - /* The old value of CONFIG_TOP_ADDR */ - return 0x7fc0002000; -} - -#endif diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c new file mode 100644 index 000000000000..57c504fd5626 --- /dev/null +++ b/arch/x86/um/ptrace.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/regset.h> +#include <asm/user32.h> +#include <asm/sigcontext.h> + +#ifdef CONFIG_X86_32 +/* + * FPU tag word conversions. 
+ */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) + tag = 0; /* Valid */ + else + tag = 2; /* Special */ + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* Get/set the old 32bit i387 registers (pre-FPX) */ +static int fpregs_legacy_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + int i; + + membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul); + membuf_store(&to, (unsigned long)fxsave->swd | 0xffff0000ul); + membuf_store(&to, twd_fxsr_to_i387(fxsave)); + membuf_store(&to, fxsave->fip); + membuf_store(&to, fxsave->fcs | ((unsigned long)fxsave->fop << 16)); + membuf_store(&to, fxsave->foo); + 
membuf_store(&to, fxsave->fos); + + for (i = 0; i < 8; i++) + membuf_write(&to, (void *)fxsave->st_space + i * 16, 10); + + return 0; +} + +static int fpregs_legacy_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + const struct user_i387_struct *from; + struct user_i387_struct buf; + int i; + + if (ubuf) { + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + from = &buf; + } else { + from = kbuf; + } + + fxsave->cwd = (unsigned short)(from->cwd & 0xffff); + fxsave->swd = (unsigned short)(from->swd & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(from->twd & 0xffff)); + fxsave->fip = from->fip; + fxsave->fop = (unsigned short)((from->fcs & 0xffff0000ul) >> 16); + fxsave->fcs = (from->fcs & 0xffff); + fxsave->foo = from->foo; + fxsave->fos = from->fos; + + for (i = 0; i < 8; i++) { + memcpy((void *)fxsave->st_space + i * 16, + (void *)from->st_space + i * 10, 10); + } + + return 0; +} +#endif + +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int reg; + + for (reg = 0; to.left; reg++) + membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); + return 0; +} + +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + if (kbuf) { + const unsigned long *k = kbuf; + + while (count >= sizeof(*k) && !ret) { + ret = putreg(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + + while (count >= sizeof(*u) && !ret) { + unsigned long word; + + ret = __get_user(word, u++); + if (ret) + break; + ret = putreg(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return ret; +} + +static 
int generic_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return regset->n; +} + +static int generic_fpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + membuf_write(&to, fpregs, regset->size * regset->n); + return 0; +} + +static int generic_fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, 0, regset->size * regset->n); +} + +static struct user_regset uml_regsets[] __ro_after_init = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = genregs_get, + .set = genregs_set + }, +#ifdef CONFIG_X86_32 + /* Old FP registers, they are needed in signal frames */ + [REGSET_FP_LEGACY] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_ia32_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = fpregs_legacy_get, + .set = fpregs_legacy_set, + }, +#endif + [REGSET_FP] = { +#ifdef CONFIG_X86_32 + .core_note_type = NT_PRXFPREG, + .n = sizeof(struct user32_fxsr_struct) / sizeof(long), +#else + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_struct) / sizeof(long), +#endif + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + /* TODO: Add TLS regset for 32bit */ +}; + +static const struct 
user_regset_view user_uml_view = { +#ifdef CONFIG_X86_32 + .name = "i386", .e_machine = EM_386, +#else + .name = "x86_64", .e_machine = EM_X86_64, +#endif + .regsets = uml_regsets, .n = ARRAY_SIZE(uml_regsets) +}; + +const struct user_regset_view * +task_user_regset_view(struct task_struct *tsk) +{ + return &user_uml_view; +} + +static int __init init_regset_xstate_info(void) +{ + uml_regsets[REGSET_XSTATE].n = + host_fp_size / uml_regsets[REGSET_XSTATE].size; + + return 0; +} +arch_initcall(init_regset_xstate_info); diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index b0a71c6cdc6e..3af3cb821524 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/regset.h> #include <asm/ptrace-abi.h> #include <registers.h> #include <skas.h> @@ -168,65 +169,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long __user *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - -static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, 
&fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_fpx_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -234,17 +176,25 @@ long subarch_ptrace(struct task_struct *child, long request, void __user *datap = (void __user *) data; switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_GETFPXREGS: /* Get the child FPU state. */ - ret = get_fpxregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); case PTRACE_SETFPXREGS: /* Set the child FPU state. 
*/ - ret = set_fpxregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); default: ret = -EIO; } diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index aa68d83d3f44..e0d4120a45c8 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/errno.h> +#include <linux/regset.h> #define __FRAME_OFFSETS #include <asm/ptrace.h> #include <linux/uaccess.h> @@ -188,36 +189,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -226,11 +197,15 @@ long subarch_ptrace(struct task_struct *child, long request, switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. 
*/ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_ARCH_PRCTL: /* XXX Calls ptrace on the host - needs some SMP thinking */ ret = arch_prctl(child, data, (void __user *) addr); diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 6ca4ecabc55b..2dd4ca6713f8 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -56,12 +56,16 @@ struct syscall_args { UPT_SYSCALL_ARG5(r), \ UPT_SYSCALL_ARG6(r) } } ) +extern unsigned long host_fp_size; + struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; - unsigned long fp[MAX_FP_NR]; struct faultinfo faultinfo; long syscall; int is_user; + + /* Dynamically sized FP registers (holds an XSTATE) */ + unsigned long fp[]; }; #define EMPTY_UML_PT_REGS { } @@ -72,4 +76,6 @@ struct uml_pt_regs { extern int user_context(unsigned long sp); +extern int arch_init_registers(int pid); + #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h index 0c4989842fbe..2392470cac4d 100644 --- a/arch/x86/um/shared/sysdep/ptrace_32.h +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -6,8 +6,6 @@ #ifndef __SYSDEP_I386_PTRACE_H #define __SYSDEP_I386_PTRACE_H -#define MAX_FP_NR HOST_FPX_SIZE - #define UPT_SYSCALL_ARG1(r) UPT_BX(r) #define UPT_SYSCALL_ARG2(r) UPT_CX(r) #define UPT_SYSCALL_ARG3(r) UPT_DX(r) @@ -15,6 +13,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_DI(r) #define UPT_SYSCALL_ARG6(r) UPT_BP(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h index 0dc223aa1c2d..e73573ac871f 100644 --- a/arch/x86/um/shared/sysdep/ptrace_64.h +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -8,8 +8,6 @@ #ifndef __SYSDEP_X86_64_PTRACE_H #define __SYSDEP_X86_64_PTRACE_H -#define MAX_FP_NR HOST_FP_SIZE - #define 
REGS_R8(r) ((r)[HOST_R8]) #define REGS_R9(r) ((r)[HOST_R9]) #define REGS_R10(r) ((r)[HOST_R10]) @@ -57,6 +55,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_R8(r) #define UPT_SYSCALL_ARG6(r) UPT_R9(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h index 1d1a824fa652..98da23120538 100644 --- a/arch/x86/um/shared/sysdep/ptrace_user.h +++ b/arch/x86/um/shared/sysdep/ptrace_user.h @@ -11,12 +11,6 @@ #define REGS_IP_INDEX HOST_IP #define REGS_SP_INDEX HOST_SP -#ifdef __i386__ -#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE) -#else -#define FP_SIZE HOST_FP_SIZE -#endif - /* * glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum, * ensure we have a definition by (re-)defining it here. diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 0b44a86dd346..390988132c0a 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -112,11 +112,23 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movl %%esp,%0 ;" - "andl %1,%0" + "call _here_%=;" + "_here_%=:" + "popl %0;" + "andl %1, %0 ;" + "addl %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subl %0,%%esp ;" \ + "movl %1, %%eax ; " \ + "call *%%eax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 67f44284f1aa..294affbec742 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -28,6 +28,17 @@ static __always_inline long stub_syscall0(long syscall) return ret; } +static __always_inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ 
volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1) : __syscall_clobber ); + + return ret; +} + static __always_inline long stub_syscall2(long syscall, long arg1, long arg2) { long ret; @@ -106,11 +117,21 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movq %%rsp,%0 ;" - "andq %1,%0" + "lea 0(%%rip), %0;" + "andq %1, %0 ;" + "addq %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subq %0,%%rsp ;" \ + "movq %1,%%rax ;" \ + "call *%%rax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 2cc8c2309022..75087e85b6fd 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -16,145 +16,24 @@ #include <registers.h> #include <skas.h> -#ifdef CONFIG_X86_32 - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. 
*/ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; -} - -static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) -{ - struct _fpxreg *st = NULL; - unsigned long twd = (unsigned long) fxsave->twd; - unsigned long tag; - unsigned long ret = 0xffff0000; - int i; - -#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) - - for (i = 0; i < 8; i++) { - if (twd & 0x1) { - st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = 2; /* Special */ - break; - case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } - break; - default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } - break; - } - } else { - tag = 3; /* Empty */ - } - ret |= (tag << (2 * i)); - twd = twd >> 1; - } - return ret; -} - -static int convert_fxsr_to_user(struct _fpstate __user *buf, - struct user_fxsr_struct *fxsave) -{ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; - int i; - - env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; - env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; - env[2] = twd_fxsr_to_i387(fxsave); - env[3] = fxsave->fip; - env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); - env[5] = fxsave->foo; - env[6] = fxsave->fos; - - if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) - return 1; - - to = &buf->_st[0]; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; - } - return 0; -} - -static 
int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, - struct _fpstate __user *buf) -{ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; - int i; - - if (copy_from_user( env, buf, 7 * sizeof(long))) - return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); - fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); - fxsave->fip = env[3]; - fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); - fxsave->fcs = (env[4] & 0xffff); - fxsave->foo = env[5]; - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; - } - return 0; -} - -extern int have_fpx_regs; +#include <linux/regset.h> +#include <asm/sigframe.h> +#ifdef CONFIG_X86_32 +struct _xstate_64 { + struct _fpstate_64 fpstate; + struct _header xstate_hdr; + struct _ymmh_state ymmh; + /* New processor state extensions go here: */ +}; +#else +#define _xstate_64 _xstate #endif static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { + struct _xstate_64 __user *from_fp64; struct sigcontext sc; int err; @@ -203,35 +82,27 @@ static int copy_sc_from_user(struct pt_regs *regs, #undef GETREG #ifdef CONFIG_X86_32 - if (have_fpx_regs) { - struct user_fxsr_struct fpx; - int pid = userspace_pid[current_thread_info()->cpu]; + from_fp64 = ((void __user *)sc.fpstate) + + offsetof(struct _fpstate_32, _fxsr_env); +#else + from_fp64 = (void __user *)sc.fpstate; +#endif - err = copy_from_user(&fpx, - &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], - sizeof(struct user_fxsr_struct)); - if (err) - return 1; + err = copy_from_user(regs->regs.fp, from_fp64, host_fp_size); + if (err) + return 1; - err 
= convert_fxsr_from_user(&fpx, (void *)sc.fpstate); - if (err) - return 1; - - err = restore_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - " - "restore_fpx_registers failed, errno = %d\n", - -err); - return 1; - } - } else +#ifdef CONFIG_X86_32 + /* Data is duplicated and this copy is the important one */ + err = copy_regset_from_user(current, + task_user_regset_view(current), + REGSET_FP_LEGACY, 0, + sizeof(struct user_i387_struct), + (void __user *)sc.fpstate); + if (err < 0) + return err; #endif - { - err = copy_from_user(regs->regs.fp, (void *)sc.fpstate, - sizeof(struct _xstate)); - if (err) - return 1; - } + return 0; } @@ -239,6 +110,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, struct _xstate __user *to_fp, struct pt_regs *regs, unsigned long mask) { + struct _xstate_64 __user *to_fp64; struct sigcontext sc; struct faultinfo * fi = &current->thread.arch.faultinfo; int err; @@ -290,35 +162,46 @@ static int copy_sc_to_user(struct sigcontext __user *to, return 1; #ifdef CONFIG_X86_32 - if (have_fpx_regs) { - int pid = userspace_pid[current_thread_info()->cpu]; - struct user_fxsr_struct fpx; - - err = save_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0){ - printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " - "failed, errno = %d\n", err); - return 1; - } - - err = convert_fxsr_to_user(&to_fp->fpstate, &fpx); - if (err) - return 1; + err = copy_regset_to_user(current, + task_user_regset_view(current), + REGSET_FP_LEGACY, 0, + sizeof(struct _fpstate_32), to_fp); + if (err < 0) + return err; - err |= __put_user(fpx.swd, &to_fp->fpstate.status); - err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); - if (err) - return 1; + __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); + + BUILD_BUG_ON(offsetof(struct _xstate, xstate_hdr) != + offsetof(struct _xstate_64, xstate_hdr) + + offsetof(struct _fpstate_32, _fxsr_env)); + to_fp64 = (void __user *)to_fp + + offsetof(struct _fpstate_32, 
_fxsr_env); +#else + to_fp64 = to_fp; +#endif /* CONFIG_X86_32 */ + + if (copy_to_user(to_fp64, regs->regs.fp, host_fp_size)) + return 1; - if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx, - sizeof(struct user_fxsr_struct))) - return 1; - } else + /* + * Put magic/size values for userspace. We do not bother to verify them + * later on, however, userspace needs them should it try to read the + * XSTATE data. And ptrace does not fill in these parts. + */ + BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE); +#ifdef CONFIG_X86_32 + __put_user(offsetof(struct _fpstate_32, _fxsr_env) + + host_fp_size + FP_XSTATE_MAGIC2_SIZE, + &to_fp64->fpstate.sw_reserved.extended_size); +#else + __put_user(host_fp_size + FP_XSTATE_MAGIC2_SIZE, + &to_fp64->fpstate.sw_reserved.extended_size); #endif - { - if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate))) - return 1; - } + __put_user(host_fp_size, &to_fp64->fpstate.sw_reserved.xstate_size); + + __put_user(FP_XSTATE_MAGIC1, &to_fp64->fpstate.sw_reserved.magic1); + __put_user(FP_XSTATE_MAGIC2, + (int __user *)((void __user *)to_fp64 + host_fp_size)); return 0; } @@ -336,34 +219,15 @@ static int copy_ucontext_to_user(struct ucontext __user *uc, return err; } -struct sigframe -{ - char __user *pretcode; - int sig; - struct sigcontext sc; - struct _xstate fpstate; - unsigned long extramask[_NSIG_WORDS-1]; - char retcode[8]; -}; - -struct rt_sigframe -{ - char __user *pretcode; - int sig; - struct siginfo __user *pinfo; - void __user *puc; - struct siginfo info; - struct ucontext uc; - struct _xstate fpstate; - char retcode[8]; -}; - int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig, struct pt_regs *regs, sigset_t *mask) { + size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) + + host_fp_size + FP_XSTATE_MAGIC2_SIZE; struct sigframe __user *frame; void __user *restorer; int err = 0, sig = ksig->sig; + unsigned long fp_to; /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ stack_top 
= ((stack_top + 4) & -16UL) - 4; @@ -371,13 +235,21 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig, if (!access_ok(frame, sizeof(*frame))) return 1; + /* Add required space for math frame */ + frame = (struct sigframe __user *)((unsigned long)frame - math_size); + restorer = frame->retcode; if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(restorer, (void __user * __user *)&frame->pretcode); err |= __put_user(sig, &frame->sig); - err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]); + + fp_to = (unsigned long)frame + sizeof(*frame); + + err |= copy_sc_to_user(&frame->sc, + (struct _xstate __user *)fp_to, + regs, mask->sig[0]); if (_NSIG_WORDS > 1) err |= __copy_to_user(&frame->extramask, &mask->sig[1], sizeof(frame->extramask)); @@ -407,26 +279,35 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig, int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, struct pt_regs *regs, sigset_t *mask) { + size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) + + host_fp_size + FP_XSTATE_MAGIC2_SIZE; struct rt_sigframe __user *frame; void __user *restorer; int err = 0, sig = ksig->sig; + unsigned long fp_to; stack_top &= -8UL; frame = (struct rt_sigframe __user *) stack_top - 1; if (!access_ok(frame, sizeof(*frame))) return 1; + /* Add required space for math frame */ + frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size); + restorer = frame->retcode; if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(restorer, (void __user * __user *)&frame->pretcode); err |= __put_user(sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); + err |= __put_user(&frame->info, (void __user * __user *)&frame->pinfo); + err |= 
__put_user(&frame->uc, (void __user * __user *)&frame->puc); err |= copy_siginfo_to_user(&frame->info, &ksig->info); - err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, - PT_REGS_SP(regs)); + + fp_to = (unsigned long)frame + sizeof(*frame); + + err |= copy_ucontext_to_user(&frame->uc, (struct _xstate __user *)fp_to, + mask, PT_REGS_SP(regs)); /* * This is movl $,%eax ; int $0x80 @@ -478,27 +359,24 @@ SYSCALL_DEFINE0(sigreturn) #else -struct rt_sigframe -{ - char __user *pretcode; - struct ucontext uc; - struct siginfo info; - struct _xstate fpstate; -}; - int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, struct pt_regs *regs, sigset_t *set) { + unsigned long math_size = host_fp_size + FP_XSTATE_MAGIC2_SIZE; struct rt_sigframe __user *frame; int err = 0, sig = ksig->sig; unsigned long fp_to; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); + + /* Add required space for math frame */ + frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size); + /* Subtract 128 for a red zone and 8 for proper alignment */ frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); - if (!access_ok(frame, sizeof(*frame))) + if (!access_ok(frame, sizeof(*frame) + math_size)) goto out; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { @@ -509,12 +387,14 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, /* Create the ucontext. 
*/ err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(NULL, &frame->uc.uc_link); err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); - err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, - set->sig[0]); - fp_to = (unsigned long)&frame->fpstate; + fp_to = (unsigned long)frame + sizeof(*frame); + + err |= copy_sc_to_user(&frame->uc.uc_mcontext, + (struct _xstate __user *)fp_to, + regs, set->sig[0]); err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate); if (sizeof(*set) == 16) { @@ -531,7 +411,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, */ /* x86-64 should always use SA_RESTORER. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) - err |= __put_user((void *)ksig->ka.sa.sa_restorer, + err |= __put_user((void __user *)ksig->ka.sa.sa_restorer, &frame->pretcode); else /* could use a vstub here */ diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index 1c77d9946199..d6e1cd9956bf 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -20,9 +20,6 @@ void foo(void); void foo(void) { #ifdef __i386__ - DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct)); - DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct)); - DEFINE(HOST_IP, EIP); DEFINE(HOST_SP, UESP); DEFINE(HOST_EFLAGS, EFL); @@ -41,11 +38,6 @@ void foo(void) DEFINE(HOST_GS, GS); DEFINE(HOST_ORIG_AX, ORIG_EAX); #else -#ifdef FP_XSTATE_MAGIC1 - DEFINE_LONGS(HOST_FP_SIZE, 2696); -#else - DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); -#endif DEFINE_LONGS(HOST_BX, RBX); DEFINE_LONGS(HOST_CX, RCX); DEFINE_LONGS(HOST_DI, RDI); diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 6a77ea6434ff..7478d11dacb7 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -56,7 +56,6 @@ CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage quiet_cmd_vdso = VDSO $@ cmd_vdso = $(CC) -nostdlib -o $@ \ 
$(CC_FLAGS_LTO) $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ - sh $(src)/checkundef.sh '$(NM)' '$@' + -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack +VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack -Wl,--no-undefined diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh deleted file mode 100644 index 8e3ea6bb956f..000000000000 --- a/arch/x86/um/vdso/checkundef.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -nm="$1" -file="$2" -$nm "$file" | grep '^ *U' > /dev/null 2>&1 -if [ $? -eq 1 ]; then - exit 0 -else - echo "$file: undefined symbols found" >&2 - exit 1 -fi |
