From 3d9aba6618d115750729bba2d1f8af180bd7d3bd Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Apr 2026 15:44:26 +0800 Subject: LoongArch: Adjust build infrastructure for 32BIT/64BIT Adjust build infrastructure (Kconfig, Makefile and ld scripts) to let us enable both 32BIT/64BIT kernel build. Reviewed-by: Arnd Bergmann Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 115 ++++++++++++++++++++++++---------- arch/loongarch/Makefile | 23 ++++++- arch/loongarch/boot/Makefile | 6 ++ arch/loongarch/kernel/vmlinux.lds.S | 7 ++- arch/loongarch/kvm/Kconfig | 2 +- arch/loongarch/lib/Makefile | 6 +- drivers/firmware/efi/libstub/Makefile | 1 + drivers/pci/controller/Kconfig | 2 +- lib/crc/Kconfig | 2 +- 9 files changed, 122 insertions(+), 42 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 92068ff38685..01da52fb72f5 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -21,11 +21,11 @@ config LOONGARCH select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV - select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU + select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && CPU_HAS_FPU select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PREEMPT_LAZY - select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_PTE_SPECIAL if 64BIT select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -60,16 +60,15 @@ config LOONGARCH select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO - select ARCH_SPARSEMEM_ENABLE select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_HUGETLBFS if 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS - select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_NUMA_BALANCING if NUMA select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_SUPPORTS_RT select ARCH_SUPPORTS_SCHED_SMT if SMP @@ -79,10 +78,10 @@ config LOONGARCH select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_DEFAULT_BPF_JIT + select ARCH_WANT_DEFAULT_BPF_JIT if HAVE_EBPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if 64BIT select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BUILDTIME_TABLE_SORT @@ -90,13 +89,14 @@ config LOONGARCH select CPU_PM select EDAC_SUPPORT select EFI + select GENERIC_ATOMIC64 if 32BIT select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_DEVICES select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY - select GENERIC_GETTIMEOFDAY + select GENERIC_GETTIMEOFDAY if 64BIT select GENERIC_IOREMAP if !ARCH_IOREMAP select GENERIC_IRQ_MATRIX_ALLOCATOR select GENERIC_IRQ_MULTI_HANDLER @@ -111,16 +111,16 @@ config LOONGARCH select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD - select GENERIC_TIME_VSYSCALL + select GENERIC_TIME_VSYSCALL if GENERIC_GETTIMEOFDAY select GPIOLIB select HAS_IOPORT - select HAVE_ALIGNED_STRUCT_PAGE + select HAVE_ALIGNED_STRUCT_PAGE if 64BIT select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_BITREVERSE if 64BIT select HAVE_ARCH_JUMP_LABEL 
select HAVE_ARCH_JUMP_LABEL_RELATIVE - select HAVE_ARCH_KASAN - select HAVE_ARCH_KFENCE + select HAVE_ARCH_KASAN if 64BIT + select HAVE_ARCH_KFENCE if 64BIT select HAVE_ARCH_KGDB if PERF_EVENTS select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_MMAP_RND_BITS if MMU @@ -128,8 +128,8 @@ config LOONGARCH select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT + select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ASM_MODVERSIONS select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL @@ -143,7 +143,7 @@ config LOONGARCH select HAVE_FTRACE_REGS_HAVING_PT_REGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_EBPF_JIT + select HAVE_EBPF_JIT if 64BIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN select HAVE_EXIT_THREAD select HAVE_GENERIC_TIF_BITS @@ -166,9 +166,9 @@ config LOONGARCH select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI - select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && 64BIT select HAVE_PCI - select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS if 64BIT select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_POSIX_CPU_TIMERS_TASK_WORK @@ -210,18 +210,50 @@ config LOONGARCH select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_EXCEPTION_TRACE - select SWIOTLB + select SWIOTLB if 64BIT select TRACE_IRQFLAGS_SUPPORT select USE_PERCPU_NUMA_NODE_ID select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM - select ZONE_DMA32 + select ZONE_DMA32 if 64BIT + +menu "Kernel type and options" + +choice + prompt "Kernel type" config 32BIT - bool + bool "32-bit kernel" + help + Select this option if you want to build a 32-bit kernel. config 64BIT - def_bool y + bool "64-bit kernel" + help + Select this option if you want to build a 64-bit kernel. + +endchoice + +if 32BIT + +choice + prompt "32-bit kernel sub-type" + +config 32BIT_REDUCED + bool "32-bit kernel for LA32R" + help + Select this option if you want to build a 32-bit kernel for + LoongArch32 Reduced (LA32R). + +config 32BIT_STANDARD + bool "32-bit kernel for LA32S" + help + Select this option if you want to build a 32-bit kernel for + LoongArch32 Standard (LA32S). + +endchoice + +endif config GENERIC_BUG def_bool y @@ -314,8 +346,6 @@ config RUSTC_HAS_ANNOTATE_TABLEJUMP depends on RUST def_bool $(rustc-option,-Cllvm-args=--loongarch-annotate-tablejump) -menu "Kernel type and options" - source "kernel/Kconfig.hz" choice @@ -327,8 +357,17 @@ choice of page size and page table levels. The size of virtual memory address space are determined by the page table layout. +config 4KB_2LEVEL + bool "4KB with 2 levels" + select HAVE_PAGE_SIZE_4KB + select PGTABLE_2LEVEL + help + This option selects 4KB page size with 2 level page tables, which + support a maximum of 32 bits of application virtual memory. 
+ config 4KB_3LEVEL bool "4KB with 3 levels" + depends on 64BIT select HAVE_PAGE_SIZE_4KB select PGTABLE_3LEVEL help @@ -337,6 +376,7 @@ config 4KB_3LEVEL config 4KB_4LEVEL bool "4KB with 4 levels" + depends on 64BIT select HAVE_PAGE_SIZE_4KB select PGTABLE_4LEVEL help @@ -353,6 +393,7 @@ config 16KB_2LEVEL config 16KB_3LEVEL bool "16KB with 3 levels" + depends on 64BIT select HAVE_PAGE_SIZE_16KB select PGTABLE_3LEVEL help @@ -369,6 +410,7 @@ config 64KB_2LEVEL config 64KB_3LEVEL bool "64KB with 3 levels" + depends on 64BIT select HAVE_PAGE_SIZE_64KB select PGTABLE_3LEVEL help @@ -466,6 +508,7 @@ config EFI_STUB config SMP bool "Multi-Processing support" + depends on 64BIT help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more @@ -504,6 +547,7 @@ config NR_CPUS config NUMA bool "NUMA Support" select SMP + depends on 64BIT help Say Y to compile the kernel with NUMA (Non-Uniform Memory Access) support. This option improves performance on systems with more @@ -586,7 +630,7 @@ config CPU_HAS_FPU config CPU_HAS_LSX bool "Support for the Loongson SIMD Extension" - depends on AS_HAS_LSX_EXTENSION + depends on AS_HAS_LSX_EXTENSION && 64BIT help Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers and a set of SIMD instructions to operate on them. When this option @@ -601,7 +645,7 @@ config CPU_HAS_LSX config CPU_HAS_LASX bool "Support for the Loongson Advanced SIMD Extension" depends on CPU_HAS_LSX - depends on AS_HAS_LASX_EXTENSION + depends on AS_HAS_LASX_EXTENSION && 64BIT help Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector registers and a set of SIMD instructions to operate on them. When this @@ -615,7 +659,7 @@ config CPU_HAS_LASX config CPU_HAS_LBT bool "Support for the Loongson Binary Translation Extension" - depends on AS_HAS_LBT_EXTENSION + depends on AS_HAS_LBT_EXTENSION && 64BIT help Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0 to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop). 
@@ -643,13 +687,13 @@ config ARCH_SELECTS_KEXEC_FILE select HAVE_IMA_KEXEC if IMA config ARCH_SUPPORTS_CRASH_DUMP - def_bool y + def_bool 64BIT config ARCH_DEFAULT_CRASH_DUMP - def_bool y + def_bool 64BIT config ARCH_SELECTS_CRASH_DUMP - def_bool y + def_bool 64BIT depends on CRASH_DUMP select RELOCATABLE @@ -658,6 +702,7 @@ config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION config RELOCATABLE bool "Relocatable kernel" + depends on 64BIT select ARCH_HAS_RELR help This builds the kernel as a Position Independent Executable (PIE), @@ -694,7 +739,7 @@ source "kernel/livepatch/Kconfig" config PARAVIRT bool "Enable paravirtualization code" - depends on AS_HAS_LVZ_EXTENSION + depends on AS_HAS_LVZ_EXTENSION && 64BIT select HAVE_PV_STEAL_CLOCK_GEN help This changes the kernel so it can modify itself when it is run @@ -723,7 +768,7 @@ config ARCH_FLATMEM_ENABLE depends on !NUMA config ARCH_SPARSEMEM_ENABLE - def_bool y + def_bool 64BIT select SPARSEMEM_VMEMMAP_ENABLE help Say Y to support efficient handling of sparse physical memory, @@ -740,10 +785,12 @@ config MMU default y config ARCH_MMAP_RND_BITS_MIN - default 12 + default 10 if 32BIT + default 12 if 64BIT config ARCH_MMAP_RND_BITS_MAX - default 18 + default 15 if 32BIT + default 20 if 64BIT config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 8d45b860fe56..47516aeea9d2 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -25,6 +25,7 @@ endif # # Select the object file format to substitute into the linker script. # +32bit-tool-archpref = loongarch32 64bit-tool-archpref = loongarch64 32bit-bfd = elf32-loongarch 64bit-bfd = elf64-loongarch @@ -51,7 +52,10 @@ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif -ifdef CONFIG_64BIT +ifdef CONFIG_32BIT +tool-archpref = $(32bit-tool-archpref) +UTS_MACHINE := loongarch32 +else tool-archpref = $(64bit-tool-archpref) UTS_MACHINE := loongarch64 endif @@ -62,9 +66,19 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif +ifdef CONFIG_32BIT +ifdef CONFIG_32BIT_STANDARD +ld-emul = $(32bit-emul) +cflags-y += -march=la32v1.0 -mabi=ilp32s -mcmodel=normal +else # CONFIG_32BIT_REDUCED +ld-emul = $(32bit-emul) +cflags-y += -march=la32rv1.0 -mabi=ilp32s -mcmodel=normal +endif +endif + ifdef CONFIG_64BIT ld-emul = $(64bit-emul) -cflags-y += -mabi=lp64s -mcmodel=normal +cflags-y += -march=loongarch64 -mabi=lp64s -mcmodel=normal endif cflags-y += -pipe $(CC_FLAGS_NO_FPU) @@ -140,7 +154,12 @@ ifndef CONFIG_KASAN cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset endif +ifdef CONFIG_32BIT +load-y = 0xa0200000 +else load-y = 0x9000000000200000 +endif + bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile index 4e1c374c5782..8b6d9b42b5f0 100644 --- a/arch/loongarch/boot/Makefile +++ b/arch/loongarch/boot/Makefile @@ -20,7 +20,13 @@ $(obj)/vmlinux.efi: vmlinux FORCE $(call if_changed,objcopy) EFI_ZBOOT_PAYLOAD := vmlinux.efi + +ifdef CONFIG_32BIT +EFI_ZBOOT_BFD_TARGET := elf32-loongarch +EFI_ZBOOT_MACH_TYPE := LOONGARCH32 +else EFI_ZBOOT_BFD_TARGET := elf64-loongarch EFI_ZBOOT_MACH_TYPE := LOONGARCH64 +endif include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index d0e1377a041d..840d944c2f73 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ 
-6,7 +6,12 @@ #define PAGE_SIZE _PAGE_SIZE #define RO_EXCEPTION_TABLE_ALIGN 4 -#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */ + +#ifdef CONFIG_32BIT +#define PHYSADDR_MASK 0x1fffffff /* 29-bit */ +#else +#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */ +#endif /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 8e5213609975..15da2d88c0c1 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -19,7 +19,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on AS_HAS_LVZ_EXTENSION + depends on AS_HAS_LVZ_EXTENSION && 64BIT select HAVE_KVM_DIRTY_RING_ACQ_REL select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_IRQCHIP diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd4353..a19466b22ab9 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -3,8 +3,10 @@ # Makefile for LoongArch-specific library files. # -lib-y += delay.o memset.o memcpy.o memmove.o \ - clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o + +lib-$(CONFIG_32BIT) += bswapsi.o bswapdi.o +lib-$(CONFIG_64BIT) += memset.o memcpy.o memmove.o csum.o obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index e386ffd009b7..7c65e82525a8 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -97,6 +97,7 @@ zboot-obj-$(CONFIG_KERNEL_ZSTD) := zboot-decompress-zstd.o lib-xxhash.o CFLAGS_zboot-decompress-zstd.o += -I$(srctree)/lib/zstd zboot-obj-$(CONFIG_RISCV) += lib-clz_ctz.o lib-ashldi3.o +zboot-obj-$(CONFIG_LOONGARCH) += lib-clz_ctz.o lib-ashldi3.o lib-$(CONFIG_EFI_ZBOOT) += zboot.o $(zboot-obj-y) lib-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o bitmap.o find.o diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 5aaed8ac6e44..64a413396c14 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -187,7 +187,7 @@ config VMD config PCI_LOONGSON bool "LOONGSON PCIe controller" - depends on MACH_LOONGSON64 || COMPILE_TEST + depends on MACH_LOONGSON32 || MACH_LOONGSON64 || COMPILE_TEST depends on OF || ACPI depends on PCI_QUIRKS default MACH_LOONGSON64 diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig index 70e7a6016de3..5bf613405fdd 100644 --- a/lib/crc/Kconfig +++ b/lib/crc/Kconfig @@ -65,7 +65,7 @@ config CRC32_ARCH depends on CRC32 && CRC_OPTIMIZATIONS default y if ARM && KERNEL_MODE_NEON default y if ARM64 - default y if LOONGARCH + default y if LOONGARCH && 64BIT default y if MIPS && CPU_MIPSR6 default y if PPC64 && ALTIVEC default y if RISCV && RISCV_ISA_ZBC -- cgit v1.2.3 From 8b81576c16c0681b0c0148200a8c3ce33ad5f6fa Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Apr 2026 15:44:54 +0800 Subject: LoongArch: Add HIGHMEM (PKMAP and FIX_KMAP) support Add HIGHMEM (High Memory) support for LoongArch, mostly needed by 32BIT kernel because the size of kernel virtual memory space is only 512MB and the size of usable physical memory is only 256MB in this case. HIGHMEM adds permanent kernel mapping (PKMAP) and fixed kernel mapping (FIX_KMAP), which increase usable physical memory up to 2.25GB (2304MB). We can just use the generic copy_user_highpage(), so remove the custom version. 
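As a minimal sketch of the interface this enables (illustrative driver-style code, not part of the patch), kernel code reaches a possibly-HIGHMEM page through the kmap_local API; on a 32BIT kernel the temporary mapping comes from the per-CPU FIX_KMAP fixmap slots added here:

#include <linux/highmem.h>
#include <linux/string.h>

/* Illustrative only: zero a page that may live in ZONE_HIGHMEM. */
static void zero_any_page(struct page *page)
{
	/* Lowmem pages resolve to their linear address; HIGHMEM pages
	 * get a short-lived per-CPU mapping from the FIX_KMAP slots. */
	void *vaddr = kmap_local_page(page);

	memset(vaddr, 0, PAGE_SIZE);
	kunmap_local(vaddr);
}

The generic copy_user_highpage() follows this same kmap_local pattern, which is why the custom version removed below is no longer needed.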
Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 +++ arch/loongarch/include/asm/fixmap.h | 14 +++++++++ arch/loongarch/include/asm/highmem.h | 43 +++++++++++++++++++++++++ arch/loongarch/include/asm/page.h | 4 --- arch/loongarch/include/asm/pgtable.h | 12 +++++++ arch/loongarch/mm/Makefile | 1 + arch/loongarch/mm/highmem.c | 12 +++++++ arch/loongarch/mm/init.c | 61 +++++++++++++++++++++++++++--------- arch/loongarch/mm/pgtable.c | 27 ++++++++++++++++ 9 files changed, 161 insertions(+), 18 deletions(-) create mode 100644 arch/loongarch/include/asm/highmem.h create mode 100644 arch/loongarch/mm/highmem.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 01da52fb72f5..b3a10c07f990 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -348,6 +348,11 @@ config RUSTC_HAS_ANNOTATE_TABLEJUMP source "kernel/Kconfig.hz" +config HIGHMEM + bool "High Memory Support" + depends on 32BIT + select KMAP_LOCAL + choice prompt "Page Table Layout" default 16KB_2LEVEL if 32BIT diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h index d2e55ae55bb9..dce2da6ba787 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -8,10 +8,19 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H +#ifdef CONFIG_HIGHMEM +#include +#include +#endif + #define NR_FIX_BTMAPS 64 enum fixed_addresses { FIX_HOLE, +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, +#endif FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; @@ -25,4 +34,9 @@ extern void __set_fixmap(enum fixed_addresses idx, #include +/* + * Called from pagetable_init() + */ +extern void fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base); + #endif diff --git a/arch/loongarch/include/asm/highmem.h b/arch/loongarch/include/asm/highmem.h new file mode 100644 index 000000000000..e6d7a662d340 --- /dev/null +++ b/arch/loongarch/include/asm/highmem.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 2025 Loongson Technology Corporation Limited + */ +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef __KERNEL__ + +#include + +#ifndef __ASSEMBLER__ + +extern pte_t *pkmap_page_table; + +#define ARCH_HAS_KMAP_FLUSH_TLB +void kmap_flush_tlb(unsigned long addr); + +#endif /* !__ASSEMBLER__ */ + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. 
+ */ +#define LAST_PKMAP 1024 +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_NR(virt) ((virt - PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +#define flush_cache_kmaps() do {} while (0) + +#define arch_kmap_local_post_map(vaddr, pteval) local_flush_tlb_one(vaddr) +#define arch_kmap_local_post_unmap(vaddr) local_flush_tlb_one(vaddr) + +#endif /* __KERNEL__ */ + +#endif /* _ASM_HIGHMEM_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 327bf0bc92bf..8121c0f136da 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -36,10 +36,6 @@ extern unsigned long shm_align_mask; struct page; struct vm_area_struct; -void copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma); - -#define __HAVE_ARCH_COPY_USER_HIGHPAGE typedef struct { unsigned long pte; } pte_t; #define pte_val(x) ((x).pte) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index c33b3bcb733e..cd5e56bfbe7f 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -23,6 +23,10 @@ #include #endif +#ifdef CONFIG_HIGHMEM +#include +#endif + #if CONFIG_PGTABLE_LEVELS == 2 #define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT - PTRLOG)) #elif CONFIG_PGTABLE_LEVELS == 3 @@ -86,7 +90,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_32BIT #define VMALLOC_START (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) + +#ifdef CONFIG_HIGHMEM +#define VMALLOC_END (PKMAP_BASE - (2 * PAGE_SIZE)) +#else #define VMALLOC_END (FIXADDR_START - (2 * PAGE_SIZE)) +#endif + +#define PKMAP_BASE (PKMAP_END - (PAGE_SIZE * LAST_PKMAP)) +#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1)) #endif diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index 278be2c8fc36..2aae3773de77 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -7,6 +7,7 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \ fault.o ioremap.o maccess.o mmap.o pgtable.o \ page.o pageattr.o +obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/loongarch/mm/highmem.c b/arch/loongarch/mm/highmem.c new file mode 100644 index 000000000000..8a5789ee6842 --- /dev/null +++ b/arch/loongarch/mm/highmem.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +void kmap_flush_tlb(unsigned long addr) +{ + flush_tlb_one(addr); +} +EXPORT_SYMBOL(kmap_flush_tlb); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index c331bf69d2ec..bf51f4a1b086 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -39,20 +39,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -void copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma) -{ - void *vfrom, *vto; - - vfrom = kmap_local_page(from); - vto = kmap_local_page(to); - copy_page(vto, vfrom); - kunmap_local(vfrom); - kunmap_local(vto); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); -} - int __ref page_is_ram(unsigned long pfn) { unsigned long addr = PFN_PHYS(pfn); @@ -66,6 +52,9 @@ void __init arch_zone_limits_init(unsigned long *max_zone_pfns) max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif max_zone_pfns[ZONE_NORMAL] = 
max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = max_pfn; +#endif } void __ref free_initmem(void) @@ -73,6 +62,50 @@ void __ref free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } +#ifdef CONFIG_HIGHMEM + +void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + int ptrs_per_pgd; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); + pgd = pgd_base + i; + ptrs_per_pgd = min((1 << (BITS_PER_LONG - PGDIR_SHIFT)), PTRS_PER_PGD); + + for ( ; (i < ptrs_per_pgd) && (vaddr < end); pgd++, i++) { + pud = (pud_t *)pgd; + for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) { + pmd = (pmd_t *)pud; + for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + kernel_pte_init(pte); + set_pmd(pmd, __pmd((unsigned long)pte)); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } + vaddr += PMD_SIZE; + } + k = 0; + } + j = 0; + } +} + +#endif + #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 352d9b2e02ab..4ee188e38fed 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, void __init pagetable_init(void) { +#ifdef CONFIG_HIGHMEM + unsigned long vaddr; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +#endif + /* Initialize the entire pgd. */ pgd_init(swapper_pg_dir); pgd_init(invalid_pg_dir); @@ -153,4 +163,21 @@ void __init pagetable_init(void) #ifndef __PAGETABLE_PMD_FOLDED pmd_init(invalid_pmd_table); #endif + +#ifdef CONFIG_HIGHMEM + /* Permanent kmaps */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE * LAST_PKMAP, swapper_pg_dir); + + pgd = swapper_pg_dir + pgd_index(vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; + + /* Fixed mappings */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, swapper_pg_dir); +#endif } -- cgit v1.2.3 From 1829419bc3b291ad9547abe70053c2620832ac41 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 22 Apr 2026 15:45:11 +0800 Subject: LoongArch: Handle CONFIG_32BIT in syscall_get_arch() If CONFIG_32BIT is set, it should return AUDIT_ARCH_LOONGARCH32 instead of AUDIT_ARCH_LOONGARCH64 in syscall_get_arch(). 
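The value matters beyond audit itself: seccomp filters key on the same arch field. A hedged userspace sketch (the filter policy is made up for illustration; the constants come from the UAPI headers):

#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>

/* Kill the process unless the kernel reports the audit arch this
 * filter was written for; a 32-bit kernel reporting
 * AUDIT_ARCH_LOONGARCH64 would make such filters misfire. */
struct sock_filter check_arch[] = {
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 offsetof(struct seccomp_data, arch)),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_LOONGARCH32, 1, 0),
	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};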
Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/syscall.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h index 81d2733f7b94..df8ea223c77b 100644 --- a/arch/loongarch/include/asm/syscall.h +++ b/arch/loongarch/include/asm/syscall.h @@ -78,7 +78,11 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(struct task_struct *task) { +#ifdef CONFIG_32BIT + return AUDIT_ARCH_LOONGARCH32; +#else return AUDIT_ARCH_LOONGARCH64; +#endif } static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) -- cgit v1.2.3 From e3f4591f7920ce169f2f78fa5a89639ada7d7058 Mon Sep 17 00:00:00 2001 From: Lisa Robinson Date: Wed, 22 Apr 2026 15:45:11 +0800 Subject: LoongArch: Align FPU register state to 32 bytes Move fpr to the beginning of struct loongarch_fpu so it is naturally aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context switch performance. Also adjust process.c and fpu.S to work well with the new loongarch_fpu layout. Signed-off-by: Lisa Robinson Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/processor.h | 2 +- arch/loongarch/kernel/fpu.S | 12 ++++++------ arch/loongarch/kernel/process.c | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index c3bc44b5f5b3..ce8b953f8c79 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32) BUILD_FPR_ACCESS(64) struct loongarch_fpu { + union fpureg fpr[NUM_FPU_REGS]; uint64_t fcc; /* 8x8 */ uint32_t fcsr; uint32_t ftop; - union fpureg fpr[NUM_FPU_REGS]; }; struct loongarch_lbt { diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index f225dcc5b530..bf7d6b8bf600 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -97,7 +97,7 @@ .endm #ifdef CONFIG_32BIT - .macro sc_save_fcc thread tmp0 tmp1 + .macro sc_save_fcc base tmp0 tmp1 movcf2gr \tmp0, $fcc0 move \tmp1, \tmp0 movcf2gr \tmp0, $fcc1 @@ -106,7 +106,7 @@ bstrins.w \tmp1, \tmp0, 23, 16 movcf2gr \tmp0, $fcc3 bstrins.w \tmp1, \tmp0, 31, 24 - EX st.w \tmp1, \thread, THREAD_FCC + EX st.w \tmp1, \base, 0 movcf2gr \tmp0, $fcc4 move \tmp1, \tmp0 movcf2gr \tmp0, $fcc5 @@ -115,11 +115,11 @@ bstrins.w \tmp1, \tmp0, 23, 16 movcf2gr \tmp0, $fcc7 bstrins.w \tmp1, \tmp0, 31, 24 - EX st.w \tmp1, \thread, (THREAD_FCC + 4) + EX st.w \tmp1, \base, 4 .endm - .macro sc_restore_fcc thread tmp0 tmp1 - EX ld.w \tmp0, \thread, THREAD_FCC + .macro sc_restore_fcc base tmp0 tmp1 + EX ld.w \tmp0, \base, 0 bstrpick.w \tmp1, \tmp0, 7, 0 movgr2cf $fcc0, \tmp1 bstrpick.w \tmp1, \tmp0, 15, 8 @@ -128,7 +128,7 @@ movgr2cf $fcc2, \tmp1 bstrpick.w \tmp1, \tmp0, 31, 24 movgr2cf $fcc3, \tmp1 - EX ld.w \tmp0, \thread, (THREAD_FCC + 4) + EX ld.w \tmp0, \base, 4 bstrpick.w \tmp1, \tmp0, 7, 0 movgr2cf $fcc4, \tmp1 bstrpick.w \tmp1, \tmp0, 15, 8 diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 4ac1c3086152..17e88eedb154 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } + dst->thread.fpu.fcsr = src->thread.fpu.fcsr; + if (!used_math()) memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); else -- cgit v1.2.3 From 847634955b0810d0b93382a588312f745a5947be 
Mon Sep 17 00:00:00 2001 From: Yuqian Yang Date: Wed, 22 Apr 2026 15:45:11 +0800 Subject: LoongArch: Improve the logging of disabling KASLR Whether KASLR is disabled is not handled in nokaslr(), which is the early param "nokaslr" setup function, but in kaslr_disabled(). However, the logging was previously done in nokaslr() and lacked detail. So we move the logging to the right place and add more specific information about why it is disabled. Suggested-by: Wentao Guan Signed-off-by: Yuqian Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 82aa3f035927..16f6a9b39659 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -128,24 +128,28 @@ static inline __init unsigned long get_random_boot(void) static int __init nokaslr(char *p) { - pr_info("KASLR is disabled.\n"); - - return 0; /* Print a notice and silence the boot warning */ + return 0; /* Just silence the boot warning */ } early_param("nokaslr", nokaslr); +#define KASLR_DISABLED_MESSAGE "KASLR is disabled by %s in %s cmdline.\n" + static inline __init bool kaslr_disabled(void) { char *str; const char *builtin_cmdline = CONFIG_CMDLINE; str = strstr(builtin_cmdline, "nokaslr"); - if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) + if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) { + pr_info(KASLR_DISABLED_MESSAGE, "\'nokaslr\'", "built-in"); return true; + } str = strstr(boot_command_line, "nokaslr"); - if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) + if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) { + pr_info(KASLR_DISABLED_MESSAGE, "\'nokaslr\'", "bootloader"); return true; + } #ifdef CONFIG_HIBERNATION str = strstr(builtin_cmdline, "nohibernate"); @@ -165,17 +169,23 @@ static inline __init bool kaslr_disabled(void) return false; str = strstr(builtin_cmdline, "resume="); - if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) + if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) { + pr_info(KASLR_DISABLED_MESSAGE, "\'resume=\'", "built-in"); return true; + } str = strstr(boot_command_line, "resume="); - if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) + if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) { + pr_info(KASLR_DISABLED_MESSAGE, "\'resume=\'", "bootloader"); return true; + } #endif str = strstr(boot_command_line, "kexec_file"); - if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) + if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) { + pr_info(KASLR_DISABLED_MESSAGE, "\'kexec_file\'", "bootloader"); return true; + } return false; } -- cgit v1.2.3 From a28547576b3b3c95f2261cd5374c1e459f36d9dc Mon Sep 17 00:00:00 2001 From: Luo Qiu Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: LoongArch: Use get_random_canary() for stack canary init Like others, replace the custom stack canary initialization with the get_random_canary() helper, following the pattern established in commit 622754e84b10 ("stackprotector: actually use get_random_canary()").
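For reference, the adopted helper is roughly the following (paraphrased from include/linux/random.h; treat the exact mask behaviour as an assumption):

#include <linux/random.h>

/* get_random_canary() is approximately: */
static inline unsigned long example_canary(void)
{
	/* On 64-bit little-endian, CANARY_MASK zeroes the leading byte so
	 * that an unterminated C string overflow cannot reproduce the
	 * canary; the old LINUX_VERSION_CODE XOR added no real entropy. */
	return get_random_long() & CANARY_MASK;
}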
Signed-off-by: Luo Qiu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/stackprotector.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h index a1a965751a7b..42f6c3f69115 100644 --- a/arch/loongarch/include/asm/stackprotector.h +++ b/arch/loongarch/include/asm/stackprotector.h @@ -12,9 +12,6 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H -#include -#include - extern unsigned long __stack_chk_guard; /* @@ -25,11 +22,7 @@ extern unsigned long __stack_chk_guard; */ static __always_inline void boot_init_stack_canary(void) { - unsigned long canary; - - /* Try to get a semi random initial value. */ - get_random_bytes(&canary, sizeof(canary)); - canary ^= LINUX_VERSION_CODE; + unsigned long canary = get_random_canary(); current->stack_canary = canary; __stack_chk_guard = current->stack_canary; -- cgit v1.2.3 From 02a6a1f9d77a816fbac01de9bfcd0e0914552f2f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: LoongArch: Make arch_irq_work_has_interrupt() true only if IPI HW exists After commit 7c405fb3279b3924 ("rcu: Use an intermediate irq_work to start process_srcu()"), Loongson-2K0300/2K0500 fail to boot, because IRQ_WORK needs IPI but Loongson-2K0300/2K0500 don't have IPI HW. So make arch_irq_work_has_interrupt() return true only if IPI HW exists. Cc: stable@vger.kernel.org Reported-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/irq_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/irq_work.h b/arch/loongarch/include/asm/irq_work.h index d63076e9160d..63aee0335d1a 100644 --- a/arch/loongarch/include/asm/irq_work.h +++ b/arch/loongarch/include/asm/irq_work.h @@ -4,7 +4,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return IS_ENABLED(CONFIG_SMP); + return IS_ENABLED(CONFIG_SMP) && cpu_opt(LOONGARCH_CPU_CSRIPI); } #endif /* _ASM_LOONGARCH_IRQ_WORK_H */ -- cgit v1.2.3 From 37e57e8ad96cdec4a57b55fd10bef50f7370a954 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: LoongArch: Show CPU vulnerabilities correctly Most LoongArch processors are vulnerable to the Spectre-V1 Proof-of-Concept (PoC), and the generic mechanism, __user pointer sanitization, can be used as a mitigation. This means using array_index_nospec() to prevent out-of-bounds access in syscalls and other critical paths. Implement the arch-specific cpu_show_spectre_v1() to show CPU Spectre-V1 vulnerabilities correctly.
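The "__user pointer sanitization" named above is the generic array_index_nospec() clamp; a minimal sketch of the pattern (table and function names are illustrative):

#include <linux/errno.h>
#include <linux/nospec.h>

static long table_lookup(const long *table, unsigned long nr, unsigned long size)
{
	if (nr >= size)
		return -EINVAL;

	/* Clamp nr to [0, size) even under speculation, so a mispredicted
	 * bounds check cannot read past the end of the table. */
	nr = array_index_nospec(nr, size);

	return table[nr];
}

The next patch in the series applies exactly this clamp to the syscall dispatch table, while this one makes the status visible as "Mitigation: __user pointer sanitization" under /sys/devices/system/cpu/vulnerabilities/spectre_v1.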
Cc: stable@vger.kernel.org Link: https://cc-sw.com/chinese-loongarch-architecture-evaluation-part-3-of-3/ Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cpu-probe.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 657bbae6c1c7..82cf426faafd 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -402,3 +403,9 @@ void cpu_probe(void) cpu_report(); } + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n"); +} -- cgit v1.2.3 From 0c965d2784fbbd7f8e3b96d875c9cfdf7c00da3d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: LoongArch: Add spectre boundary for syscall dispatch table The LoongArch syscall number is directly controlled by userspace, but does not have an array_index_nospec() boundary to prevent access past the syscall function pointer tables. Cc: stable@vger.kernel.org Assisted-by: gkh_clanker_2000 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huacai Chen --- arch/loongarch/kernel/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 1249d82c1cd0..dac435c32743 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,7 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs) add_random_kstack_offset(); if (nr < NR_syscalls) { - syscall_fn = sys_call_table[nr]; + syscall_fn = sys_call_table[array_index_nospec(nr, NR_syscalls)]; regs->regs[4] = syscall_fn(regs->orig_a0, regs->regs[5], regs->regs[6], regs->regs[7], regs->regs[8], regs->regs[9]); } -- cgit v1.2.3 From adf346e500647d91d115e1319f04c3c7972620d9 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Wed, 22 Apr 2026 15:45:12 +0800 Subject: LoongArch: Add flush_icache_all()/local_flush_icache_all() LoongArch maintains ICache/DCache coherency by hardware, so we just need "ibar 0" to avoid instruction hazard here. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cacheflush.h | 16 +++++++++++++++- arch/loongarch/mm/cache.c | 10 ---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index f8754d08a31a..190651be9546 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -32,8 +32,22 @@ static inline unsigned int cpu_last_level_cache_line_size(void) } asmlinkage void __flush_cache_all(void); -void local_flush_icache_range(unsigned long start, unsigned long end); +/* + * LoongArch maintains ICache/DCache coherency by hardware, + * we just need "ibar" to avoid instruction hazard here.
+ */ +static inline void local_flush_icache_all(void) +{ + asm volatile ("ibar\t0\n"::); +} + +static inline void local_flush_icache_range(unsigned long start, unsigned long end) +{ + asm volatile ("ibar\t0\n"::); +} + +#define flush_icache_all local_flush_icache_all #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 496916845ff7..06dc570eb429 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -31,16 +31,6 @@ void cache_error_setup(void) set_merr_handler(0x0, &except_vec_cex, 0x80); } -/* - * LoongArch maintains ICache/DCache coherency by hardware, - * we just need "ibar" to avoid instruction hazard here. - */ -void local_flush_icache_range(unsigned long start, unsigned long end) -{ - asm volatile ("\tibar 0\n"::); -} -EXPORT_SYMBOL(local_flush_icache_range); - static void flush_cache_leaf(unsigned int leaf) { int i, j, nr_nodes; -- cgit v1.2.3 From 2c749f734ebfe350da55bf40ea55444fb85d4055 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Wed, 22 Apr 2026 15:45:13 +0800 Subject: LoongArch: Batch the icache maintenance for jump_label Switch to the batched version of the jump label update functions so instruction cache maintenance is deferred until the end of the update. Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/jump_label.h | 2 ++ arch/loongarch/kernel/inst.c | 6 +++--- arch/loongarch/kernel/jump_label.c | 12 ++++++++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h index dcaecf69ea5a..7ef4ae3abf08 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -13,6 +13,8 @@ #include #include +#define HAVE_JUMP_LABEL_BATCH + #define JUMP_LABEL_NOP_SIZE 4 #ifdef CONFIG_32BIT diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 1a728082944c..0b9228b7c13a 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -209,6 +209,9 @@ int larch_insn_write(void *addr, u32 insn) int ret; unsigned long flags = 0; + if ((unsigned long)addr & 3) + return -EINVAL; + raw_spin_lock_irqsave(&patch_lock, flags); ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE); raw_spin_unlock_irqrestore(&patch_lock, flags); @@ -221,9 +224,6 @@ int larch_insn_patch_text(void *addr, u32 insn) int ret; u32 *tp = addr; - if ((unsigned long)tp & 3) - return -EINVAL; - ret = larch_insn_write(tp, insn); if (!ret) flush_icache_range((unsigned long)tp, diff --git a/arch/loongarch/kernel/jump_label.c b/arch/loongarch/kernel/jump_label.c index 31891214b767..24a3f4d8540c 100644 --- a/arch/loongarch/kernel/jump_label.c +++ b/arch/loongarch/kernel/jump_label.c @@ -6,9 +6,10 @@ */ #include #include +#include #include -void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { u32 insn; void *addr = (void *)jump_entry_code(entry); @@ -18,5 +19,12 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type ty else insn = larch_insn_gen_nop(); - larch_insn_patch_text(addr, insn); + larch_insn_write(addr, insn); + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + flush_icache_all(); } -- cgit v1.2.3 From 1dd3e8a8eeb4059fb34b07578362380cf35b7ed5 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 
22 Apr 2026 15:45:13 +0800 Subject: LoongArch: Define instruction formats for AM{SWAP/ADD}.{B/H} and DBAR The 8 and 16 bit read-modify-write atomic instructions amadd.{b/h} and amswap.{b/h} were newly added in the latest LoongArch Reference Manual, so define the instruction formats and check via CPUCFG whether they are supported. Furthermore, define the instruction format for DBAR, which will be used to support BPF load-acquire and store-release instructions. This is preparation for later patches. Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 1 + arch/loongarch/include/asm/cpu.h | 64 ++++++++++++++++--------------- arch/loongarch/include/asm/inst.h | 10 +++++ arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++ arch/loongarch/kernel/proc.c | 2 + 6 files changed, 51 insertions(+), 31 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 8eefe7a2098b..62059c5551b9 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -35,6 +35,7 @@ */ #define cpu_has_cpucfg cpu_opt(LOONGARCH_CPU_CPUCFG) #define cpu_has_lam cpu_opt(LOONGARCH_CPU_LAM) +#define cpu_has_lam_bh cpu_opt(LOONGARCH_CPU_LAM_BH) #define cpu_has_scq cpu_opt(LOONGARCH_CPU_SCQ) #define cpu_has_ual cpu_opt(LOONGARCH_CPU_UAL) #define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 1e60ab264cd0..91b96938861e 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -95,40 +95,42 @@ static inline char *id_to_core_name(unsigned int id) */ #define CPU_FEATURE_CPUCFG 0 /* CPU has CPUCFG */ #define CPU_FEATURE_LAM 1 /* CPU has Atomic instructions */ -#define CPU_FEATURE_SCQ 2 /* CPU has SC.Q instruction */ -#define CPU_FEATURE_UAL 3 /* CPU supports unaligned access */ -#define CPU_FEATURE_FPU 4 /* CPU has FPU */ -#define CPU_FEATURE_LSX 5 /* CPU has LSX (128-bit SIMD) */ -#define CPU_FEATURE_LASX 6 /* CPU has LASX (256-bit SIMD) */ -#define CPU_FEATURE_CRC32 7 /* CPU has CRC32 instructions */ -#define CPU_FEATURE_COMPLEX 8 /* CPU has Complex instructions */ -#define CPU_FEATURE_CRYPTO 9 /* CPU has Crypto instructions */ -#define CPU_FEATURE_LVZ 10 /* CPU has Virtualization extension */ -#define CPU_FEATURE_LBT_X86 11 /* CPU has X86 Binary Translation */ -#define CPU_FEATURE_LBT_ARM 12 /* CPU has ARM Binary Translation */ -#define CPU_FEATURE_LBT_MIPS 13 /* CPU has MIPS Binary Translation */ -#define CPU_FEATURE_TLB 14 /* CPU has TLB */ -#define CPU_FEATURE_CSR 15 /* CPU has CSR */ -#define CPU_FEATURE_IOCSR 16 /* CPU has IOCSR */ -#define CPU_FEATURE_WATCH 17 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 18 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 19 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 20 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 21 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 22 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 23 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 24 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 25 /* CPU has EXTIOI interrupt pin decode mode */ -#define CPU_FEATURE_GUESTID 26 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 27 /* CPU has hypervisor (running in VM) */ -#define CPU_FEATURE_PTW 28 /* CPU has hardware page table walker */ -#define CPU_FEATURE_LSPW 29 /* CPU
has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_MSGINT 30 /* CPU has MSG interrupt */ -#define CPU_FEATURE_AVECINT 31 /* CPU has AVEC interrupt */ -#define CPU_FEATURE_REDIRECTINT 32 /* CPU has interrupt remapping */ +#define CPU_FEATURE_LAM_BH 2 /* CPU has AM{SWAP/ADD}[_DB].{B/H} instructions */ +#define CPU_FEATURE_SCQ 3 /* CPU has SC.Q instruction */ +#define CPU_FEATURE_UAL 4 /* CPU supports unaligned access */ +#define CPU_FEATURE_FPU 5 /* CPU has FPU */ +#define CPU_FEATURE_LSX 6 /* CPU has LSX (128-bit SIMD) */ +#define CPU_FEATURE_LASX 7 /* CPU has LASX (256-bit SIMD) */ +#define CPU_FEATURE_CRC32 8 /* CPU has CRC32 instructions */ +#define CPU_FEATURE_COMPLEX 9 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 10 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 11 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 12 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 13 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 14 /* CPU has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 15 /* CPU has TLB */ +#define CPU_FEATURE_CSR 16 /* CPU has CSR */ +#define CPU_FEATURE_IOCSR 17 /* CPU has IOCSR */ +#define CPU_FEATURE_WATCH 18 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 19 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 20 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 21 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 22 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 23 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 24 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 25 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 26 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 27 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 28 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 29 /* CPU has hardware page table walker */ +#define CPU_FEATURE_LSPW 30 /* CPU has LSPW (lddir/ldpte instructions) */ +#define CPU_FEATURE_MSGINT 31 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 32 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 33 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) +#define LOONGARCH_CPU_LAM_BH BIT_ULL(CPU_FEATURE_LAM_BH) #define LOONGARCH_CPU_SCQ BIT_ULL(CPU_FEATURE_SCQ) #define LOONGARCH_CPU_UAL BIT_ULL(CPU_FEATURE_UAL) #define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index f9f207082d0e..76b723590023 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -36,6 +36,7 @@ enum reg0i15_op { break_op = 0x54, + dbar_op = 0x70e4, }; enum reg0i26_op { @@ -194,6 +195,10 @@ enum reg3_op { fstxs_op = 0x7070, fstxd_op = 0x7078, scq_op = 0x70ae, + amswapb_op = 0x70b8, + amswaph_op = 0x70b9, + amaddb_op = 0x70ba, + amaddh_op = 0x70bb, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, @@ -543,6 +548,7 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ } DEF_EMIT_REG0I15_FORMAT(break, break_op) +DEF_EMIT_REG0I15_FORMAT(dbar, dbar_op) /* like emit_break(imm) but returns a constant expression */ #define __emit_break(imm) ((u32)((imm) | (break_op << 15))) @@ -763,6 +769,8 @@ DEF_EMIT_REG3_FORMAT(stxb, stxb_op) DEF_EMIT_REG3_FORMAT(stxh, stxh_op) DEF_EMIT_REG3_FORMAT(stxw, stxw_op) 
DEF_EMIT_REG3_FORMAT(stxd, stxd_op) +DEF_EMIT_REG3_FORMAT(amaddb, amaddb_op) +DEF_EMIT_REG3_FORMAT(amaddh, amaddh_op) DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) DEF_EMIT_REG3_FORMAT(amandw, amandw_op) @@ -771,6 +779,8 @@ DEF_EMIT_REG3_FORMAT(amorw, amorw_op) DEF_EMIT_REG3_FORMAT(amord, amord_op) DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) DEF_EMIT_REG3_FORMAT(amxord, amxord_op) +DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op) +DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op) DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 49519b4362c6..90e96113ba51 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -19,5 +19,6 @@ #define HWCAP_LOONGARCH_PTW (1 << 13) #define HWCAP_LOONGARCH_LSPW (1 << 14) #define HWCAP_LOONGARCH_SCQ (1 << 15) +#define HWCAP_LOONGARCH_LAM_BH (1 << 16) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 82cf426faafd..74d31f260dfd 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -178,6 +178,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LAM; elf_hwcap |= HWCAP_LOONGARCH_LAM; } + if (config & CPUCFG2_LAM_BH) { + c->options |= LOONGARCH_CPU_LAM_BH; + elf_hwcap |= HWCAP_LOONGARCH_LAM_BH; + } if (config & CPUCFG2_SCQ) { c->options |= LOONGARCH_CPU_SCQ; elf_hwcap |= HWCAP_LOONGARCH_SCQ; } diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index a8127e83da65..d4ce5b585453 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -64,6 +64,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, " cpucfg"); if (cpu_has_lam) seq_puts(m, " lam"); + if (cpu_has_lam_bh) + seq_puts(m, " lam_bh"); if (cpu_has_scq) seq_puts(m, " scq"); if (cpu_has_ual) -- cgit v1.2.3 From 534768410598539712e0097e060331c85f2d0c9d Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 22 Apr 2026 15:45:34 +0800 Subject: LoongArch: BPF: Add the default case in emit_atomic() and rename it Like the other archs such as x86 and riscv, add the default case in emit_atomic() to print an error message for the invalid opcode and return -EINVAL, then make its return type int. While at it, given that all of the instructions in emit_atomic() are only read-modify-write instructions, rename emit_atomic() to emit_atomic_rmw() to make this clear, because there will be a new function emit_atomic_ld_st() for load-acquire and store-release instructions in a later patch.
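For orientation, the two BPF atomic families that the rename separates have these C11 equivalents (illustrative userspace code, not JIT output):

#include <stdatomic.h>

int rmw_vs_ordered(atomic_int *v)
{
	/* Read-modify-write, handled by emit_atomic_rmw():
	 * e.g. BPF_ADD | BPF_FETCH behaves like an atomic fetch-add. */
	int old = atomic_fetch_add(v, 1);

	/* Ordered load/store, handled by the upcoming emit_atomic_ld_st():
	 * BPF_LOAD_ACQ / BPF_STORE_REL are acquire/release accesses. */
	int cur = atomic_load_explicit(v, memory_order_acquire);

	atomic_store_explicit(v, 0, memory_order_release);

	return old + cur;
}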
Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 9cb796e16379..fefda4050a20 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -344,7 +344,7 @@ toofar: #undef jmp_offset } -static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 t1 = LOONGARCH_GPR_T1; const u8 t2 = LOONGARCH_GPR_T2; @@ -448,7 +448,12 @@ static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_zext_32(ctx, r0, true); } break; + default: + pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm); + return -EINVAL; } + + return 0; } static bool is_signed_bpf_cond(u8 cond) @@ -1256,7 +1261,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: - emit_atomic(insn, ctx); + ret = emit_atomic_rmw(insn, ctx); + if (ret) + return ret; break; /* Speculation barrier */ -- cgit v1.2.3 From fc935c190c7967070506a2795575adc7f9f501ef Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 22 Apr 2026 15:45:34 +0800 Subject: LoongArch: BPF: Support 8 and 16 bit read-modify-write instructions The 8 and 16 bit read-modify-write instructions {amadd/amswap}.{b/h} were newly added in the latest LoongArch Reference Manual, use them to avoid the error of unknown opcode if possible. Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 77 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index fefda4050a20..6bd2d20a9f2d 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -363,10 +363,28 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) switch (imm) { /* lock *(size *)(dst + off) = src */ case BPF_ADD: - if (isdw) - emit_insn(ctx, amaddd, t2, t1, src); - else + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amaddb, t2, t1, src); + break; + case BPF_H: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amaddh, t2, t1, src); + break; + case BPF_W: emit_insn(ctx, amaddw, t2, t1, src); + break; + case BPF_DW: + emit_insn(ctx, amaddd, t2, t1, src); + break; + } break; case BPF_AND: if (isdw) @@ -388,11 +406,30 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* src = atomic_fetch_(dst + off, src) */ case BPF_ADD | BPF_FETCH: - if (isdw) { - emit_insn(ctx, amaddd, src, t1, t3); - } else { + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amaddb, src, t1, t3); + emit_zext_32(ctx, src, true); + break; + case BPF_H: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amaddh, src, t1, t3); + emit_zext_32(ctx, src, true); + break; + case BPF_W: emit_insn(ctx, amaddw, src, t1, t3); emit_zext_32(ctx, src, true); + 
break; + case BPF_DW: + emit_insn(ctx, amaddd, src, t1, t3); + break; } break; case BPF_AND | BPF_FETCH: @@ -421,11 +458,30 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* src = atomic_xchg(dst + off, src); */ case BPF_XCHG: - if (isdw) { - emit_insn(ctx, amswapd, src, t1, t3); - } else { + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amswap.b instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amswapb, src, t1, t3); + emit_zext_32(ctx, src, true); + break; + case BPF_H: + if (!cpu_has_lam_bh) { + pr_err_once("bpf-jit: amswap.h instruction is not supported\n"); + return -EINVAL; + } + emit_insn(ctx, amswaph, src, t1, t3); + emit_zext_32(ctx, src, true); + break; + case BPF_W: emit_insn(ctx, amswapw, src, t1, t3); emit_zext_32(ctx, src, true); + break; + case BPF_DW: + emit_insn(ctx, amswapd, src, t1, t3); + break; } break; /* r0 = atomic_cmpxchg(dst + off, r0, src); */ @@ -1259,6 +1315,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: ret = emit_atomic_rmw(insn, ctx); -- cgit v1.2.3 From ee823fe7c12f92bac5e5b1ea6dd0ac8b267dd464 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 22 Apr 2026 15:45:34 +0800 Subject: LoongArch: BPF: Support load-acquire and store-release instructions Use the LoongArch common memory access instructions with the barrier 'dbar' to support the BPF load-acquire and store-release instructions. With this patch, the following testcases passed on LoongArch if the macro CAN_USE_LOAD_ACQ_STORE_REL is usable in bpf selftests: sudo ./test_progs -t verifier_load_acquire sudo ./test_progs -t verifier_store_release sudo ./test_progs -t verifier_precision/bpf_load_acquire sudo ./test_progs -t verifier_precision/bpf_store_release sudo ./test_progs -t compute_live_registers/atomic_load_acq_store_rel Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 98 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 6bd2d20a9f2d..648a42c559a8 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -512,6 +512,99 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } +static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 t1 = LOONGARCH_GPR_T1; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + + switch (imm) { + /* dst_reg = load_acquire(src_reg + off16) */ + case BPF_LOAD_ACQ: + switch (BPF_SIZE(insn->code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldbu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxbu, dst, src, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldhu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxhu, dst, src, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldwu, dst, src, off); + } else { + move_imm(ctx, t1, off, false); + emit_insn(ctx, ldxwu, dst, src, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldd, dst, src, off); + } 
else {
+				move_imm(ctx, t1, off, false);
+				emit_insn(ctx, ldxd, dst, src, t1);
+			}
+			break;
+		}
+		emit_insn(ctx, dbar, 0b10100);
+		break;
+	/* store_release(dst_reg + off16, src_reg) */
+	case BPF_STORE_REL:
+		emit_insn(ctx, dbar, 0b10010);
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stb, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, false);
+				emit_insn(ctx, stxb, src, dst, t1);
+			}
+			break;
+		case BPF_H:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, sth, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, false);
+				emit_insn(ctx, stxh, src, dst, t1);
+			}
+			break;
+		case BPF_W:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stw, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, false);
+				emit_insn(ctx, stxw, src, dst, t1);
+			}
+			break;
+		case BPF_DW:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, std, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, false);
+				emit_insn(ctx, stxd, src, dst, t1);
+			}
+			break;
+		}
+		break;
+	default:
+		pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static bool is_signed_bpf_cond(u8 cond)
 {
 	return cond == BPF_JSGT || cond == BPF_JSLT ||
@@ -1320,7 +1413,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	case BPF_STX | BPF_ATOMIC | BPF_H:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
-		ret = emit_atomic_rmw(insn, ctx);
+		if (!bpf_atomic_is_load_store(insn))
+			ret = emit_atomic_rmw(insn, ctx);
+		else
+			ret = emit_atomic_ld_st(insn, ctx);
 		if (ret)
 			return ret;
 		break;
--
cgit v1.2.3


From 4653682c6f6559e3209586f7bb30183f36375f00 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 22 Apr 2026 15:45:34 +0800
Subject: LoongArch: BPF: Open code and remove invoke_bpf_mod_ret()

invoke_bpf_mod_ret() is a small wrapper over invoke_bpf_prog(). It
should check the return value of invoke_bpf_prog() and return
immediately if that call fails, so open code the wrapper and remove
it, since it is called only once.
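A minimal sketch of the resulting error-propagation pattern (simplified,
hypothetical names; invoke_one() stands in for invoke_bpf_prog()):

	/* Propagate the first failure out of the loop instead of
	 * discarding the return value, as the old void wrapper did. */
	static int invoke_all(int nr_links, int (*invoke_one)(int idx))
	{
		int i, ret;

		for (i = 0; i < nr_links; i++) {
			ret = invoke_one(i);
			if (ret)
				return ret;	/* fail fast */
		}
		return 0;
	}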
Acked-by: Hengqi Chen
Tested-by: Hengqi Chen
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 648a42c559a8..0a8dc21473f9 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1712,20 +1712,6 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 	return ret;
 }
 
-static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
-			       int args_off, int retval_off, int run_ctx_off, u32 **branches)
-{
-	int i;
-
-	emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
-	for (i = 0; i < tl->nr_links; i++) {
-		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true);
-		emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
-		branches[i] = (u32 *)ctx->image + ctx->idx;
-		emit_insn(ctx, nop);
-	}
-}
-
 void *arch_alloc_bpf_trampoline(unsigned int size)
 {
 	return bpf_prog_pack_alloc(size, jit_fill_hole);
@@ -1937,7 +1923,16 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 		if (!branches)
 			return -ENOMEM;
 
-		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches);
+		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
+		for (i = 0; i < fmod_ret->nr_links; i++) {
+			ret = invoke_bpf_prog(ctx, fmod_ret->links[i],
+					      args_off, retval_off, run_ctx_off, true);
+			if (ret)
+				goto out;
+			emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
+			branches[i] = (u32 *)ctx->image + ctx->idx;
+			emit_insn(ctx, nop);
+		}
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
--
cgit v1.2.3


From 0ef8b96051555aaded204c9e65edbd3656d9613f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 22 Apr 2026 15:45:34 +0800
Subject: LoongArch: BPF: Support small struct arguments for trampoline

In the current BPF code, the struct argument size is at most 16 bytes,
enforced by the verifier. According to the Procedure Call Standard for
LoongArch, struct arguments of at most 16 bytes are passed in the 8
argument registers; that is to say, a struct argument may be passed in
a pair of registers if its size is more than 8 bytes and no more than
16 bytes.

Extend the BPF trampoline JIT to support attachment to functions that
take small structures (up to 16 bytes) as arguments, and save and
restore a number of "argument registers" rather than a number of
arguments.
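As a rough illustration of the register accounting (a standalone sketch
of the slot math under the lapcs rules linked below, not the kernel code
itself):

	/* One 8-byte slot per scalar argument; a struct argument of at
	 * most 16 bytes occupies round_up(size, 8) / 8 slots, i.e. 1 or 2. */
	#include <stddef.h>

	static int count_arg_reg_slots(int nr_args, const size_t *arg_size,
				       const int *is_struct_arg)
	{
		int i, nregs = nr_args;

		for (i = 0; i < nr_args; i++) {
			if (is_struct_arg[i])
				nregs += (int)((arg_size[i] + 7) / 8) - 1;
		}
		return nregs;
	}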
With this patch, the following related testcases passed: sudo ./test_progs -a tracing_struct/struct_args sudo ./test_progs -a tracing_struct/union_args Link: https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#structures Acked-by: Hengqi Chen Tested-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 55 +++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 0a8dc21473f9..9fc930e89b12 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1628,21 +1628,21 @@ int bpf_arch_text_invalidate(void *dst, size_t len) return ret; } -static void store_args(struct jit_ctx *ctx, int nargs, int args_off) +static void store_args(struct jit_ctx *ctx, int nregs, int args_off) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); args_off -= 8; } } -static void restore_args(struct jit_ctx *ctx, int nargs, int args_off) +static void restore_args(struct jit_ctx *ctx, int nregs, int args_off) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); args_off -= 8; } @@ -1763,8 +1763,8 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i void *func_addr, u32 flags) { int i, ret, save_ret; - int stack_size, nargs; - int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; + int stack_size, nregs = m->nr_args; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; @@ -1784,11 +1784,11 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i * * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET - * [ argN ] + * [ arg regN ] * [ ... ] - * FP - args_off [ arg1 ] + * FP - args_off [ arg reg1 ] * - * FP - nargs_off [ regs count ] + * FP - nregs_off [ arg regs count ] * * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG * @@ -1799,15 +1799,23 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i * FP - tcc_ptr_off [ tail_call_cnt_ptr ] */ - if (m->nr_args > LOONGARCH_MAX_REG_ARGS) - return -ENOTSUPP; - - /* FIXME: No support of struct argument */ + /* Extra registers for struct arguments */ for (i = 0; i < m->nr_args; i++) { - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) - return -ENOTSUPP; + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { + /* + * The struct argument size is at most 16 bytes, + * enforced by the verifier. The struct argument + * may be passed in a pair of registers if its + * size is more than 8 bytes and no more than 16 + * bytes. 
+			 */
+			nregs += round_up(m->arg_size[i], 8) / 8 - 1;
+		}
 	}
 
+	if (nregs > LOONGARCH_MAX_REG_ARGS)
+		return -ENOTSUPP;
+
 	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
 		return -ENOTSUPP;
 
@@ -1821,13 +1829,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 	retval_off = stack_size;
 
 	/* Room of trampoline frame to store args */
-	nargs = m->nr_args;
-	stack_size += nargs * 8;
+	stack_size += nregs * 8;
 	args_off = stack_size;
 
 	/* Room of trampoline frame to store args number */
 	stack_size += 8;
-	nargs_off = stack_size;
+	nregs_off = stack_size;
 
 	/* Room of trampoline frame to store ip address */
 	if (flags & BPF_TRAMP_F_IP_ARG) {
@@ -1890,11 +1897,11 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off);
 	}
 
-	/* store nargs number */
-	move_imm(ctx, LOONGARCH_GPR_T1, nargs, false);
-	emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off);
+	/* store arg regs count */
+	move_imm(ctx, LOONGARCH_GPR_T1, nregs, false);
+	emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nregs_off);
 
-	store_args(ctx, nargs, args_off);
+	store_args(ctx, nregs, args_off);
 
 	/* To traced function */
 	/* Ftrace jump skips 2 NOP instructions */
@@ -1936,7 +1943,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		restore_args(ctx, m->nr_args, args_off);
+		restore_args(ctx, nregs, args_off);
 
 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
 			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
@@ -1972,7 +1979,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 	}
 
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
-		restore_args(ctx, m->nr_args, args_off);
+		restore_args(ctx, nregs, args_off);
 
 	if (save_ret) {
 		emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
--
cgit v1.2.3


From c9ebe2016de967b47ce99d5af9bc791939c955f4 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 22 Apr 2026 15:45:34 +0800
Subject: LoongArch: BPF: Support up to 12 function arguments for trampoline

Currently, the LoongArch BPF trampoline supports up to 8 function
arguments. According to the statistics from commit 473e3150e30a
("bpf, x86: allow function arguments up to 12 for TRACING"), there are
over 200 functions that accept 9 to 12 arguments, so add support for
up to 12 arguments to the trampoline.
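The register/stack split can be sketched as follows (illustrative only;
in the kernel source LOONGARCH_MAX_REG_ARGS is 8):

	/* The first 8 slots travel in a0-a7; any remainder is copied
	 * through the stack, which is what the store_args() and
	 * restore_stk_args() changes below implement. */
	#define MAX_REG_ARGS	8

	static int nr_stack_arg_slots(int nr_arg_slots)
	{
		int extra = nr_arg_slots - MAX_REG_ARGS;

		return extra > 0 ? extra : 0;
	}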
With this patch, the following related testcases passed: sudo ./test_progs -a tracing_struct/struct_many_args sudo ./test_progs -a fentry_test/fentry_many_args sudo ./test_progs -a fexit_test/fexit_many_args Acked-by: Hengqi Chen Tested-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 99 ++++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 9fc930e89b12..215c5fb339b0 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1628,26 +1628,46 @@ int bpf_arch_text_invalidate(void *dst, size_t len) return ret; } -static void store_args(struct jit_ctx *ctx, int nregs, int args_off) +static void store_args(struct jit_ctx *ctx, int nr_arg_slots, int args_off) { int i; - for (i = 0; i < nregs; i++) { - emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); + for (i = 0; i < nr_arg_slots; i++) { + if (i < LOONGARCH_MAX_REG_ARGS) + emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); + else { + /* Skip slots for T0 and FP of traced function */ + emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, + 16 + (i - LOONGARCH_MAX_REG_ARGS) * 8); + emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -args_off); + } args_off -= 8; } } -static void restore_args(struct jit_ctx *ctx, int nregs, int args_off) +static void restore_args(struct jit_ctx *ctx, int nr_reg_args, int args_off) { int i; - for (i = 0; i < nregs; i++) { + for (i = 0; i < nr_reg_args; i++) { emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); args_off -= 8; } } +static void restore_stk_args(struct jit_ctx *ctx, int nr_stk_args, int args_off, int stk_args_off) +{ + int i; + + for (i = 0; i < nr_stk_args; i++) { + emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, + -(args_off - LOONGARCH_MAX_REG_ARGS * 8)); + emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -stk_args_off); + args_off -= 8; + stk_args_off -= 8; + } +} + static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, int args_off, int retval_off, int run_ctx_off, bool save_ret) { @@ -1763,8 +1783,8 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i void *func_addr, u32 flags) { int i, ret, save_ret; - int stack_size, nregs = m->nr_args; - int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; + int stack_size, args_off, stk_args_off, nr_arg_slots = 0; + int retval_off, nregs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; @@ -1782,40 +1802,42 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i * FP - 16 [ FP of traced func ] frame pointer of traced * function * - * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or - * BPF_TRAMP_F_RET_FENTRY_RET - * [ arg regN ] - * [ ... ] - * FP - args_off [ arg reg1 ] + * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ arg regN ] + * [ ... 
] + * FP - args_off [ arg reg1 ] + * + * FP - nregs_off [ arg regs count ] * - * FP - nregs_off [ arg regs count ] + * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG * - * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG + * FP - run_ctx_off [ bpf_tramp_run_ctx ] * - * FP - run_ctx_off [ bpf_tramp_run_ctx ] + * FP - sreg_off [ callee saved reg ] * - * FP - sreg_off [ callee saved reg ] + * FP - tcc_ptr_off [ tail_call_cnt_ptr ] * - * FP - tcc_ptr_off [ tail_call_cnt_ptr ] + * [ stack_argN ] + * [ ... ] + * FP - stk_args_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG */ + if (m->nr_args > MAX_BPF_FUNC_ARGS) + return -ENOTSUPP; + /* Extra registers for struct arguments */ for (i = 0; i < m->nr_args; i++) { - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { - /* - * The struct argument size is at most 16 bytes, - * enforced by the verifier. The struct argument - * may be passed in a pair of registers if its - * size is more than 8 bytes and no more than 16 - * bytes. - */ - nregs += round_up(m->arg_size[i], 8) / 8 - 1; - } + /* + * The struct argument size is at most 16 bytes, + * enforced by the verifier. The struct argument + * may be passed in a pair of registers if its + * size is more than 8 bytes and no more than 16 + * bytes. + */ + nr_arg_slots += round_up(m->arg_size[i], 8) / 8; } - if (nregs > LOONGARCH_MAX_REG_ARGS) - return -ENOTSUPP; - if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) return -ENOTSUPP; @@ -1829,7 +1851,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i retval_off = stack_size; /* Room of trampoline frame to store args */ - stack_size += nregs * 8; + stack_size += nr_arg_slots * 8; args_off = stack_size; /* Room of trampoline frame to store args number */ @@ -1855,8 +1877,14 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i tcc_ptr_off = stack_size; } + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - LOONGARCH_MAX_REG_ARGS > 0)) + stack_size += (nr_arg_slots - LOONGARCH_MAX_REG_ARGS) * 8; + stack_size = round_up(stack_size, 16); + /* Room for args on stack must be at the top of stack */ + stk_args_off = stack_size; + if (is_struct_ops) { /* * For the trampoline called directly, just handle @@ -1898,10 +1926,10 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i } /* store arg regs count */ - move_imm(ctx, LOONGARCH_GPR_T1, nregs, false); + move_imm(ctx, LOONGARCH_GPR_T1, nr_arg_slots, false); emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nregs_off); - store_args(ctx, nregs, args_off); + store_args(ctx, nr_arg_slots, args_off); /* To traced function */ /* Ftrace jump skips 2 NOP instructions */ @@ -1943,7 +1971,8 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_args(ctx, nregs, args_off); + restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off); + restore_stk_args(ctx, nr_arg_slots - LOONGARCH_MAX_REG_ARGS, args_off, stk_args_off); if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); @@ -1979,7 +2008,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_args(ctx, nregs, args_off); + restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off); if (save_ret) { emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); -- cgit v1.2.3 From 
6ef04707e8eee09360f70812c0ac63c712460bd0 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 23 Apr 2026 12:49:36 +0800 Subject: LoongArch: BPF: Introduce emit_store_stack_imm64() helper Introduce a helper to store 64-bit immediate on the trampoline stack. The helper will be used in the next patch. Also refactor the existing code to use this helper. Tested-by: Vincent Li Reviewed-by: Menglong Dong Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 215c5fb339b0..a6c001583083 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -344,6 +344,12 @@ toofar: #undef jmp_offset } +static void emit_store_stack_imm64(struct jit_ctx *ctx, int reg, int stack_off, u64 imm64) +{ + move_imm(ctx, reg, imm64, false); + emit_insn(ctx, std, reg, LOONGARCH_GPR_FP, stack_off); +} + static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 t1 = LOONGARCH_GPR_T1; @@ -1676,12 +1682,11 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, struct bpf_prog *p = l->link.prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - if (l->cookie) { - move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false); - emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); - } else { + if (l->cookie) + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, + -run_ctx_off + cookie_off, l->cookie); + else emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); - } /* arg1: prog */ move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); @@ -1920,14 +1925,11 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); /* store ip address of the traced function */ - if (flags & BPF_TRAMP_F_IP_ARG) { - move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false); - emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off); - } + if (flags & BPF_TRAMP_F_IP_ARG) + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -ip_off, (u64)func_addr); /* store arg regs count */ - move_imm(ctx, LOONGARCH_GPR_T1, nr_arg_slots, false); - emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nregs_off); + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -nregs_off, nr_arg_slots); store_args(ctx, nr_arg_slots, args_off); -- cgit v1.2.3 From e815df29b6a5e59293500085a010d5882374cb3e Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 23 Apr 2026 12:49:36 +0800 Subject: LoongArch: BPF: Add fsession support for trampolines Implement BPF_TRACE_FSESSION support in LoongArch BPF JIT. The logic here is almost identical to what has been done in RISC-V JIT. 
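At its core is a packed func_meta word kept on the trampoline stack; a
minimal sketch of the encoding (assuming the BPF_TRAMP_*_SHIFT constants
supplied by the generic fsession support in this series):

	/* Low bits carry the arg-regs count; a per-program cookie index
	 * is or-ed in before each session program runs, and the
	 * is-return bit is set once before the fexit pass. */
	static u64 pack_func_meta(u64 nr_arg_slots, u64 cookie_idx, bool is_return)
	{
		u64 meta = nr_arg_slots;

		meta |= cookie_idx << BPF_TRAMP_COOKIE_INDEX_SHIFT;
		if (is_return)
			meta |= 1ULL << BPF_TRAMP_IS_RETURN_SHIFT;
		return meta;
	}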
The key changes are: - Allocate stack space for function meta and session cookies - Introduce invoke_bpf() as a wrapper around invoke_bpf_prog() that populates session cookies before each invocation - Implement bpf_jit_supports_fsession() callback Tested-by: Vincent Li Reviewed-by: Menglong Dong Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 76 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index a6c001583083..ec3c25b45882 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1737,6 +1737,29 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, return ret; } +static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, + int func_meta_off, bool save_ret, u64 func_meta, int cookie_off) +{ + int i, cur_cookie = (cookie_off - args_off) / 8; + + for (i = 0; i < tl->nr_links; i++) { + int err; + + if (bpf_prog_calls_session_cookie(tl->links[i])) { + u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT); + + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, meta); + cur_cookie--; + } + err = invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, save_ret); + if (err) + return err; + } + + return 0; +} + void *arch_alloc_bpf_trampoline(unsigned int size) { return bpf_prog_pack_alloc(size, jit_fill_hole); @@ -1788,8 +1811,10 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i void *func_addr, u32 flags) { int i, ret, save_ret; + int cookie_cnt, cookie_off; int stack_size, args_off, stk_args_off, nr_arg_slots = 0; - int retval_off, nregs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; + int retval_off, func_meta_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; + unsigned long long func_meta; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; @@ -1813,10 +1838,14 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i * [ ... ] * FP - args_off [ arg reg1 ] * - * FP - nregs_off [ arg regs count ] + * FP - func_meta_off [ regs count, etc ] * * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG * + * [ stack cookie N ] + * [ ... 
] + * FP - cookie_off [ stack cookie 1 ] + * * FP - run_ctx_off [ bpf_tramp_run_ctx ] * * FP - sreg_off [ callee saved reg ] @@ -1859,9 +1888,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i stack_size += nr_arg_slots * 8; args_off = stack_size; - /* Room of trampoline frame to store args number */ + /* Room of function metadata, such as regs count */ stack_size += 8; - nregs_off = stack_size; + func_meta_off = stack_size; /* Room of trampoline frame to store ip address */ if (flags & BPF_TRAMP_F_IP_ARG) { @@ -1869,6 +1898,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i ip_off = stack_size; } + cookie_cnt = bpf_fsession_cookie_cnt(tlinks); + + /* Room for session cookies */ + stack_size += cookie_cnt * 8; + cookie_off = stack_size; + /* Room of trampoline frame to store struct bpf_tramp_run_ctx */ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); run_ctx_off = stack_size; @@ -1929,10 +1964,20 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -ip_off, (u64)func_addr); /* store arg regs count */ - emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -nregs_off, nr_arg_slots); + func_meta = nr_arg_slots; + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta); store_args(ctx, nr_arg_slots, args_off); + if (bpf_fsession_cnt(tlinks)) { + /* clear all session cookies' value */ + for (i = 0; i < cookie_cnt; i++) + emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -cookie_off + 8 * i); + + /* clear return value to make sure fentry always get 0 */ + emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off); + } + /* To traced function */ /* Ftrace jump skips 2 NOP instructions */ if (is_kernel_text((unsigned long)orig_call) || @@ -1949,9 +1994,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i return ret; } - for (i = 0; i < fentry->nr_links; i++) { - ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, - run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + if (fentry->nr_links) { + ret = invoke_bpf(ctx, fentry, args_off, retval_off, run_ctx_off, func_meta_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off); if (ret) return ret; } @@ -1995,8 +2040,14 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset); } - for (i = 0; i < fexit->nr_links; i++) { - ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false); + /* Set "is_return" flag for fsession */ + func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT); + if (bpf_fsession_cnt(tlinks)) + emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta); + + if (fexit->nr_links) { + ret = invoke_bpf(ctx, fexit, args_off, retval_off, run_ctx_off, + func_meta_off, false, func_meta, cookie_off); if (ret) goto out; } @@ -2331,6 +2382,11 @@ bool bpf_jit_supports_arena(void) return true; } +bool bpf_jit_supports_fsession(void) +{ + return true; +} + /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. 
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
--
cgit v1.2.3


From 7939f96f26e96b69db1fe4e7c18537a679696358 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Thu, 23 Apr 2026 12:49:46 +0800
Subject: selftests/bpf: Enable CAN_USE_LOAD_ACQ_STORE_REL for LoongArch

In order to run the following load-acquire and store-release tests on
LoongArch:

  sudo ./test_progs -t verifier_load_acquire
  sudo ./test_progs -t verifier_store_release
  sudo ./test_progs -t verifier_precision/bpf_load_acquire
  sudo ./test_progs -t verifier_precision/bpf_store_release
  sudo ./test_progs -t compute_live_registers/atomic_load_acq_store_rel

the macro CAN_USE_LOAD_ACQ_STORE_REL needs to be enabled for LoongArch.

Acked-by: Hengqi Chen
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 tools/testing/selftests/bpf/progs/bpf_misc.h           | 4 ++--
 tools/testing/selftests/bpf/progs/verifier_precision.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index c9bfbe1bafc1..19f0bf44a9e1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -257,8 +257,8 @@
 
 #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
 	(defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-	 (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \
-	 (defined(__TARGET_ARCH_powerpc))
+	 (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+	 defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_loongarch))
 #define CAN_USE_LOAD_ACQ_STORE_REL
 #endif
 
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 4794903aec8e..6f325876efdd 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -75,8 +75,8 @@ __naked int bpf_end_to_be(void)
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
-	__clang_major__ >= 18
+     defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+     defined(__TARGET_ARCH_loongarch)) && __clang_major__ >= 18
 SEC("?raw_tp")
 __success __log_level(2)
--
cgit v1.2.3