diff options
Diffstat (limited to 'arch')
37 files changed, 1439 insertions, 345 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9537a61ebae0..b8d36a261009 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1293,9 +1293,8 @@ config TASK_SIZE_BOOL Say N here unless you know what you are doing. config TASK_SIZE - hex "Size of user task space" if TASK_SIZE_BOOL + hex "Size of maximum user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 && EXECMEM default "0xc0000000" config MODULES_SIZE_BOOL diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 9e9833faa4af..9d2f612cfb1d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -102,7 +102,7 @@ do { \ #else /* !CONFIG_PPC_BARRIER_NOSPEC */ #define barrier_nospec_asm -#define barrier_nospec() +#define barrier_nospec() do {} while (0) #endif /* CONFIG_PPC_BARRIER_NOSPEC */ /* diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 873c5146e326..a3558419c41b 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -97,8 +97,7 @@ static __always_inline unsigned long __kuap_get_and_assert_locked(void) } #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked -static __always_inline void allow_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { BUILD_BUG_ON(!__builtin_constant_p(dir)); diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 8435bf3cdabf..387d370c8a35 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -192,12 +192,15 @@ extern s32 patch__hash_page_B, patch__hash_page_C; extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2; extern s32 patch__flush_hash_B; +#include <linux/sizes.h> +#include <linux/align.h> + #include <asm/reg.h> #include <asm/task_size_32.h> static __always_inline void update_user_segment(u32 n, u32 val) { - if (n << 28 < TASK_SIZE) + if (n << 28 < ALIGN(TASK_SIZE, SZ_256M)) mtsr(val + n * 0x111, n << 28); } diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 87dcca962be7..41ae404d0b7a 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -195,10 +195,6 @@ void unmap_kernel_page(unsigned long va); #define VMALLOC_END ioremap_bot #endif -#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) -#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) -#define MODULES_VADDR (MODULES_END - MODULES_SIZE) - #ifndef __ASSEMBLER__ #include <linux/sched.h> #include <linux/threads.h> diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 03aec3c6c851..9ccf8a5e0926 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -353,8 +353,7 @@ __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return (regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ; } -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { unsigned long thread_amr = 0; @@ -383,8 +382,7 @@ static __always_inline unsigned long get_kuap(void) static __always_inline void set_kuap(unsigned long value) { } -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { } #endif /* !CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5e34611de9ef..b7ebb4ac2c71 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -289,6 +289,8 @@ void eeh_pe_dev_traverse(struct eeh_pe *root, void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); +const char *eeh_pe_loc_get_bus(struct pci_bus *bus); +struct pci_bus *eeh_pe_bus_get_nolock(struct eeh_pe *pe); void eeh_show_enabled(void); int __init eeh_init(struct eeh_ops *ops); diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9aef16149d92..dff90a7d7f70 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -360,7 +360,9 @@ #define H_GUEST_RUN_VCPU 0x480 #define H_GUEST_COPY_MEMORY 0x484 #define H_GUEST_DELETE 0x488 -#define MAX_HCALL_OPCODE H_GUEST_DELETE +#define H_PKS_WRAP_OBJECT 0x490 +#define H_PKS_UNWRAP_OBJECT 0x494 +#define MAX_HCALL_OPCODE H_PKS_UNWRAP_OBJECT /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h index f39531903325..ab5af235832d 100644 --- a/arch/powerpc/include/asm/kgdb.h +++ b/arch/powerpc/include/asm/kgdb.h @@ -25,7 +25,6 @@ #define BREAK_INSTR_SIZE 4 #define BUFMAX ((NUMREGBYTES * 2) + 512) -#define OUTBUFMAX ((NUMREGBYTES * 2) + 512) #define BREAK_INSTR 0x7d821008 /* twge r2, r2 */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index dab63b82a8d4..4a4145a244f2 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -72,8 +72,7 @@ static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned * platforms. */ #ifndef CONFIG_PPC_BOOK3S_64 -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { } static __always_inline void prevent_user_access(unsigned long dir) { } static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; } static __always_inline void restore_user_access(unsigned long flags) { } @@ -132,55 +131,6 @@ static __always_inline void kuap_assert_locked(void) kuap_get_and_assert_locked(); } -static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) -{ - barrier_nospec(); - allow_user_access(NULL, from, size, KUAP_READ); -} - -static __always_inline void allow_write_to_user(void __user *to, unsigned long size) -{ - allow_user_access(to, NULL, size, KUAP_WRITE); -} - -static __always_inline void allow_read_write_user(void __user *to, const void __user *from, - unsigned long size) -{ - barrier_nospec(); - allow_user_access(to, from, size, KUAP_READ_WRITE); -} - -static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size) -{ - prevent_user_access(KUAP_READ); -} - -static __always_inline void prevent_write_to_user(void __user *to, unsigned long size) -{ - prevent_user_access(KUAP_WRITE); -} - -static __always_inline void prevent_read_write_user(void __user *to, const void __user *from, - unsigned long size) -{ - prevent_user_access(KUAP_READ_WRITE); -} - -static __always_inline void prevent_current_access_user(void) -{ - prevent_user_access(KUAP_READ_WRITE); -} - -static __always_inline void prevent_current_read_from_user(void) -{ - prevent_user_access(KUAP_READ); -} - -static __always_inline void prevent_current_write_to_user(void) -{ - prevent_user_access(KUAP_WRITE); -} - #endif /* !__ASSEMBLER__ */ #endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 08486b15b207..efffb5006d19 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -49,8 +49,7 @@ static __always_inline void uaccess_end_8xx(void) "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory"); } -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { uaccess_begin_8xx(MD_APG_INIT); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index f19115db8072..74ad32e1588c 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -170,10 +170,6 @@ #define mmu_linear_psize MMU_PAGE_8M -#define MODULES_END PAGE_OFFSET -#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) -#define MODULES_VADDR (MODULES_END - MODULES_SIZE) - #ifndef __ASSEMBLER__ #include <linux/mmdebug.h> diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index d6bbb6d78bbe..cb2d5a96c3df 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -73,8 +73,7 @@ static __always_inline void uaccess_end_booke(void) "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory"); } -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void allow_user_access(void __user *to, unsigned long dir) { uaccess_begin_booke(current->thread.pid); } diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 7a84069759b0..e87f90e40d4e 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/list.h> +#include <linux/kobject.h> // Object policy flags from supported_policies #define PLPKS_OSSECBOOTAUDIT PPC_BIT32(1) // OS secure boot must be audit/enforce @@ -22,6 +23,7 @@ #define PLPKS_IMMUTABLE PPC_BIT32(5) // Once written, object cannot be removed #define PLPKS_TRANSIENT PPC_BIT32(6) // Object does not persist through reboot #define PLPKS_SIGNEDUPDATE PPC_BIT32(7) // Object can only be modified by signed updates +#define PLPKS_WRAPPINGKEY PPC_BIT32(8) // Object contains a wrapping key #define PLPKS_HVPROVISIONED PPC_BIT32(28) // Hypervisor has provisioned this object // Signature algorithm flags from signed_update_algorithms @@ -67,128 +69,67 @@ struct plpks_var_name_list { struct plpks_var_name varlist[]; }; -/** - * Updates the authenticated variable. It expects NULL as the component. - */ int plpks_signed_update_var(struct plpks_var *var, u64 flags); -/** - * Writes the specified var and its data to PKS. - * Any caller of PKS driver should present a valid component type for - * their variable. - */ int plpks_write_var(struct plpks_var var); -/** - * Removes the specified var and its data from PKS. - */ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname); -/** - * Returns the data for the specified os variable. - * - * Caller must allocate a buffer in var->data with length in var->datalen. - * If no buffer is provided, var->datalen will be populated with the object's - * size. - */ int plpks_read_os_var(struct plpks_var *var); -/** - * Returns the data for the specified firmware variable. - * - * Caller must allocate a buffer in var->data with length in var->datalen. - * If no buffer is provided, var->datalen will be populated with the object's - * size. - */ int plpks_read_fw_var(struct plpks_var *var); -/** - * Returns the data for the specified bootloader variable. - * - * Caller must allocate a buffer in var->data with length in var->datalen. - * If no buffer is provided, var->datalen will be populated with the object's - * size. - */ int plpks_read_bootloader_var(struct plpks_var *var); -/** - * Returns if PKS is available on this LPAR. - */ bool plpks_is_available(void); -/** - * Returns version of the Platform KeyStore. - */ u8 plpks_get_version(void); -/** - * Returns hypervisor storage overhead per object, not including the size of - * the object or label. Only valid for config version >= 2 - */ u16 plpks_get_objoverhead(void); -/** - * Returns maximum password size. Must be >= 32 bytes - */ u16 plpks_get_maxpwsize(void); -/** - * Returns maximum object size supported by Platform KeyStore. - */ u16 plpks_get_maxobjectsize(void); -/** - * Returns maximum object label size supported by Platform KeyStore. - */ u16 plpks_get_maxobjectlabelsize(void); -/** - * Returns total size of the configured Platform KeyStore. - */ u32 plpks_get_totalsize(void); -/** - * Returns used space from the total size of the Platform KeyStore. - */ u32 plpks_get_usedspace(void); -/** - * Returns bitmask of policies supported by the hypervisor. - */ u32 plpks_get_supportedpolicies(void); -/** - * Returns maximum byte size of a single object supported by the hypervisor. - * Only valid for config version >= 3 - */ u32 plpks_get_maxlargeobjectsize(void); -/** - * Returns bitmask of signature algorithms supported for signed updates. - * Only valid for config version >= 3 - */ u64 plpks_get_signedupdatealgorithms(void); -/** - * Returns the length of the PLPKS password in bytes. - */ +u64 plpks_get_wrappingfeatures(void); + u16 plpks_get_passwordlen(void); -/** - * Called in early init to retrieve and clear the PLPKS password from the DT. - */ void plpks_early_init_devtree(void); -/** - * Populates the FDT with the PLPKS password to prepare for kexec. - */ int plpks_populate_fdt(void *fdt); + +int plpks_config_create_softlink(struct kobject *from); + +bool plpks_wrapping_is_supported(void); + +int plpks_gen_wrapping_key(void); + +int plpks_wrap_object(u8 **input_buf, u32 input_len, u16 wrap_flags, + u8 **output_buf, u32 *output_len); + +int plpks_unwrap_object(u8 **input_buf, u32 input_len, + u8 **output_buf, u32 *output_len); #else // CONFIG_PSERIES_PLPKS static inline bool plpks_is_available(void) { return false; } static inline u16 plpks_get_passwordlen(void) { BUILD_BUG(); } static inline void plpks_early_init_devtree(void) { } static inline int plpks_populate_fdt(void *fdt) { BUILD_BUG(); } +static inline int plpks_config_create_softlink(struct kobject *from) + { return 0; } #endif // CONFIG_PSERIES_PLPKS #endif // _ASM_POWERPC_PLPKS_H diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index 4828e0ab7e3c..fd5006307f2a 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -20,7 +20,6 @@ struct secvar_operations { int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size); ssize_t (*format)(char *buf, size_t bufsize); int (*max_size)(u64 *max_size); - const struct attribute **config_attrs; // NULL-terminated array of fixed variable names // Only used if get_next() isn't provided diff --git a/arch/powerpc/include/asm/task_size_32.h b/arch/powerpc/include/asm/task_size_32.h index de7290ee770f..725ddbf06217 100644 --- a/arch/powerpc/include/asm/task_size_32.h +++ b/arch/powerpc/include/asm/task_size_32.h @@ -2,11 +2,37 @@ #ifndef _ASM_POWERPC_TASK_SIZE_32_H #define _ASM_POWERPC_TASK_SIZE_32_H +#include <linux/sizes.h> + #if CONFIG_TASK_SIZE > CONFIG_KERNEL_START #error User TASK_SIZE overlaps with KERNEL_START address #endif -#define TASK_SIZE (CONFIG_TASK_SIZE) +#ifdef CONFIG_PPC_8xx +#define MODULES_END ASM_CONST(CONFIG_PAGE_OFFSET) +#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) +#define MODULES_VADDR (MODULES_END - MODULES_SIZE) +#define MODULES_BASE (MODULES_VADDR & ~(UL(SZ_4M) - 1)) +#define USER_TOP (MODULES_BASE - SZ_4M) +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +#define MODULES_END (ASM_CONST(CONFIG_PAGE_OFFSET) & ~(UL(SZ_256M) - 1)) +#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M) +#define MODULES_VADDR (MODULES_END - MODULES_SIZE) +#define MODULES_BASE (MODULES_VADDR & ~(UL(SZ_256M) - 1)) +#define USER_TOP (MODULES_BASE - SZ_4M) +#endif + +#ifndef USER_TOP +#define USER_TOP ((ASM_CONST(CONFIG_PAGE_OFFSET) - SZ_128K) & ~(UL(SZ_128K) - 1)) +#endif + +#if CONFIG_TASK_SIZE < USER_TOP +#define TASK_SIZE ASM_CONST(CONFIG_TASK_SIZE) +#else +#define TASK_SIZE USER_TOP +#endif /* * This decides where the kernel will search for a free chunk of vm space during diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 784a00e681fa..ba1d878c3f40 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -2,6 +2,8 @@ #ifndef _ARCH_POWERPC_UACCESS_H #define _ARCH_POWERPC_UACCESS_H +#include <linux/sizes.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/extable.h> @@ -45,14 +47,14 @@ do { \ __label__ __pu_failed; \ \ - allow_write_to_user(__pu_addr, __pu_size); \ + allow_user_access(__pu_addr, KUAP_WRITE); \ __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed); \ - prevent_write_to_user(__pu_addr, __pu_size); \ + prevent_user_access(KUAP_WRITE); \ __pu_err = 0; \ break; \ \ __pu_failed: \ - prevent_write_to_user(__pu_addr, __pu_size); \ + prevent_user_access(KUAP_WRITE); \ __pu_err = -EFAULT; \ } while (0); \ \ @@ -301,9 +303,10 @@ do { \ __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ might_fault(); \ - allow_read_from_user(__gu_addr, __gu_size); \ + barrier_nospec(); \ + allow_user_access(NULL, KUAP_READ); \ __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ - prevent_read_from_user(__gu_addr, __gu_size); \ + prevent_user_access(KUAP_READ); \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -329,9 +332,10 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; - allow_read_write_user(to, from, n); + barrier_nospec(); + allow_user_access(to, KUAP_READ_WRITE); ret = __copy_tofrom_user(to, from, n); - prevent_read_write_user(to, from, n); + prevent_user_access(KUAP_READ_WRITE); return ret; } #endif /* __powerpc64__ */ @@ -341,9 +345,9 @@ static inline unsigned long raw_copy_from_user(void *to, { unsigned long ret; - allow_read_from_user(from, n); + allow_user_access(NULL, KUAP_READ); ret = __copy_tofrom_user((__force void __user *)to, from, n); - prevent_read_from_user(from, n); + prevent_user_access(KUAP_READ); return ret; } @@ -352,9 +356,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) { unsigned long ret; - allow_write_to_user(to, n); + allow_user_access(to, KUAP_WRITE); ret = __copy_tofrom_user(to, (__force const void __user *)from, n); - prevent_write_to_user(to, n); + prevent_user_access(KUAP_WRITE); return ret; } @@ -365,9 +369,9 @@ static inline unsigned long __clear_user(void __user *addr, unsigned long size) unsigned long ret; might_fault(); - allow_write_to_user(addr, size); + allow_user_access(addr, KUAP_WRITE); ret = __arch_clear_user(addr, size); - prevent_write_to_user(addr, size); + prevent_user_access(KUAP_WRITE); return ret; } @@ -395,9 +399,9 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n) { if (check_copy_size(from, n, true)) { if (access_ok(to, n)) { - allow_write_to_user(to, n); + allow_user_access(to, KUAP_WRITE); n = copy_mc_generic((void __force *)to, from, n); - prevent_write_to_user(to, n); + prevent_user_access(KUAP_WRITE); } } @@ -408,48 +412,104 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n) extern long __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size); -static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +static __must_check __always_inline bool __user_access_begin(const void __user *ptr, size_t len, + unsigned long dir) { if (unlikely(!access_ok(ptr, len))) return false; might_fault(); - allow_read_write_user((void __user *)ptr, ptr, len); + if (dir & KUAP_READ) + barrier_nospec(); + allow_user_access((void __user *)ptr, dir); return true; } -#define user_access_begin user_access_begin -#define user_access_end prevent_current_access_user + +#define user_access_begin(p, l) __user_access_begin(p, l, KUAP_READ_WRITE) +#define user_read_access_begin(p, l) __user_access_begin(p, l, KUAP_READ) +#define user_write_access_begin(p, l) __user_access_begin(p, l, KUAP_WRITE) + +#define user_access_end() prevent_user_access(KUAP_READ_WRITE) +#define user_read_access_end() prevent_user_access(KUAP_READ) +#define user_write_access_end() prevent_user_access(KUAP_WRITE) + #define user_access_save prevent_user_access_return #define user_access_restore restore_user_access -static __must_check __always_inline bool -user_read_access_begin(const void __user *ptr, size_t len) +/* + * Masking the user address is an alternative to a conditional + * user_access_begin that can avoid the fencing. This only works + * for dense accesses starting at the address. + */ +static inline void __user *mask_user_address_simple(const void __user *ptr) { - if (unlikely(!access_ok(ptr, len))) - return false; + unsigned long addr = (unsigned long)ptr; + unsigned long mask = (unsigned long)(((long)addr >> (BITS_PER_LONG - 1)) & LONG_MAX); - might_fault(); + return (void __user *)(addr & ~mask); +} - allow_read_from_user(ptr, len); - return true; +static inline void __user *mask_user_address_isel(const void __user *ptr) +{ + unsigned long addr; + + asm("cmplw %1, %2; iselgt %0, %2, %1" : "=r"(addr) : "r"(ptr), "r"(TASK_SIZE) : "cr0"); + + return (void __user *)addr; } -#define user_read_access_begin user_read_access_begin -#define user_read_access_end prevent_current_read_from_user -static __must_check __always_inline bool -user_write_access_begin(const void __user *ptr, size_t len) +/* TASK_SIZE is a multiple of 128K for shifting by 17 to the right */ +static inline void __user *mask_user_address_32(const void __user *ptr) { - if (unlikely(!access_ok(ptr, len))) - return false; + unsigned long addr = (unsigned long)ptr; + unsigned long mask = (unsigned long)((long)((TASK_SIZE >> 17) - 1 - (addr >> 17)) >> 31); + + addr = (addr & ~mask) | (TASK_SIZE & mask); + + return (void __user *)addr; +} + +static inline void __user *mask_user_address_fallback(const void __user *ptr) +{ + unsigned long addr = (unsigned long)ptr; + + return (void __user *)(likely(addr < TASK_SIZE) ? addr : TASK_SIZE); +} + +static inline void __user *mask_user_address(const void __user *ptr) +{ +#ifdef MODULES_VADDR + const unsigned long border = MODULES_VADDR; +#else + const unsigned long border = PAGE_OFFSET; +#endif + + if (IS_ENABLED(CONFIG_PPC64)) + return mask_user_address_simple(ptr); + if (IS_ENABLED(CONFIG_E500)) + return mask_user_address_isel(ptr); + if (TASK_SIZE <= UL(SZ_2G) && border >= UL(SZ_2G)) + return mask_user_address_simple(ptr); + if (IS_ENABLED(CONFIG_PPC_BARRIER_NOSPEC)) + return mask_user_address_32(ptr); + return mask_user_address_fallback(ptr); +} + +static __always_inline void __user *__masked_user_access_begin(const void __user *p, + unsigned long dir) +{ + void __user *ptr = mask_user_address(p); might_fault(); + allow_user_access(ptr, dir); - allow_write_to_user((void __user *)ptr, len); - return true; + return ptr; } -#define user_write_access_begin user_write_access_begin -#define user_write_access_end prevent_current_write_to_user + +#define masked_user_access_begin(p) __masked_user_access_begin(p, KUAP_READ_WRITE) +#define masked_user_read_access_begin(p) __masked_user_access_begin(p, KUAP_READ) +#define masked_user_write_access_begin(p) __masked_user_access_begin(p, KUAP_WRITE) #define arch_unsafe_get_user(x, p, e) do { \ __long_type(*(p)) __gu_val; \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a4bc80b30410..46149f326fd4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -331,7 +331,7 @@ int main(void) #ifndef CONFIG_PPC64 DEFINE(TASK_SIZE, TASK_SIZE); - DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); + DEFINE(NUM_USER_SEGMENTS, ALIGN(TASK_SIZE, SZ_256M) >> 28); #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index aa3689d61917..73e10bd4d56d 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -65,6 +65,21 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, return true; } +bool arch_dma_alloc_direct(struct device *dev) +{ + if (dev->dma_ops_bypass) + return true; + + return false; +} + +bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle) +{ + if (!dev->dma_ops_bypass) + return false; + + return is_direct_handle(dev, dma_handle); +} #endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */ /* @@ -146,17 +161,12 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { /* - * dma_iommu_bypass_supported() sets dma_max when there is - * 1:1 mapping but it is somehow limited. - * ibm,pmemory is one example. + * fixed ops will be used for RAM. This is limited by + * bus_dma_limit which is set when RAM is pre-mapped. */ - dev->dma_ops_bypass = dev->bus_dma_limit == 0; - if (!dev->dma_ops_bypass) - dev_warn(dev, - "iommu: 64-bit OK but direct DMA is limited by %llx\n", - dev->bus_dma_limit); - else - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + dev->dma_ops_bypass = true; + dev_info(dev, "iommu: 64-bit OK but direct DMA is limited by %llx\n", + dev->bus_dma_limit); return 1; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ef78ff77cf8f..028f69158532 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -846,7 +846,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pci_lock_rescan_remove(); - bus = eeh_pe_bus_get(pe); + bus = eeh_pe_bus_get_nolock(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); @@ -886,14 +886,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Log the event */ if (pe->type & EEH_PE_PHB) { pr_err("EEH: Recovering PHB#%x, location: %s\n", - pe->phb->global_number, eeh_pe_loc_get(pe)); + pe->phb->global_number, eeh_pe_loc_get_bus(bus)); } else { struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb); pr_err("EEH: Recovering PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); pr_err("EEH: PE location: %s, PHB location: %s\n", - eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); + eeh_pe_loc_get_bus(bus), + eeh_pe_loc_get_bus(eeh_pe_bus_get_nolock(phb_pe))); } #ifdef CONFIG_STACKTRACE @@ -1098,7 +1099,7 @@ recover_failed: eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - bus = eeh_pe_bus_get(pe); + bus = eeh_pe_bus_get_nolock(pe); if (bus) pci_hp_remove_devices(bus); else @@ -1222,7 +1223,7 @@ void eeh_handle_special_event(void) (phb_pe->state & EEH_PE_RECOVERING)) continue; - bus = eeh_pe_bus_get(phb_pe); + bus = eeh_pe_bus_get_nolock(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " "PHB#%x-PE#%x\n", diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index e740101fadf3..040e8f69a4aa 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -812,6 +812,24 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) const char *eeh_pe_loc_get(struct eeh_pe *pe) { struct pci_bus *bus = eeh_pe_bus_get(pe); + return eeh_pe_loc_get_bus(bus); +} + +/** + * eeh_pe_loc_get_bus - Retrieve location code binding to the given PCI bus + * @bus: PCI bus + * + * Retrieve the location code associated with the given PCI bus. If the bus + * is a root bus, the location code is fetched from the PHB device tree node + * or root port. Otherwise, the location code is obtained from the device + * tree node of the upstream bridge of the bus. The function walks up the + * bus hierarchy if necessary, checking each node for the appropriate + * location code property ("ibm,io-base-loc-code" for root buses, + * "ibm,slot-location-code" for others). If no location code is found, + * returns "N/A". + */ +const char *eeh_pe_loc_get_bus(struct pci_bus *bus) +{ struct device_node *dn; const char *loc = NULL; @@ -838,8 +856,9 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe) } /** - * eeh_pe_bus_get - Retrieve PCI bus according to the given PE + * _eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE + * @do_lock: Is the caller already held the pci_lock_rescan_remove? * * Retrieve the PCI bus according to the given PE. Basically, * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the @@ -847,7 +866,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe) * returned for BUS PE. However, we don't have associated PCI * bus for DEVICE PE. */ -struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) +static struct pci_bus *_eeh_pe_bus_get(struct eeh_pe *pe, bool do_lock) { struct eeh_dev *edev; struct pci_dev *pdev; @@ -862,11 +881,58 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); - pci_lock_rescan_remove(); + if (do_lock) + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) bus = pdev->bus; - pci_unlock_rescan_remove(); + if (do_lock) + pci_unlock_rescan_remove(); return bus; } + +/** + * eeh_pe_bus_get - Retrieve PCI bus associated with the given EEH PE, locking + * if needed + * @pe: Pointer to the EEH PE + * + * This function is a wrapper around _eeh_pe_bus_get(), which retrieves the PCI + * bus associated with the provided EEH PE structure. It acquires the PCI + * rescans lock to ensure safe access to shared data during the retrieval + * process. This function should be used when the caller requires the PCI bus + * while holding the rescan/remove lock, typically during operations that modify + * or inspect PCIe device state in a safe manner. + * + * RETURNS: + * A pointer to the PCI bus associated with the EEH PE, or NULL if none found. + */ + +struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) +{ + return _eeh_pe_bus_get(pe, true); +} + +/** + * eeh_pe_bus_get_nolock - Retrieve PCI bus associated with the given EEH PE + * without locking + * @pe: Pointer to the EEH PE + * + * This function is a variant of _eeh_pe_bus_get() that retrieves the PCI bus + * associated with the specified EEH PE without acquiring the + * pci_lock_rescan_remove lock. It should only be used when the caller can + * guarantee safe access to PE structures without the need for that lock, + * typically in contexts where the lock is already held locking is otherwise + * managed. + * + * RETURNS: + * pointer to the PCI bus associated with the EEH PE, or NULL if none is found. + * + * NOTE: + * Use this function carefully to avoid race conditions and data corruption. + */ + +struct pci_bus *eeh_pe_bus_get_nolock(struct eeh_pe *pe) +{ + return _eeh_pe_bus_get(pe, false); +} diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index cb2bca76be53..c1779455ea32 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -420,7 +420,7 @@ InstructionTLBMiss: lwz r2,0(r2) /* get pmd entry */ #ifdef CONFIG_EXECMEM rlwinm r3, r0, 4, 0xf - subi r3, r3, (TASK_SIZE >> 28) & 0xf + subi r3, r3, NUM_USER_SEGMENTS #endif rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ @@ -475,7 +475,7 @@ DataLoadTLBMiss: lwz r2,0(r1) /* get pmd entry */ rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - subi r3, r3, (TASK_SIZE >> 28) & 0xf + subi r3, r3, NUM_USER_SEGMENTS beq- 2f /* bail if no mapping */ 1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ @@ -554,7 +554,7 @@ DataStoreTLBMiss: lwz r2,0(r1) /* get pmd entry */ rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - subi r3, r3, (TASK_SIZE >> 28) & 0xf + subi r3, r3, NUM_USER_SEGMENTS beq- 2f /* bail if no mapping */ 1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index ec900bce0257..4111b21962eb 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/of.h> #include <asm/secvar.h> +#include <asm/plpks.h> #define NAME_MAX_SIZE 1024 @@ -145,19 +146,6 @@ static __init int update_kobj_size(void) return 0; } -static __init int secvar_sysfs_config(struct kobject *kobj) -{ - struct attribute_group config_group = { - .name = "config", - .attrs = (struct attribute **)secvar_ops->config_attrs, - }; - - if (secvar_ops->config_attrs) - return sysfs_create_group(kobj, &config_group); - - return 0; -} - static __init int add_var(const char *name) { struct kobject *kobj; @@ -260,12 +248,15 @@ static __init int secvar_sysfs_init(void) goto err; } - rc = secvar_sysfs_config(secvar_kobj); + rc = plpks_config_create_softlink(secvar_kobj); if (rc) { - pr_err("Failed to create config directory\n"); + pr_err("Failed to create softlink to PLPKS config directory"); goto err; } + pr_info("/sys/firmware/secvar/config is now deprecated.\n"); + pr_info("Will be removed in future versions.\n"); + if (secvar_ops->get_next) rc = secvar_sysfs_load(); else diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 292fee8809bc..cad3358fa4c3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -822,6 +822,8 @@ static int parse_thread_groups(struct device_node *dn, count = of_property_count_u32_elems(dn, "ibm,thread-groups"); thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); + if (!thread_group_array) + return -ENOMEM; ret = of_property_read_u32_array(dn, "ibm,thread-groups", thread_group_array, count); if (ret) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index c42ecdf94e48..07660e8badbd 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -223,9 +223,7 @@ int mmu_mark_initmem_nx(void) update_bats(); - BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, SZ_256M) < TASK_SIZE); - - for (i = TASK_SIZE >> 28; i < 16; i++) { + for (i = ALIGN(TASK_SIZE, SZ_256M) >> 28; i < 16; i++) { /* Do not set NX on VM space for modules */ if (is_module_segment(i << 28)) continue; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3ddbfdbfa941..bc0f1a9eb0bc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -401,8 +401,6 @@ struct execmem_info __init *execmem_arch_setup(void) #ifdef MODULES_VADDR unsigned long limit = (unsigned long)_etext - SZ_32M; - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - /* First try within 32M limit from _etext to avoid branch trampolines */ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) { start = limit; diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index ab1505cf42bf..a9d3f4729ead 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -209,8 +209,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, /* 8xx can only access 32MB at the moment */ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); - - BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE) < TASK_SIZE); } int pud_clear_huge(pud_t *pud) diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 9df3af8d481f..c06704b18a2c 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -31,7 +31,7 @@ static int sr_show(struct seq_file *m, void *v) int i; seq_puts(m, "---[ User Segments ]---\n"); - for (i = 0; i < TASK_SIZE >> 28; i++) + for (i = 0; i < ALIGN(TASK_SIZE, SZ_256M) >> 28; i++) seg_show(m, i); seq_puts(m, "\n---[ Kernel Segments ]---\n"); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 8334cd667bba..82bbf63f0e57 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -24,6 +24,7 @@ #define SZL sizeof(unsigned long) #define BPF_INSN_SAFETY 64 +#define BPF_PPC_TAILCALL 8 #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) @@ -51,6 +52,13 @@ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ } while (0) +/* When constant jump offset is known prior */ +#define PPC_BCC_CONST_SHORT(cond, offset) \ + do { \ + BUILD_BUG_ON(offset < -0x8000 || offset > 0x7fff || (offset & 0x3)); \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* * Sign-extended 32-bit immediate load * @@ -72,6 +80,10 @@ } } while (0) #ifdef CONFIG_PPC64 + +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (6 * 8) + /* If dummy pass (!image), account for maximum possible instructions */ #define PPC_LI64(d, i) do { \ if (!image) \ @@ -166,6 +178,9 @@ struct codegen_context { unsigned int alt_exit_addr; u64 arena_vm_start; u64 user_vm_start; + bool is_subprog; + bool exception_boundary; + bool exception_cb; }; #define bpf_to_ppc(r) (ctx->b2p[r]) @@ -205,6 +220,7 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg, u32 code); +int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx); #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 5e976730b2f5..987cd9fb0f37 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -206,6 +206,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena); cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena); + cgctx.is_subprog = bpf_is_subprog(fp); + cgctx.exception_boundary = fp->aux->exception_boundary; + cgctx.exception_cb = fp->aux->exception_cb; /* Scouting faux-generate pass 0 */ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { @@ -435,6 +438,16 @@ void bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } +bool bpf_jit_supports_exceptions(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} + +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} + bool bpf_jit_supports_kfunc_call(void) { return true; @@ -466,6 +479,23 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) return true; } +bool bpf_jit_supports_percpu_insn(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + case BPF_FUNC_get_current_task: + case BPF_FUNC_get_current_task_btf: + return true; + default: + return false; + } +} + void *arch_alloc_bpf_trampoline(unsigned int size) { return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); @@ -600,15 +630,50 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context return 0; } -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx, - int func_frame_offset, int r4_off) +/* + * Refer __arch_prepare_bpf_trampoline() for stack component details. + * + * The tailcall count/reference is present in caller's stack frame. The + * tail_call_info is saved at the same offset on the trampoline frame + * for the traced function (BPF subprog/callee) to fetch it. + */ +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx, + int func_frame_offset, + int bpf_dummy_frame_size, int r4_off) { if (IS_ENABLED(CONFIG_PPC64)) { - /* See bpf_jit_stack_tailcallcnt() */ - int tailcallcnt_offset = 7 * 8; + /* See Generated stack layout */ + int tailcallinfo_offset = BPF_PPC_TAILCALL; - EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); - EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); + /* + * func_frame_offset = ...(1) + * bpf_dummy_frame_size + trampoline_frame_size + */ + EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset)); + EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset)); + + /* + * Setting the tail_call_info in trampoline's frame + * depending on if previous frame had value or reference. + */ + EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT)); + PPC_BCC_CONST_SHORT(COND_GT, 8); + EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx))); + /* + * From ...(1) above: + * trampoline_frame_bottom = ...(2) + * func_frame_offset - bpf_dummy_frame_size + * + * Using ...(2) derived above: + * trampoline_tail_call_info_offset = ...(3) + * trampoline_frame_bottom - tailcallinfo_offset + * + * From ...(3): + * Use trampoline_tail_call_info_offset to write reference of main's + * tail_call_info in trampoline frame. + */ + EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size) + - tailcallinfo_offset)); } else { /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); @@ -618,14 +683,11 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_contex static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, int func_frame_offset, int r4_off) { - if (IS_ENABLED(CONFIG_PPC64)) { - /* See bpf_jit_stack_tailcallcnt() */ - int tailcallcnt_offset = 7 * 8; - - EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); - EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); - } else { - /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + if (IS_ENABLED(CONFIG_PPC32)) { + /* + * Restore tailcall for 32-bit powerpc + * See bpf_jit_stack_offsetof() and BPF_PPC_TC + */ EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); } } @@ -714,6 +776,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * LR save area [ r0 save (64-bit) ] | header * [ r0 save (32-bit) ] | * dummy frame for unwind [ back chain 1 ] -- + * [ tail_call_info ] optional - 64-bit powerpc * [ padding ] align stack frame * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc * alt_lr_off [ real lr (ool stub)] optional - actual lr @@ -795,6 +858,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } } + /* + * Save tailcall count pointer at the same offset on the + * stack where subprogs expect it + */ + if ((flags & BPF_TRAMP_F_CALL_ORIG) && + (flags & BPF_TRAMP_F_TAIL_CALL_CTX)) + bpf_frame_size += BPF_PPC_TAILCALL; + /* Padding to align stack frame, if any */ bpf_frame_size = round_up(bpf_frame_size, SZL * 2); @@ -896,7 +967,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im /* Replicate tail_call_cnt before calling the original BPF prog */ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) - bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset, + bpf_dummy_frame_size, r4_off); /* Restore args */ bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 1fe37128c876..b1a3945ccc9f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -20,26 +20,48 @@ #include "bpf_jit.h" /* - * Stack layout: + * Stack layout with frame: + * Layout when setting up our own stack frame. + * Note: r1 at bottom, component offsets positive wrt r1. * Ensure the top half (upto local_tmp_var) stays consistent * with our redzone usage. * + * tail_call_info - stores tailcall count value in main program's + * frame, stores reference to tail_call_info of + * main's frame in sub-prog's frame. + * * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | - * [ tail_call_cnt ] 8 | + * [ tail_call_info ] 8 | + * [ nv gpr save area ] 6*8 + (12*8) | * [ local_tmp_var ] 24 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- + * + * Additional (12*8) in 'nv gpr save area' only in case of + * exception boundary. */ -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 32 +#define BPF_PPC_STACK_LOCALS 24 +/* + * for additional non volatile registers(r14-r25) to be saved + * at exception boundary + */ +#define BPF_PPC_EXC_STACK_SAVE (12*8) + /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + BPF_PPC_STACK_LOCALS + \ + BPF_PPC_STACK_SAVE + \ + BPF_PPC_TAILCALL) + +/* + * same as BPF_PPC_STACKFRAME with save area for additional + * non volatile registers saved at exception boundary. + * This is quad-word aligned. + */ +#define BPF_PPC_EXC_STACKFRAME (BPF_PPC_STACKFRAME + BPF_PPC_EXC_STACK_SAVE) /* BPF register usage */ #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) @@ -82,40 +104,71 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - we call other functions (kernel helpers), or * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP + * + * bpf_throw() leads to exception callback from a BPF (sub)program. + * The (sub)program is always marked as SEEN_FUNC, creating a stack + * frame. The exception callback uses the frame of the exception + * boundary, so the exception boundary program must have a frame. */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); + return ctx->seen & SEEN_FUNC || + bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)) || + ctx->exception_cb || + ctx->exception_boundary; } /* + * Stack layout with redzone: * When not setting up our own stackframe, the redzone (288 bytes) usage is: + * Note: r1 from prev frame. Component offset negative wrt r1. * * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 - * [ tail_call_cnt ] 8 + * [ tail_call_info ] 8 + * [ nv gpr save area ] 6*8 + (12*8) * [ local_tmp_var ] 24 * [ unused red zone ] 224 + * + * Additional (12*8) in 'nv gpr save area' only in case of + * exception boundary. */ static int bpf_jit_stack_local(struct codegen_context *ctx) { - if (bpf_has_stack_frame(ctx)) + if (bpf_has_stack_frame(ctx)) { + /* Stack layout with frame */ return STACK_FRAME_MIN_SIZE + ctx->stack_size; - else - return -(BPF_PPC_STACK_SAVE + 32); + } else { + /* Stack layout with redzone */ + return -(BPF_PPC_TAILCALL + +BPF_PPC_STACK_SAVE + +(ctx->exception_boundary || ctx->exception_cb ? + BPF_PPC_EXC_STACK_SAVE : 0) + +BPF_PPC_STACK_LOCALS + ); + } } -static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) +int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 24; + return bpf_jit_stack_local(ctx) + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { - if (reg >= BPF_PPC_NVR_MIN && reg < 32) + int min_valid_nvreg = BPF_PPC_NVR_MIN; + /* Default frame size for all cases except exception boundary */ + int frame_nvr_size = BPF_PPC_STACKFRAME; + + /* Consider all nv regs for handling exceptions */ + if (ctx->exception_boundary || ctx->exception_cb) { + min_valid_nvreg = _R14; + frame_nvr_size = BPF_PPC_EXC_STACKFRAME; + } + + if (reg >= min_valid_nvreg && reg < 32) return (bpf_has_stack_frame(ctx) ? - (BPF_PPC_STACKFRAME + ctx->stack_size) : 0) - - (8 * (32 - reg)); + (frame_nvr_size + ctx->stack_size) : 0) + - (8 * (32 - reg)) - BPF_PPC_TAILCALL; pr_err("BPF JIT is asking about unknown registers"); BUG(); @@ -125,6 +178,17 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx) { } +/* + * For exception boundary & exception_cb progs: + * return increased size to accommodate additional NVRs. + */ +static int bpf_jit_stack_size(struct codegen_context *ctx) +{ + return ctx->exception_boundary || ctx->exception_cb ? + BPF_PPC_EXC_STACKFRAME : + BPF_PPC_STACKFRAME; +} + void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -138,21 +202,45 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) #endif /* - * Initialize tail_call_cnt if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. + * Tail call count(tcc) is saved & updated only in main + * program's frame and the address of tcc in main program's + * frame (tcc_ptr) is saved in subprogs frame. + * + * Offset of tail_call_info on any frame will be interpreted + * as either tcc_ptr or tcc value depending on whether it is + * greater than MAX_TAIL_CALL_CNT or not. */ - if (ctx->seen & SEEN_TAILCALL) { + if (!ctx->is_subprog) { EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); - } else { - EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_TAILCALL))); + } else if (!ctx->exception_cb) { + /* + * Tailcall jitting for non exception_cb progs only. + * exception_cb won't require tail_call_info to be setup. + * + * tail_call_info interpretation logic: + * + * if tail_call_info < MAX_TAIL_CALL_CNT + * main prog calling first subprog -> copy reference + * else + * subsequent subprog calling another subprog -> directly copy content + */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 0)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_TAILCALL))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_BCC_CONST_SHORT(COND_GT, 8); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), + -(BPF_PPC_TAILCALL))); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_TAILCALL))); } - if (bpf_has_stack_frame(ctx)) { + if (bpf_has_stack_frame(ctx) && !ctx->exception_cb) { /* + * exception_cb uses boundary frame after stack walk. + * It can simply use redzone, this optimization reduces + * stack walk loop by one level. + * * We need a stack frame, but we don't necessarily need to * save/restore LR unless we call other functions */ @@ -161,26 +249,50 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); + EMIT(PPC_RAW_STDU(_R1, _R1, + -(bpf_jit_stack_size(ctx) + ctx->stack_size))); } /* - * Back up non-volatile regs -- BPF registers 6-10 - * If we haven't created our own stack frame, we save these - * in the protected zone below the previous stack frame + * Program acting as exception boundary pushes R14..R25 in addition to + * BPF callee-saved non volatile registers. Exception callback uses + * the boundary program's stack frame, recover additionally saved + * registers in epilogue of exception callback. */ - for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) - EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); + if (ctx->exception_boundary) { + for (i = _R14; i <= _R25; i++) + EMIT(PPC_RAW_STD(i, _R1, bpf_jit_stack_offsetof(ctx, i))); + } - if (ctx->arena_vm_start) - EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1, + if (!ctx->exception_cb) { + /* + * Back up non-volatile regs -- BPF registers 6-10 + * If we haven't created our own stack frame, we save these + * in the protected zone below the previous stack frame + */ + for (i = BPF_REG_6; i <= BPF_REG_10; i++) + if (ctx->exception_boundary || bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, + bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); + + if (ctx->exception_boundary || ctx->arena_vm_start) + EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START)))); + } else { + /* + * Exception callback receives Frame Pointer of boundary + * program(main prog) as third arg + */ + EMIT(PPC_RAW_MR(_R1, _R5)); + } - /* Setup frame pointer to point to the bpf stack area */ + /* + * Exception_cb not restricted from using stack area or arena. + * Setup frame pointer to point to the bpf stack area + */ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, - STACK_FRAME_MIN_SIZE + ctx->stack_size)); + STACK_FRAME_MIN_SIZE + ctx->stack_size)); if (ctx->arena_vm_start) PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start); @@ -192,17 +304,27 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + if (ctx->exception_cb || bpf_is_seen_register(ctx, bpf_to_ppc(i))) EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); - if (ctx->arena_vm_start) + if (ctx->exception_cb || ctx->arena_vm_start) EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START)))); + if (ctx->exception_cb) { + /* + * Recover additionally saved non volatile registers from stack + * frame of exception boundary program. + */ + for (i = _R14; i <= _R25; i++) + EMIT(PPC_RAW_LD(i, _R1, bpf_jit_stack_offsetof(ctx, i))); + } + /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); - if (ctx->seen & SEEN_FUNC) { + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_jit_stack_size(ctx) + ctx->stack_size)); + + if (ctx->seen & SEEN_FUNC || ctx->exception_cb) { EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(_R0)); } @@ -221,6 +343,47 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_build_fentry_stubs(image, ctx); } +/* + * arch_bpf_stack_walk() - BPF stack walker for PowerPC + * + * Based on arch_stack_walk() from stacktrace.c. + * PowerPC uses stack frames rather than stack pointers. See [1] for + * the equivalence between frame pointers and stack pointers. + * Additional reference at [2]. + * TODO: refactor with arch_stack_walk() + * + * [1]: https://lore.kernel.org/all/20200220115141.2707-1-mpe@ellerman.id.au/ + * [2]: https://lore.kernel.org/bpf/20260122211854.5508-5-adubey@linux.ibm.com/ + */ + +void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64), void *cookie) +{ + // callback processing always in current context + unsigned long sp = current_stack_frame(); + + for (;;) { + unsigned long *stack = (unsigned long *) sp; + unsigned long ip; + + if (!validate_sp(sp, current)) + return; + + ip = stack[STACK_FRAME_LR_SAVE]; + if (!ip) + break; + + /* + * consume_fn common code expects stack pointer in third + * argument. There is no sp in ppc64, rather pass frame + * pointer(named sp here). + */ + if (ip && !consume_fn(cookie, ip, sp, sp)) + break; + + sp = stack[0]; + } +} + int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; @@ -343,19 +506,38 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); PPC_BCC_SHORT(COND_GE, out); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_BCC_CONST_SHORT(COND_LE, 8); + + /* dereference TMP_REG_1 */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 0)); + /* - * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * if (tail_call_info == MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); - PPC_BCC_SHORT(COND_GE, out); + PPC_BCC_SHORT(COND_EQ, out); /* - * tail_call_cnt++; + * tail_call_info++; <- Actual value of tcc here */ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); - EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + + /* + * Before writing updated tail_call_info, distinguish if current frame + * is storing a reference to tail_call_info or actual tcc value in + * tail_call_info. + */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT)); + PPC_BCC_CONST_SHORT(COND_GT, 8); + + /* First get address of tail_call_info */ + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + /* Writeback updated value to tail_call_info */ + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); @@ -918,6 +1100,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (insn_is_mov_percpu_addr(&insn[i])) { + if (IS_ENABLED(CONFIG_SMP)) { + EMIT(PPC_RAW_LD(tmp1_reg, _R13, offsetof(struct paca_struct, data_offset))); + EMIT(PPC_RAW_ADD(dst_reg, src_reg, tmp1_reg)); + } else if (src_reg != dst_reg) { + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + } + break; + } + if (insn_is_cast_user(&insn[i])) { EMIT(PPC_RAW_RLDICL_DOT(tmp1_reg, src_reg, 0, 32)); PPC_LI64(dst_reg, (ctx->user_vm_start & 0xffffffff00000000UL)); @@ -1390,6 +1582,17 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; + if (src_reg == bpf_to_ppc(BPF_REG_0)) { + if (imm == BPF_FUNC_get_smp_processor_id) { + EMIT(PPC_RAW_LHZ(src_reg, _R13, offsetof(struct paca_struct, paca_index))); + break; + } else if (imm == BPF_FUNC_get_current_task || + imm == BPF_FUNC_get_current_task_btf) { + EMIT(PPC_RAW_LD(src_reg, _R13, offsetof(struct paca_struct, __current))); + break; + } + } + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 931ebaa474c8..3ced289a675b 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_PAPR_SCM) += papr_scm.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o -obj-$(CONFIG_PSERIES_PLPKS) += plpks.o +obj-$(CONFIG_PSERIES_PLPKS) += plpks.o plpks-sysfs.o obj-$(CONFIG_PPC_SECURE_BOOT) += plpks-secvar.o obj-$(CONFIG_PSERIES_PLPKS_SED) += plpks_sed_ops.o obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index eec333dd2e59..5497b130e026 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1769,10 +1769,8 @@ out_failed: out_unlock: mutex_unlock(&dma_win_init_mutex); - /* If we have persistent memory and the window size is not big enough - * to directly map both RAM and vPMEM, then we need to set DMA limit. - */ - if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS) + /* For pre-mapped memory, set bus_dma_limit to the max RAM */ + if (direct_mapping) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << max_ram_len); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 7473c7ca1db0..4cd70a8201f1 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -19,6 +19,11 @@ #include "pseries.h" +struct pseries_msi_device { + unsigned int msi_quota; + unsigned int msi_used; +}; + static int query_token, change_token; #define RTAS_QUERY_FN 0 @@ -433,8 +438,28 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev struct msi_domain_info *info = domain->host_data; struct pci_dev *pdev = to_pci_dev(dev); int type = (info->flags & MSI_FLAG_PCI_MSIX) ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; + int ret; + + struct pseries_msi_device *pseries_dev __free(kfree) + = kmalloc(sizeof(*pseries_dev), GFP_KERNEL); + if (!pseries_dev) + return -ENOMEM; + + while (1) { + ret = rtas_prepare_msi_irqs(pdev, nvec, type, arg); + if (!ret) + break; + else if (ret > 0) + nvec = ret; + else + return ret; + } - return rtas_prepare_msi_irqs(pdev, nvec, type, arg); + pseries_dev->msi_quota = nvec; + pseries_dev->msi_used = 0; + + arg->scratchpad[0].ptr = no_free_ptr(pseries_dev); + return 0; } /* @@ -443,9 +468,13 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { + struct pseries_msi_device *pseries_dev = arg->scratchpad[0].ptr; struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); + + WARN_ON(pseries_dev->msi_used); + kfree(pseries_dev); } static void pseries_msi_shutdown(struct irq_data *d) @@ -546,12 +575,18 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq unsigned int nr_irqs, void *arg) { struct pci_controller *phb = domain->host_data; + struct pseries_msi_device *pseries_dev; msi_alloc_info_t *info = arg; struct msi_desc *desc = info->desc; struct pci_dev *pdev = msi_desc_to_pci_dev(desc); int hwirq; int i, ret; + pseries_dev = info->scratchpad[0].ptr; + + if (pseries_dev->msi_used + nr_irqs > pseries_dev->msi_quota) + return -ENOSPC; + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index); if (hwirq < 0) { dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); @@ -567,9 +602,10 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq goto out; irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, - &pseries_msi_irq_chip, domain->host_data); + &pseries_msi_irq_chip, pseries_dev); } + pseries_dev->msi_used++; return 0; out: @@ -582,9 +618,11 @@ static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq unsigned int nr_irqs) { struct irq_data *d = irq_domain_get_irq_data(domain, virq); - struct pci_controller *phb = irq_data_get_irq_chip_data(d); + struct pseries_msi_device *pseries_dev = irq_data_get_irq_chip_data(d); + struct pci_controller *phb = domain->host_data; pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); + pseries_dev->msi_used -= nr_irqs; irq_domain_free_irqs_parent(domain, virq, nr_irqs); } diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c index f9e9cc40c9d0..a50ff6943d80 100644 --- a/arch/powerpc/platforms/pseries/plpks-secvar.c +++ b/arch/powerpc/platforms/pseries/plpks-secvar.c @@ -20,33 +20,6 @@ #include <asm/secvar.h> #include <asm/plpks.h> -// Config attributes for sysfs -#define PLPKS_CONFIG_ATTR(name, fmt, func) \ - static ssize_t name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *buf) \ - { \ - return sysfs_emit(buf, fmt, func()); \ - } \ - static struct kobj_attribute attr_##name = __ATTR_RO(name) - -PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version); -PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize); -PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize); -PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace); -PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies); -PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", plpks_get_signedupdatealgorithms); - -static const struct attribute *config_attrs[] = { - &attr_version.attr, - &attr_max_object_size.attr, - &attr_total_size.attr, - &attr_used_space.attr, - &attr_supported_policies.attr, - &attr_signed_update_algorithms.attr, - NULL, -}; - static u32 get_policy(const char *name) { if ((strcmp(name, "db") == 0) || @@ -225,7 +198,6 @@ static const struct secvar_operations plpks_secvar_ops_static = { .set = plpks_set_variable, .format = plpks_secvar_format, .max_size = plpks_max_size, - .config_attrs = config_attrs, .var_names = plpks_var_names_static, }; @@ -234,7 +206,6 @@ static const struct secvar_operations plpks_secvar_ops_dynamic = { .set = plpks_set_variable, .format = plpks_secvar_format, .max_size = plpks_max_size, - .config_attrs = config_attrs, .var_names = plpks_var_names_dynamic, }; diff --git a/arch/powerpc/platforms/pseries/plpks-sysfs.c b/arch/powerpc/platforms/pseries/plpks-sysfs.c new file mode 100644 index 000000000000..c2ebcbb41ae3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks-sysfs.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 IBM Corporation, Srish Srinivasan <ssrish@linux.ibm.com> + * + * This code exposes PLPKS config to user via sysfs + */ + +#define pr_fmt(fmt) "plpks-sysfs: "fmt + +#include <linux/init.h> +#include <linux/printk.h> +#include <linux/types.h> +#include <asm/machdep.h> +#include <asm/plpks.h> + +/* config attributes for sysfs */ +#define PLPKS_CONFIG_ATTR(name, fmt, func) \ + static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ + { \ + return sysfs_emit(buf, fmt, func()); \ + } \ + static struct kobj_attribute attr_##name = __ATTR_RO(name) + +PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version); +PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize); +PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize); +PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace); +PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies); +PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", + plpks_get_signedupdatealgorithms); +PLPKS_CONFIG_ATTR(wrapping_features, "%016llx\n", plpks_get_wrappingfeatures); + +static const struct attribute *config_attrs[] = { + &attr_version.attr, + &attr_max_object_size.attr, + &attr_total_size.attr, + &attr_used_space.attr, + &attr_supported_policies.attr, + &attr_signed_update_algorithms.attr, + &attr_wrapping_features.attr, + NULL, +}; + +static struct kobject *plpks_kobj, *plpks_config_kobj; + +int plpks_config_create_softlink(struct kobject *from) +{ + if (!plpks_config_kobj) + return -EINVAL; + return sysfs_create_link(from, plpks_config_kobj, "config"); +} + +static __init int plpks_sysfs_config(struct kobject *kobj) +{ + struct attribute_group config_group = { + .name = NULL, + .attrs = (struct attribute **)config_attrs, + }; + + return sysfs_create_group(kobj, &config_group); +} + +static __init int plpks_sysfs_init(void) +{ + int rc; + + if (!plpks_is_available()) + return -ENODEV; + + plpks_kobj = kobject_create_and_add("plpks", firmware_kobj); + if (!plpks_kobj) { + pr_err("Failed to create plpks kobj\n"); + return -ENOMEM; + } + + plpks_config_kobj = kobject_create_and_add("config", plpks_kobj); + if (!plpks_config_kobj) { + pr_err("Failed to create plpks config kobj\n"); + kobject_put(plpks_kobj); + return -ENOMEM; + } + + rc = plpks_sysfs_config(plpks_config_kobj); + if (rc) { + pr_err("Failed to create attribute group for plpks config\n"); + kobject_put(plpks_config_kobj); + kobject_put(plpks_kobj); + return rc; + } + + return 0; +} + +machine_subsys_initcall(pseries, plpks_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index b1667ed05f98..23e4e2a922fc 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -9,6 +9,32 @@ #define pr_fmt(fmt) "plpks: " fmt +#define PLPKS_WRAPKEY_COMPONENT "PLPKSWR" +#define PLPKS_WRAPKEY_NAME "default-wrapping-key" + +/* + * To 4K align the {input, output} buffers to the {UN}WRAP H_CALLs + */ +#define PLPKS_WRAPPING_BUF_ALIGN 4096 + +/* + * To ensure the output buffer's length is at least 1024 bytes greater + * than the input buffer's length during the WRAP H_CALL + */ +#define PLPKS_WRAPPING_BUF_DIFF 1024 + +#define PLPKS_WRAP_INTERFACE_BIT 3 +#define PLPKS_WRAPPING_KEY_LENGTH 32 + +#define WRAPFLAG_BE_BIT_SET(be_bit) \ + BIT_ULL(63 - (be_bit)) + +#define WRAPFLAG_BE_GENMASK(be_bit_hi, be_bit_lo) \ + GENMASK_ULL(63 - (be_bit_hi), 63 - (be_bit_lo)) + +#define WRAPFLAG_BE_FIELD_PREP(be_bit_hi, be_bit_lo, val) \ + FIELD_PREP(WRAPFLAG_BE_GENMASK(be_bit_hi, be_bit_lo), (val)) + #include <linux/delay.h> #include <linux/errno.h> #include <linux/io.h> @@ -19,6 +45,7 @@ #include <linux/of_fdt.h> #include <linux/libfdt.h> #include <linux/memblock.h> +#include <linux/bitfield.h> #include <asm/hvcall.h> #include <asm/machdep.h> #include <asm/plpks.h> @@ -38,6 +65,8 @@ static u32 usedspace; static u32 supportedpolicies; static u32 maxlargeobjectsize; static u64 signedupdatealgorithms; +static u64 wrappingfeatures; +static bool wrapsupport; struct plpks_auth { u8 version; @@ -248,6 +277,7 @@ static int _plpks_get_config(void) __be32 supportedpolicies; __be32 maxlargeobjectsize; __be64 signedupdatealgorithms; + __be64 wrappingfeatures; u8 rsvd1[476]; } __packed * config; size_t size; @@ -280,6 +310,8 @@ static int _plpks_get_config(void) supportedpolicies = be32_to_cpu(config->supportedpolicies); maxlargeobjectsize = be32_to_cpu(config->maxlargeobjectsize); signedupdatealgorithms = be64_to_cpu(config->signedupdatealgorithms); + wrappingfeatures = be64_to_cpu(config->wrappingfeatures); + wrapsupport = config->flags & PPC_BIT8(PLPKS_WRAP_INTERFACE_BIT); // Validate that the numbers we get back match the requirements of the spec if (maxpwsize < 32) { @@ -312,40 +344,107 @@ err: return rc; } +/** + * plpks_get_version() - Get the version of the PLPKS config structure. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the PLPKS config structure version and saves it in a file local static + * version variable. + * + * Returns: On success the saved PLPKS config structure version is returned, 0 + * if not. + */ u8 plpks_get_version(void) { return version; } +/** + * plpks_get_objoverhead() - Get the hypervisor storage overhead per object. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the per object hypervisor storage overhead in bytes into the local + * static objoverhead variable, excluding the size of the object or the label. + * This value can be treated as valid only when the PLPKS config structure + * version >= 2. + * + * Returns: If PLPKS config structure version >= 2 then the storage overhead is + * returned, 0 otherwise. + */ u16 plpks_get_objoverhead(void) { return objoverhead; } +/** + * plpks_get_maxpwsize() - Get the maximum password size. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the maximum password size and checks if it is 32 bytes at the least + * before storing it in the local static maxpwsize variable. + * + * Returns: On success the maximum password size is returned, 0 if not. + */ u16 plpks_get_maxpwsize(void) { return maxpwsize; } +/** + * plpks_get_maxobjectsize() - Get the maximum object size supported by the + * PLPKS. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the maximum object size into the file local static maxobjsize variable. + * + * Returns: On success the maximum object size is returned, 0 if not. + */ u16 plpks_get_maxobjectsize(void) { return maxobjsize; } +/** + * plpks_get_maxobjectlabelsize() - Get the maximum object label size supported + * by the PLPKS. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the maximum object label size into the local static maxobjlabelsize + * variable. + * + * Returns: On success the maximum object label size is returned, 0 if not. + */ u16 plpks_get_maxobjectlabelsize(void) { return maxobjlabelsize; } +/** + * plpks_get_totalsize() - Get the total size of the PLPKS that is configured. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the total size of the PLPKS that is configured for the LPAR into the + * file local static totalsize variable. + * + * Returns: On success the total size of the PLPKS configured is returned, 0 if + * not. + */ u32 plpks_get_totalsize(void) { return totalsize; } +/** + * plpks_get_usedspace() - Get the used space from the total size of the PLPKS. + * + * Invoke the H_PKS_GET_CONFIG HCALL to refresh the latest value for the used + * space as this keeps changing with the creation and removal of objects in the + * PLPKS. + * + * Returns: On success the used space is returned, 0 if not. + */ u32 plpks_get_usedspace(void) { - // Unlike other config values, usedspace regularly changes as objects - // are updated, so we need to refresh. int rc = _plpks_get_config(); if (rc) { pr_err("Couldn't get config, rc: %d\n", rc); @@ -354,26 +453,101 @@ u32 plpks_get_usedspace(void) return usedspace; } +/** + * plpks_get_supportedpolicies() - Get a bitmask of the policies supported by + * the hypervisor. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads a bitmask of the policies supported by the hypervisor into the file + * local static supportedpolicies variable. + * + * Returns: On success the bitmask of the policies supported by the hypervisor + * are returned, 0 if not. + */ u32 plpks_get_supportedpolicies(void) { return supportedpolicies; } +/** + * plpks_get_maxlargeobjectsize() - Get the maximum object size supported for + * PLPKS config structure version >= 3 + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads the maximum object size into the local static maxlargeobjectsize + * variable for PLPKS config structure version >= 3. This was introduced + * starting with PLPKS config structure version 3 to allow for objects of + * size >= 64K. + * + * Returns: If PLPKS config structure version >= 3 then the new maximum object + * size is returned, 0 if not. + */ u32 plpks_get_maxlargeobjectsize(void) { return maxlargeobjectsize; } +/** + * plpks_get_signedupdatealgorithms() - Get a bitmask of the signature + * algorithms supported for signed updates. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads a bitmask of the signature algorithms supported for signed updates into + * the file local static signedupdatealgorithms variable. This is valid only + * when the PLPKS config structure version >= 3. + * + * Returns: On success the bitmask of the signature algorithms supported for + * signed updates is returned, 0 if not. + */ u64 plpks_get_signedupdatealgorithms(void) { return signedupdatealgorithms; } +/** + * plpks_get_wrappingfeatures() - Returns a bitmask of the wrapping features + * supported by the hypervisor. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * reads a bitmask of the wrapping features supported by the hypervisor into the + * file local static wrappingfeatures variable. This is valid only when the + * PLPKS config structure version >= 3. + * + * Return: + * bitmask of the wrapping features supported by the hypervisor + */ +u64 plpks_get_wrappingfeatures(void) +{ + return wrappingfeatures; +} + +/** + * plpks_get_passwordlen() - Get the length of the PLPKS password in bytes. + * + * The H_PKS_GEN_PASSWORD HCALL makes the hypervisor generate a random password + * for the specified consumer, apply that password to the PLPKS and return it to + * the caller. In this process, the password length for the OS consumer is + * stored in the local static ospasswordlength variable. + * + * Returns: On success the password length for the OS consumer in bytes is + * returned, 0 if not. + */ u16 plpks_get_passwordlen(void) { return ospasswordlength; } +/** + * plpks_is_available() - Get the PLPKS availability status for the LPAR. + * + * The availability of PLPKS is inferred based upon the successful execution of + * the H_PKS_GET_CONFIG HCALL provided the firmware supports this feature. The + * H_PKS_GET_CONFIG HCALL reads the configuration and status information related + * to the PLPKS. The configuration structure provides a version number to inform + * the caller of the supported features. + * + * Returns: true is returned if PLPKS is available, false if not. + */ bool plpks_is_available(void) { int rc; @@ -425,6 +599,35 @@ static int plpks_confirm_object_flushed(struct label *label, return pseries_status_to_err(rc); } +/** + * plpks_signed_update_var() - Update the specified authenticated variable. + * @var: authenticated variable to be updated + * @flags: signed update request operation flags + * + * The H_PKS_SIGNED_UPDATE HCALL performs a signed update to an object in the + * PLPKS. The object must have the signed update policy flag set. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid or unsupported policy declaration + * if invalid signed update flags + * if invalid input data parameter + * if invalid input data len parameter + * if invalid continue token parameter + * -EPERM if access is denied + * -ENOMEM if there is inadequate memory to perform the operation + * -EBUSY if unable to handle the request or long running operation + * initiated, retry later + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_signed_update_var(struct plpks_var *var, u64 flags) { unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; @@ -440,6 +643,9 @@ int plpks_signed_update_var(struct plpks_var *var, u64 flags) if (!(var->policy & PLPKS_SIGNEDUPDATE)) return -EINVAL; + if (var->policy & PLPKS_WRAPPINGKEY) + return -EINVAL; + // Signed updates need the component to be NULL. if (var->component) return -EINVAL; @@ -481,6 +687,33 @@ out: return rc; } +/** + * plpks_write_var() - Write the specified variable and its data to PLPKS. + * @var: variable to be written into the PLPKS + * + * The H_PKS_WRITE_OBJECT HCALL writes an object into the PLPKS. The caller must + * provide a valid component type for the variable, and the signed update policy + * flag must not be set. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid or unsupported policy declaration + * if invalid input data parameter + * if invalid input data len parameter + * -EPERM if access is denied + * -ENOMEM if unable to store the requested object in the space available + * -EBUSY if unable to handle the request + * -EEXIST if the object label already exists + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_write_var(struct plpks_var var) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; @@ -495,6 +728,9 @@ int plpks_write_var(struct plpks_var var) if (var.policy & PLPKS_SIGNEDUPDATE) return -EINVAL; + if (var.policy & PLPKS_WRAPPINGKEY) + return -EINVAL; + auth = construct_auth(PLPKS_OS_OWNER); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -520,6 +756,30 @@ out: return rc; } +/** + * plpks_remove_var() - Remove the specified variable and its data from PLPKS. + * @component: metadata prefix in the object label metadata structure + * @varos: metadata OS flags in the object label metadata structure + * @vname: object label for the object that needs to be removed + * + * The H_PKS_REMOVE_OBJECT HCALL removes an object from the PLPKS. The removal + * is independent of the policy bits that are set. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * -EPERM if access is denied + * -ENOENT if the requested object was not found + * -EBUSY if unable to handle the request + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; @@ -565,6 +825,9 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) if (var->namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; + if (var->policy & PLPKS_WRAPPINGKEY) + return -EINVAL; + auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -619,21 +882,421 @@ out_free_auth: return rc; } +/** + * plpks_wrapping_is_supported() - Get the H_PKS_WRAP_OBJECT interface + * availability status for the LPAR. + * + * Successful execution of the H_PKS_GET_CONFIG HCALL during initialization + * sets bit 3 of the flags variable in the PLPKS config structure if the + * H_PKS_WRAP_OBJECT interface is supported. + * + * Returns: true if the H_PKS_WRAP_OBJECT interface is supported, false if not. + */ +bool plpks_wrapping_is_supported(void) +{ + return wrapsupport; +} +EXPORT_SYMBOL_GPL(plpks_wrapping_is_supported); + +/** + * plpks_gen_wrapping_key() - Generate a new random key with the 'wrapping key' + * policy set. + * + * The H_PKS_GEN_KEY HCALL makes the hypervisor generate a new random key and + * store the key in a PLPKS object with the provided object label. With the + * 'wrapping key' policy set, only the label to the newly generated random key + * would be visible to the user. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid or unsupported policy declaration + * if invalid output buffer parameter + * if invalid output buffer length parameter + * -EPERM if access is denied + * -ENOMEM if there is inadequate memory to perform this operation + * -EBUSY if unable to handle the request + * -EEXIST if the object label already exists + * + * Returns: On success 0 is returned, a negative errno if not. + */ +int plpks_gen_wrapping_key(void) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + int rc = 0, pseries_status = 0; + struct plpks_var var = { + .name = PLPKS_WRAPKEY_NAME, + .namelen = strlen(var.name), + .policy = PLPKS_WRAPPINGKEY, + .os = PLPKS_VAR_LINUX, + .component = PLPKS_WRAPKEY_COMPONENT + }; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var.component, var.os, var.name, var.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + rc = plpar_hcall(H_PKS_GEN_KEY, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size, var.policy, + NULL, PLPKS_WRAPPING_KEY_LENGTH); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + pseries_status = rc; + rc = pseries_status_to_err(rc); + + if (rc && rc != -EEXIST) { + pr_err("H_PKS_GEN_KEY failed. pseries_status=%d, rc=%d", + pseries_status, rc); + } else { + rc = 0; + } + + kfree(label); +out: + kfree(auth); + return rc; +} +EXPORT_SYMBOL_GPL(plpks_gen_wrapping_key); + +/** + * plpks_wrap_object() - Wrap an object using the default wrapping key stored in + * the PLPKS. + * @input_buf: buffer containing the data to be wrapped + * @input_len: length of the input buffer + * @wrap_flags: object wrapping flags + * @output_buf: buffer to store the wrapped data + * @output_len: length of the output buffer + * + * The H_PKS_WRAP_OBJECT HCALL wraps an object using a wrapping key stored in + * the PLPKS and returns the wrapped object to the caller. The caller provides a + * label to the wrapping key with the 'wrapping key' policy set that must have + * been previously created with the H_PKS_GEN_KEY HCALL. The provided object is + * then encrypted with the wrapping key and additional metadata and the result + * is returned to the user. The metadata includes the wrapping algorithm and the + * wrapping key name so those parameters are not required during unwrap. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid wrapping key label parameter + * if invalid wrapping key label length parameter + * if invalid or unsupported object wrapping flags + * if invalid input buffer parameter + * if invalid input buffer length parameter + * if invalid output buffer parameter + * if invalid output buffer length parameter + * if invalid continue token parameter + * if the wrapping key is not compatible with the wrapping + * algorithm + * -EPERM if access is denied + * -ENOENT if the requested wrapping key was not found + * -EBUSY if unable to handle the request or long running operation + * initiated, retry later. + * + * Returns: On success 0 is returned, a negative errno if not. + */ +int plpks_wrap_object(u8 **input_buf, u32 input_len, u16 wrap_flags, + u8 **output_buf, u32 *output_len) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + u64 continuetoken = 0; + u64 objwrapflags = 0; + int rc = 0, pseries_status = 0; + bool sb_audit_or_enforce_bit = wrap_flags & BIT(0); + bool sb_enforce_bit = wrap_flags & BIT(1); + struct plpks_var var = { + .name = PLPKS_WRAPKEY_NAME, + .namelen = strlen(var.name), + .os = PLPKS_VAR_LINUX, + .component = PLPKS_WRAPKEY_COMPONENT + }; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var.component, var.os, var.name, var.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + /* Set the consumer password requirement bit. A must have. */ + objwrapflags |= WRAPFLAG_BE_BIT_SET(3); + + /* Set the wrapping algorithm bit. Just one algorithm option for now */ + objwrapflags |= WRAPFLAG_BE_FIELD_PREP(60, 63, 0x1); + + if (sb_audit_or_enforce_bit & sb_enforce_bit) { + pr_err("Cannot set both audit/enforce and enforce bits."); + rc = -EINVAL; + goto out_free_label; + } else if (sb_audit_or_enforce_bit) { + objwrapflags |= WRAPFLAG_BE_BIT_SET(1); + } else if (sb_enforce_bit) { + objwrapflags |= WRAPFLAG_BE_BIT_SET(2); + } + + *output_len = input_len + PLPKS_WRAPPING_BUF_DIFF; + + *output_buf = kzalloc(ALIGN(*output_len, PLPKS_WRAPPING_BUF_ALIGN), + GFP_KERNEL); + if (!(*output_buf)) { + pr_err("Output buffer allocation failed. Returning -ENOMEM."); + rc = -ENOMEM; + goto out_free_label; + } + + do { + rc = plpar_hcall9(H_PKS_WRAP_OBJECT, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size, objwrapflags, + virt_to_phys(*input_buf), input_len, + virt_to_phys(*output_buf), *output_len, + continuetoken); + + continuetoken = retbuf[0]; + pseries_status = rc; + rc = pseries_status_to_err(rc); + } while (rc == -EBUSY); + + if (rc) { + pr_err("H_PKS_WRAP_OBJECT failed. pseries_status=%d, rc=%d", + pseries_status, rc); + kfree(*output_buf); + *output_buf = NULL; + } else { + *output_len = retbuf[1]; + } + +out_free_label: + kfree(label); +out: + kfree(auth); + return rc; +} +EXPORT_SYMBOL_GPL(plpks_wrap_object); + +/** + * plpks_unwrap_object() - Unwrap an object using the default wrapping key + * stored in the PLPKS. + * @input_buf: buffer containing the data to be unwrapped + * @input_len: length of the input buffer + * @output_buf: buffer to store the unwrapped data + * @output_len: length of the output buffer + * + * The H_PKS_UNWRAP_OBJECT HCALL unwraps an object that was previously wrapped + * using the H_PKS_WRAP_OBJECT HCALL. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if PLPKS modification is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid or unsupported object unwrapping flags + * if invalid input buffer parameter + * if invalid input buffer length parameter + * if invalid output buffer parameter + * if invalid output buffer length parameter + * if invalid continue token parameter + * if the wrapping key is not compatible with the wrapping + * algorithm + * if the wrapped object's format is not supported + * if the wrapped object is invalid + * -EPERM if access is denied + * -ENOENT if the wrapping key for the provided object was not found + * -EBUSY if unable to handle the request or long running operation + * initiated, retry later. + * + * Returns: On success 0 is returned, a negative errno if not. + */ +int plpks_unwrap_object(u8 **input_buf, u32 input_len, u8 **output_buf, + u32 *output_len) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + u64 continuetoken = 0; + u64 objwrapflags = 0; + int rc = 0, pseries_status = 0; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + *output_len = input_len - PLPKS_WRAPPING_BUF_DIFF; + *output_buf = kzalloc(ALIGN(*output_len, PLPKS_WRAPPING_BUF_ALIGN), + GFP_KERNEL); + if (!(*output_buf)) { + pr_err("Output buffer allocation failed. Returning -ENOMEM."); + rc = -ENOMEM; + goto out; + } + + do { + rc = plpar_hcall9(H_PKS_UNWRAP_OBJECT, retbuf, + virt_to_phys(auth), objwrapflags, + virt_to_phys(*input_buf), input_len, + virt_to_phys(*output_buf), *output_len, + continuetoken); + + continuetoken = retbuf[0]; + pseries_status = rc; + rc = pseries_status_to_err(rc); + } while (rc == -EBUSY); + + if (rc) { + pr_err("H_PKS_UNWRAP_OBJECT failed. pseries_status=%d, rc=%d", + pseries_status, rc); + kfree(*output_buf); + *output_buf = NULL; + } else { + *output_len = retbuf[1]; + } + +out: + kfree(auth); + return rc; +} +EXPORT_SYMBOL_GPL(plpks_unwrap_object); + +/** + * plpks_read_os_var() - Fetch the data for the specified variable that is owned + * by the OS consumer. + * @var: variable to be read from the PLPKS + * + * The consumer or the owner of the object is the os kernel. The + * H_PKS_READ_OBJECT HCALL reads an object from the PLPKS. The caller must + * allocate the buffer var->data and specify the length for this buffer in + * var->datalen. If no buffer is provided, var->datalen will be populated with + * the requested object's size. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid output data parameter + * if invalid output data len parameter + * -EPERM if access is denied + * -ENOENT if the requested object was not found + * -EFBIG if the requested object couldn't be + * stored in the buffer provided + * -EBUSY if unable to handle the request + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_read_os_var(struct plpks_var *var) { return plpks_read_var(PLPKS_OS_OWNER, var); } +/** + * plpks_read_fw_var() - Fetch the data for the specified variable that is + * owned by the firmware consumer. + * @var: variable to be read from the PLPKS + * + * The consumer or the owner of the object is the firmware. The + * H_PKS_READ_OBJECT HCALL reads an object from the PLPKS. The caller must + * allocate the buffer var->data and specify the length for this buffer in + * var->datalen. If no buffer is provided, var->datalen will be populated with + * the requested object's size. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid output data parameter + * if invalid output data len parameter + * -EPERM if access is denied + * -ENOENT if the requested object was not found + * -EFBIG if the requested object couldn't be + * stored in the buffer provided + * -EBUSY if unable to handle the request + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_read_fw_var(struct plpks_var *var) { return plpks_read_var(PLPKS_FW_OWNER, var); } +/** + * plpks_read_bootloader_var() - Fetch the data for the specified variable + * owned by the bootloader consumer. + * @var: variable to be read from the PLPKS + * + * The consumer or the owner of the object is the bootloader. The + * H_PKS_READ_OBJECT HCALL reads an object from the PLPKS. The caller must + * allocate the buffer var->data and specify the length for this buffer in + * var->datalen. If no buffer is provided, var->datalen will be populated with + * the requested object's size. + * + * Possible reasons for the returned errno values: + * + * -ENXIO if PLPKS is not supported + * -EIO if PLPKS access is blocked due to the LPAR's state + * if an error occurred while processing the request + * -EINVAL if invalid authorization parameter + * if invalid object label parameter + * if invalid object label len parameter + * if invalid output data parameter + * if invalid output data len parameter + * -EPERM if access is denied + * -ENOENT if the requested object was not found + * -EFBIG if the requested object couldn't be + * stored in the buffer provided + * -EBUSY if unable to handle the request + * + * Returns: On success 0 is returned, a negative errno if not. + */ int plpks_read_bootloader_var(struct plpks_var *var) { return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var); } +/** + * plpks_populate_fdt(): Populates the FDT with the PLPKS password to prepare + * for kexec. + * @fdt: pointer to the device tree blob + * + * Upon confirming the existence of the chosen node, invoke fdt_setprop to + * populate the device tree with the PLPKS password in order to prepare for + * kexec. + * + * Returns: On success 0 is returned, a negative value if not. + */ int plpks_populate_fdt(void *fdt) { int chosen_offset = fdt_path_offset(fdt, "/chosen"); @@ -647,14 +1310,19 @@ int plpks_populate_fdt(void *fdt) return fdt_setprop(fdt, chosen_offset, "ibm,plpks-pw", ospassword, ospasswordlength); } -// Once a password is registered with the hypervisor it cannot be cleared without -// rebooting the LPAR, so to keep using the PLPKS across kexec boots we need to -// recover the previous password from the FDT. -// -// There are a few challenges here. We don't want the password to be visible to -// users, so we need to clear it from the FDT. This has to be done in early boot. -// Clearing it from the FDT would make the FDT's checksum invalid, so we have to -// manually cause the checksum to be recalculated. +/** + * plpks_early_init_devtree() - Retrieves and clears the PLPKS password from the + * DT in early init. + * + * Once a password is registered with the hypervisor it cannot be cleared + * without rebooting the LPAR, so to keep using the PLPKS across kexec boots we + * need to recover the previous password from the FDT. + * + * There are a few challenges here. We don't want the password to be visible to + * users, so we need to clear it from the FDT. This has to be done in early + * boot. Clearing it from the FDT would make the FDT's checksum invalid, so we + * have to manually cause the checksum to be recalculated. + */ void __init plpks_early_init_devtree(void) { void *fdt = initial_boot_params; |
