diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/base64.h | 10 | ||||
| -rw-r--r-- | include/linux/compiler.h | 6 | ||||
| -rw-r--r-- | include/linux/crash_reserve.h | 6 | ||||
| -rw-r--r-- | include/linux/dynamic_debug.h | 17 | ||||
| -rw-r--r-- | include/linux/kexec_handover.h | 57 | ||||
| -rw-r--r-- | include/linux/kho/abi/luo.h | 166 | ||||
| -rw-r--r-- | include/linux/kho/abi/memfd.h | 77 | ||||
| -rw-r--r-- | include/linux/liveupdate.h | 138 | ||||
| -rw-r--r-- | include/linux/math.h | 13 | ||||
| -rw-r--r-- | include/linux/math64.h | 59 | ||||
| -rw-r--r-- | include/linux/once_lite.h | 2 | ||||
| -rw-r--r-- | include/linux/panic.h | 1 | ||||
| -rw-r--r-- | include/linux/rbtree.h | 32 | ||||
| -rw-r--r-- | include/linux/shmem_fs.h | 23 | ||||
| -rw-r--r-- | include/linux/sys_info.h | 2 | ||||
| -rw-r--r-- | include/linux/uaccess.h | 6 | ||||
| -rw-r--r-- | include/linux/util_macros.h | 4 | ||||
| -rw-r--r-- | include/linux/vmcore_info.h | 8 | ||||
| -rw-r--r-- | include/linux/xxhash.h | 46 | ||||
| -rw-r--r-- | include/uapi/linux/liveupdate.h | 216 | ||||
| -rw-r--r-- | include/uapi/linux/vmcore.h | 9 |
21 files changed, 799 insertions, 99 deletions
diff --git a/include/linux/base64.h b/include/linux/base64.h index 660d4cb1ef31..a2c6c9222da3 100644 --- a/include/linux/base64.h +++ b/include/linux/base64.h @@ -8,9 +8,15 @@ #include <linux/types.h> +enum base64_variant { + BASE64_STD, /* RFC 4648 (standard) */ + BASE64_URLSAFE, /* RFC 4648 (base64url) */ + BASE64_IMAP, /* RFC 3501 */ +}; + #define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) -int base64_encode(const u8 *src, int len, char *dst); -int base64_decode(const char *src, int len, u8 *dst); +int base64_encode(const u8 *src, int len, char *dst, bool padding, enum base64_variant variant); +int base64_decode(const char *src, int len, u8 *dst, bool padding, enum base64_variant variant); #endif /* _LINUX_BASE64_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ab181d87d71d..ff71bebe56f5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -273,12 +273,6 @@ static inline void *offset_to_ptr(const int *off) #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_64BIT -#define ARCH_SEL(a,b) a -#else -#define ARCH_SEL(a,b) b -#endif - /* * Force the compiler to emit 'sym' as a symbol, so that we can reference * it from inline assembler. Necessary in case 'sym' could be inlined diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 7b44b41d0a20..f0dc03d94ca2 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -32,6 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, void __init reserve_crashkernel_cma(unsigned long long cma_size); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef arch_add_crash_res_to_iomem +static inline bool arch_add_crash_res_to_iomem(void) +{ + return true; +} +#endif #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) #endif diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index ff44ec346162..05743900a116 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -38,11 +38,12 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID (1<<4) #define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5) +#define _DPRINTK_FLAGS_INCL_STACK (1<<6) #define _DPRINTK_FLAGS_INCL_ANY \ (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\ _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\ - _DPRINTK_FLAGS_INCL_SOURCENAME) + _DPRINTK_FLAGS_INCL_SOURCENAME | _DPRINTK_FLAGS_INCL_STACK) #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT @@ -160,6 +161,12 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, const struct ib_device *ibdev, const char *fmt, ...); +#define __dynamic_dump_stack(desc) \ +{ \ + if (desc.flags & _DPRINTK_FLAGS_INCL_STACK) \ + dump_stack(); \ +} + #define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt) \ static struct _ddebug __aligned(8) \ __section("__dyndbg") name = { \ @@ -220,8 +227,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, */ #define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \ DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(id)) \ + if (DYNAMIC_DEBUG_BRANCH(id)) { \ func(&id, ##__VA_ARGS__); \ + __dynamic_dump_stack(id); \ + } \ } while (0) #define __dynamic_func_call(id, fmt, func, ...) \ __dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt, \ @@ -229,8 +238,10 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do { \ DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(id)) \ + if (DYNAMIC_DEBUG_BRANCH(id)) { \ func(__VA_ARGS__); \ + __dynamic_dump_stack(id); \ + } \ } while (0) #define __dynamic_func_call_no_desc(id, fmt, func, ...) \ __dynamic_func_call_cls_no_desc(id, _DPRINTK_CLASS_DFLT, \ diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 25042c1d8d54..5f7b9de97e8d 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -2,22 +2,16 @@ #ifndef LINUX_KEXEC_HANDOVER_H #define LINUX_KEXEC_HANDOVER_H -#include <linux/types.h> +#include <linux/err.h> #include <linux/errno.h> +#include <linux/types.h> struct kho_scratch { phys_addr_t addr; phys_addr_t size; }; -/* KHO Notifier index */ -enum kho_event { - KEXEC_KHO_FINALIZE = 0, - KEXEC_KHO_ABORT = 1, -}; - struct folio; -struct notifier_block; struct page; #define DECLARE_KHOSER_PTR(name, type) \ @@ -37,8 +31,6 @@ struct page; (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \ }) -struct kho_serialization; - struct kho_vmalloc_chunk; struct kho_vmalloc { DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); @@ -52,17 +44,21 @@ bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); +void kho_unpreserve_folio(struct folio *folio); int kho_preserve_pages(struct page *page, unsigned int nr_pages); +void kho_unpreserve_pages(struct page *page, unsigned int nr_pages); int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); +void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation); +void *kho_alloc_preserve(size_t size); +void kho_unpreserve_free(void *mem); +void kho_restore_free(void *mem); struct folio *kho_restore_folio(phys_addr_t phys); struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); -int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); +int kho_add_subtree(const char *name, void *fdt); +void kho_remove_subtree(void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); -int register_kho_notifier(struct notifier_block *nb); -int unregister_kho_notifier(struct notifier_block *nb); - void kho_memory_init(void); void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, @@ -83,17 +79,31 @@ static inline int kho_preserve_folio(struct folio *folio) return -EOPNOTSUPP; } +static inline void kho_unpreserve_folio(struct folio *folio) { } + static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) { return -EOPNOTSUPP; } +static inline void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) { } + static inline int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation) { return -EOPNOTSUPP; } +static inline void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation) { } + +static inline void *kho_alloc_preserve(size_t size) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void kho_unpreserve_free(void *mem) { } +static inline void kho_restore_free(void *mem) { } + static inline struct folio *kho_restore_folio(phys_addr_t phys) { return NULL; @@ -110,30 +120,19 @@ static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) return NULL; } -static inline int kho_add_subtree(struct kho_serialization *ser, - const char *name, void *fdt) -{ - return -EOPNOTSUPP; -} - -static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +static inline int kho_add_subtree(const char *name, void *fdt) { return -EOPNOTSUPP; } -static inline int register_kho_notifier(struct notifier_block *nb) -{ - return -EOPNOTSUPP; -} +static inline void kho_remove_subtree(void *fdt) { } -static inline int unregister_kho_notifier(struct notifier_block *nb) +static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys) { return -EOPNOTSUPP; } -static inline void kho_memory_init(void) -{ -} +static inline void kho_memory_init(void) { } static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, u64 scratch_len) diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h new file mode 100644 index 000000000000..bb099c92e469 --- /dev/null +++ b/include/linux/kho/abi/luo.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +/** + * DOC: Live Update Orchestrator ABI + * + * This header defines the stable Application Binary Interface used by the + * Live Update Orchestrator to pass state from a pre-update kernel to a + * post-update kernel. The ABI is built upon the Kexec HandOver framework + * and uses a Flattened Device Tree to describe the preserved data. + * + * This interface is a contract. Any modification to the FDT structure, node + * properties, compatible strings, or the layout of the `__packed` serialization + * structures defined here constitutes a breaking change. Such changes require + * incrementing the version number in the relevant `_COMPATIBLE` string to + * prevent a new kernel from misinterpreting data from an old kernel. + * + * Changes are allowed provided the compatibility version is incremented; + * however, backward/forward compatibility is only guaranteed for kernels + * supporting the same ABI version. + * + * FDT Structure Overview: + * The entire LUO state is encapsulated within a single KHO entry named "LUO". + * This entry contains an FDT with the following layout: + * + * .. code-block:: none + * + * / { + * compatible = "luo-v1"; + * liveupdate-number = <...>; + * + * luo-session { + * compatible = "luo-session-v1"; + * luo-session-header = <phys_addr_of_session_header_ser>; + * }; + * }; + * + * Main LUO Node (/): + * + * - compatible: "luo-v1" + * Identifies the overall LUO ABI version. + * - liveupdate-number: u64 + * A counter tracking the number of successful live updates performed. + * + * Session Node (luo-session): + * This node describes all preserved user-space sessions. + * + * - compatible: "luo-session-v1" + * Identifies the session ABI version. + * - luo-session-header: u64 + * The physical address of a `struct luo_session_header_ser`. This structure + * is the header for a contiguous block of memory containing an array of + * `struct luo_session_ser`, one for each preserved session. + * + * Serialization Structures: + * The FDT properties point to memory regions containing arrays of simple, + * `__packed` structures. These structures contain the actual preserved state. + * + * - struct luo_session_header_ser: + * Header for the session array. Contains the total page count of the + * preserved memory block and the number of `struct luo_session_ser` + * entries that follow. + * + * - struct luo_session_ser: + * Metadata for a single session, including its name and a physical pointer + * to another preserved memory block containing an array of + * `struct luo_file_ser` for all files in that session. + * + * - struct luo_file_ser: + * Metadata for a single preserved file. Contains the `compatible` string to + * find the correct handler in the new kernel, a user-provided `token` for + * identification, and an opaque `data` handle for the handler to use. + */ + +#ifndef _LINUX_KHO_ABI_LUO_H +#define _LINUX_KHO_ABI_LUO_H + +#include <uapi/linux/liveupdate.h> + +/* + * The LUO FDT hooks all LUO state for sessions, fds, etc. + * In the root it also carries "liveupdate-number" 64-bit property that + * corresponds to the number of live-updates performed on this machine. + */ +#define LUO_FDT_SIZE PAGE_SIZE +#define LUO_FDT_KHO_ENTRY_NAME "LUO" +#define LUO_FDT_COMPATIBLE "luo-v1" +#define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number" + +#define LIVEUPDATE_HNDL_COMPAT_LENGTH 48 + +/** + * struct luo_file_ser - Represents the serialized preserves files. + * @compatible: File handler compatible string. + * @data: Private data + * @token: User provided token for this file + * + * If this structure is modified, LUO_SESSION_COMPATIBLE must be updated. + */ +struct luo_file_ser { + char compatible[LIVEUPDATE_HNDL_COMPAT_LENGTH]; + u64 data; + u64 token; +} __packed; + +/** + * struct luo_file_set_ser - Represents the serialized metadata for file set + * @files: The physical address of a contiguous memory block that holds + * the serialized state of files (array of luo_file_ser) in this file + * set. + * @count: The total number of files that were part of this session during + * serialization. Used for iteration and validation during + * restoration. + */ +struct luo_file_set_ser { + u64 files; + u64 count; +} __packed; + +/* + * LUO FDT session node + * LUO_FDT_SESSION_HEADER: is a u64 physical address of struct + * luo_session_header_ser + */ +#define LUO_FDT_SESSION_NODE_NAME "luo-session" +#define LUO_FDT_SESSION_COMPATIBLE "luo-session-v2" +#define LUO_FDT_SESSION_HEADER "luo-session-header" + +/** + * struct luo_session_header_ser - Header for the serialized session data block. + * @count: The number of `struct luo_session_ser` entries that immediately + * follow this header in the memory block. + * + * This structure is located at the beginning of a contiguous block of + * physical memory preserved across the kexec. It provides the necessary + * metadata to interpret the array of session entries that follow. + * + * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated. + */ +struct luo_session_header_ser { + u64 count; +} __packed; + +/** + * struct luo_session_ser - Represents the serialized metadata for a LUO session. + * @name: The unique name of the session, provided by the userspace at + * the time of session creation. + * @file_set_ser: Serialized files belonging to this session, + * + * This structure is used to package session-specific metadata for transfer + * between kernels via Kexec Handover. An array of these structures (one per + * session) is created and passed to the new kernel, allowing it to reconstruct + * the session context. + * + * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated. + */ +struct luo_session_ser { + char name[LIVEUPDATE_SESSION_NAME_LENGTH]; + struct luo_file_set_ser file_set_ser; +} __packed; + +#endif /* _LINUX_KHO_ABI_LUO_H */ diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h new file mode 100644 index 000000000000..da7d063474a1 --- /dev/null +++ b/include/linux/kho/abi/memfd.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + * + * Copyright (C) 2025 Amazon.com Inc. or its affiliates. + * Pratyush Yadav <ptyadav@amazon.de> + */ + +#ifndef _LINUX_KHO_ABI_MEMFD_H +#define _LINUX_KHO_ABI_MEMFD_H + +#include <linux/types.h> +#include <linux/kexec_handover.h> + +/** + * DOC: memfd Live Update ABI + * + * This header defines the ABI for preserving the state of a memfd across a + * kexec reboot using the LUO. + * + * The state is serialized into a packed structure `struct memfd_luo_ser` + * which is handed over to the next kernel via the KHO mechanism. + * + * This interface is a contract. Any modification to the structure layout + * constitutes a breaking change. Such changes require incrementing the + * version number in the MEMFD_LUO_FH_COMPATIBLE string. + */ + +/** + * MEMFD_LUO_FOLIO_DIRTY - The folio is dirty. + * + * This flag indicates the folio contains data from user. A non-dirty folio is + * one that was allocated (say using fallocate(2)) but not written to. + */ +#define MEMFD_LUO_FOLIO_DIRTY BIT(0) + +/** + * MEMFD_LUO_FOLIO_UPTODATE - The folio is up-to-date. + * + * An up-to-date folio has been zeroed out. shmem zeroes out folios on first + * use. This flag tracks which folios need zeroing. + */ +#define MEMFD_LUO_FOLIO_UPTODATE BIT(1) + +/** + * struct memfd_luo_folio_ser - Serialized state of a single folio. + * @pfn: The page frame number of the folio. + * @flags: Flags to describe the state of the folio. + * @index: The page offset (pgoff_t) of the folio within the original file. + */ +struct memfd_luo_folio_ser { + u64 pfn:52; + u64 flags:12; + u64 index; +} __packed; + +/** + * struct memfd_luo_ser - Main serialization structure for a memfd. + * @pos: The file's current position (f_pos). + * @size: The total size of the file in bytes (i_size). + * @nr_folios: Number of folios in the folios array. + * @folios: KHO vmalloc descriptor pointing to the array of + * struct memfd_luo_folio_ser. + */ +struct memfd_luo_ser { + u64 pos; + u64 size; + u64 nr_folios; + struct kho_vmalloc folios; +} __packed; + +/* The compatibility string for memfd file handler */ +#define MEMFD_LUO_FH_COMPATIBLE "memfd-v1" + +#endif /* _LINUX_KHO_ABI_MEMFD_H */ diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h new file mode 100644 index 000000000000..a7f6ee5b6771 --- /dev/null +++ b/include/linux/liveupdate.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ +#ifndef _LINUX_LIVEUPDATE_H +#define _LINUX_LIVEUPDATE_H + +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/kho/abi/luo.h> +#include <linux/list.h> +#include <linux/types.h> +#include <uapi/linux/liveupdate.h> + +struct liveupdate_file_handler; +struct file; + +/** + * struct liveupdate_file_op_args - Arguments for file operation callbacks. + * @handler: The file handler being called. + * @retrieved: The retrieve status for the 'can_finish / finish' + * operation. + * @file: The file object. For retrieve: [OUT] The callback sets + * this to the new file. For other ops: [IN] The caller sets + * this to the file being operated on. + * @serialized_data: The opaque u64 handle, preserve/prepare/freeze may update + * this field. + * @private_data: Private data for the file used to hold runtime state that + * is not preserved. Set by the handler's .preserve() + * callback, and must be freed in the handler's + * .unpreserve() callback. + * + * This structure bundles all parameters for the file operation callbacks. + * The 'data' and 'file' fields are used for both input and output. + */ +struct liveupdate_file_op_args { + struct liveupdate_file_handler *handler; + bool retrieved; + struct file *file; + u64 serialized_data; + void *private_data; +}; + +/** + * struct liveupdate_file_ops - Callbacks for live-updatable files. + * @can_preserve: Required. Lightweight check to see if this handler is + * compatible with the given file. + * @preserve: Required. Performs state-saving for the file. + * @unpreserve: Required. Cleans up any resources allocated by @preserve. + * @freeze: Optional. Final actions just before kernel transition. + * @unfreeze: Optional. Undo freeze operations. + * @retrieve: Required. Restores the file in the new kernel. + * @can_finish: Optional. Check if this FD can finish, i.e. all restoration + * pre-requirements for this FD are satisfied. Called prior to + * finish, in order to do successful finish calls for all + * resources in the session. + * @finish: Required. Final cleanup in the new kernel. + * @owner: Module reference + * + * All operations (except can_preserve) receive a pointer to a + * 'struct liveupdate_file_op_args' containing the necessary context. + */ +struct liveupdate_file_ops { + bool (*can_preserve)(struct liveupdate_file_handler *handler, + struct file *file); + int (*preserve)(struct liveupdate_file_op_args *args); + void (*unpreserve)(struct liveupdate_file_op_args *args); + int (*freeze)(struct liveupdate_file_op_args *args); + void (*unfreeze)(struct liveupdate_file_op_args *args); + int (*retrieve)(struct liveupdate_file_op_args *args); + bool (*can_finish)(struct liveupdate_file_op_args *args); + void (*finish)(struct liveupdate_file_op_args *args); + struct module *owner; +}; + +/** + * struct liveupdate_file_handler - Represents a handler for a live-updatable file type. + * @ops: Callback functions + * @compatible: The compatibility string (e.g., "memfd-v1", "vfiofd-v1") + * that uniquely identifies the file type this handler + * supports. This is matched against the compatible string + * associated with individual &struct file instances. + * + * Modules that want to support live update for specific file types should + * register an instance of this structure. LUO uses this registration to + * determine if a given file can be preserved and to find the appropriate + * operations to manage its state across the update. + */ +struct liveupdate_file_handler { + const struct liveupdate_file_ops *ops; + const char compatible[LIVEUPDATE_HNDL_COMPAT_LENGTH]; + + /* private: */ + + /* + * Used for linking this handler instance into a global list of + * registered file handlers. + */ + struct list_head __private list; +}; + +#ifdef CONFIG_LIVEUPDATE + +/* Return true if live update orchestrator is enabled */ +bool liveupdate_enabled(void); + +/* Called during kexec to tell LUO that entered into reboot */ +int liveupdate_reboot(void); + +int liveupdate_register_file_handler(struct liveupdate_file_handler *fh); +int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh); + +#else /* CONFIG_LIVEUPDATE */ + +static inline bool liveupdate_enabled(void) +{ + return false; +} + +static inline int liveupdate_reboot(void) +{ + return 0; +} + +static inline int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_LIVEUPDATE */ +#endif /* _LINUX_LIVEUPDATE_H */ diff --git a/include/linux/math.h b/include/linux/math.h index 0198c92cbe3e..6dc1d1d32fbc 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -148,11 +148,16 @@ __STRUCT_FRACT(u32) /** * abs - return absolute value of an argument - * @x: the value. If it is unsigned type, it is converted to signed type first. - * char is treated as if it was signed (regardless of whether it really is) - * but the macro's return type is preserved as char. + * @x: the value. * - * Return: an absolute value of x. + * If it is unsigned type, @x is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. + * + * NOTE, for signed type if @x is the minimum, the returned result is undefined + * as there is not enough bits to represent it as a positive number. + * + * Return: an absolute value of @x. */ #define abs(x) __abs_choose_expr(x, long long, \ __abs_choose_expr(x, long, \ diff --git a/include/linux/math64.h b/include/linux/math64.h index 6aaccc1626ab..cc305206d89f 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -158,6 +158,17 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #endif +#ifndef add_u64_u32 +/* + * Many a GCC version also messes this up. + * Zero extending b and then spilling everything to stack. + */ +static inline u64 add_u64_u32(u64 a, u32 b) +{ + return a + b; +} +#endif + #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr @@ -282,7 +293,53 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } #endif /* mul_u64_u32_div */ -u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); +/** + * mul_u64_add_u64_div_u64 - unsigned 64bit multiply, add, and divide + * @a: first unsigned 64bit multiplicand + * @b: second unsigned 64bit multiplicand + * @c: unsigned 64bit addend + * @d: unsigned 64bit divisor + * + * Multiply two 64bit values together to generate a 128bit product + * add a third value and then divide by a fourth. + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. + * Architecture specific code may trap on zero or overflow. + * + * Return: (@a * @b + @c) / @d + */ +u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d); + +/** + * mul_u64_u64_div_u64 - unsigned 64bit multiply and divide + * @a: first unsigned 64bit multiplicand + * @b: second unsigned 64bit multiplicand + * @d: unsigned 64bit divisor + * + * Multiply two 64bit values together to generate a 128bit product + * and then divide by a third value. + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. + * Architecture specific code may trap on zero or overflow. + * + * Return: @a * @b / @d + */ +#define mul_u64_u64_div_u64(a, b, d) mul_u64_add_u64_div_u64(a, b, 0, d) + +/** + * mul_u64_u64_div_u64_roundup - unsigned 64bit multiply and divide rounded up + * @a: first unsigned 64bit multiplicand + * @b: second unsigned 64bit multiplicand + * @d: unsigned 64bit divisor + * + * Multiply two 64bit values together to generate a 128bit product + * and then divide and round up. + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. + * Architecture specific code may trap on zero or overflow. + * + * Return: (@a * @b + @d - 1) / @d + */ +#define mul_u64_u64_div_u64_roundup(a, b, d) \ + ({ u64 _tmp = (d); mul_u64_add_u64_div_u64(a, b, _tmp - 1, _tmp); }) + /** * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index 27de7bc32a06..236592c4eeb1 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -16,7 +16,7 @@ bool __ret_cond = !!(condition); \ bool __ret_once = false; \ \ - if (unlikely(__ret_cond && !__already_done)) { \ + if (unlikely(__ret_cond) && unlikely(!__already_done)) {\ __already_done = true; \ __ret_once = true; \ } \ diff --git a/include/linux/panic.h b/include/linux/panic.h index 6f972a66c13e..a00bc0937698 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -86,7 +86,6 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) struct taint_flag { char c_true; /* character printed when tainted */ char c_false; /* character printed when not tainted */ - bool module; /* also show as a per-module taint flag */ const char *desc; /* verbose description of the set taint flag */ }; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 8d2ba3749866..4091e978aef2 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -43,8 +43,36 @@ extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); -extern struct rb_node *rb_first(const struct rb_root *); -extern struct rb_node *rb_last(const struct rb_root *); + +/* + * This function returns the first node (in sort order) of the tree. + */ +static inline struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +/* + * This function returns the last node (in sort order) of the tree. + */ +static inline struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 5e4b3c1ae5c2..e2069b3179c4 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -10,6 +10,7 @@ #include <linux/xattr.h> #include <linux/fs_parser.h> #include <linux/userfaultfd_k.h> +#include <linux/bits.h> struct swap_iocb; @@ -19,6 +20,19 @@ struct swap_iocb; #define SHMEM_MAXQUOTAS 2 #endif +/* Suppress pre-accounting of the entire object size. */ +#define SHMEM_F_NORESERVE BIT(0) +/* Disallow swapping. */ +#define SHMEM_F_LOCKED BIT(1) +/* + * Disallow growing, shrinking, or hole punching in the inode. Combined with + * folio pinning, makes sure the inode's mapping stays fixed. + * + * In some ways similar to F_SEAL_GROW | F_SEAL_SHRINK, but can be removed and + * isn't directly visible to userspace. + */ +#define SHMEM_F_MAPPING_FROZEN BIT(2) + struct shmem_inode_info { spinlock_t lock; unsigned int seals; /* shmem seals */ @@ -186,6 +200,15 @@ static inline bool shmem_file(struct file *file) return shmem_mapping(file->f_mapping); } +/* Must be called with inode lock taken exclusive. */ +static inline void shmem_freeze(struct inode *inode, bool freeze) +{ + if (freeze) + SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN; + else + SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN; +} + /* * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages * beyond i_size's notion of EOF, which fallocate has committed to reserving: diff --git a/include/linux/sys_info.h b/include/linux/sys_info.h index 89d77dc4f2ed..a5bc3ea3d44b 100644 --- a/include/linux/sys_info.h +++ b/include/linux/sys_info.h @@ -14,7 +14,7 @@ #define SYS_INFO_LOCKS 0x00000008 #define SYS_INFO_FTRACE 0x00000010 #define SYS_INFO_PANIC_CONSOLE_REPLAY 0x00000020 -#define SYS_INFO_ALL_CPU_BT 0x00000040 +#define SYS_INFO_ALL_BT 0x00000040 #define SYS_INFO_BLOCKED_TASKS 0x00000080 void sys_info(unsigned long si_mask); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index be395f5f7ee3..1f3804245c06 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -161,8 +161,6 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) * directly in the normal copy_to/from_user(), the other ones go * through an extern _copy_to/from_user(), which expands the same code * here. - * - * Rust code always uses the extern definition. */ static inline __must_check unsigned long _inline_copy_from_user(void *to, const void __user *from, unsigned long n) @@ -192,8 +190,10 @@ fail: memset(to + (n - res), 0, res); return res; } +#ifndef INLINE_COPY_FROM_USER extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); +#endif static inline __must_check unsigned long _inline_copy_to_user(void __user *to, const void *from, unsigned long n) @@ -207,8 +207,10 @@ _inline_copy_to_user(void __user *to, const void *from, unsigned long n) } return n; } +#ifndef INLINE_COPY_TO_USER extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); +#endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 9373962aade9..2eb528058d0d 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -136,10 +136,10 @@ #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) /** - * to_user_ptr - cast a pointer passed as u64 from user space to void __user * + * u64_to_user_ptr - cast a pointer passed as u64 from user space to void __user * * @x: The u64 value from user space, usually via IOCTL * - * to_user_ptr() simply casts a pointer passed as u64 from user space to void + * u64_to_user_ptr() simply casts a pointer passed as u64 from user space to void * __user * correctly. Using this lets us get rid of all the tiresome casts. */ #define u64_to_user_ptr(x) \ diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h index 37e003ae5262..e71518caacdf 100644 --- a/include/linux/vmcore_info.h +++ b/include/linux/vmcore_info.h @@ -5,6 +5,7 @@ #include <linux/linkage.h> #include <linux/elfcore.h> #include <linux/elf.h> +#include <uapi/linux/vmcore.h> #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) #define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4) @@ -77,4 +78,11 @@ extern u32 *vmcoreinfo_note; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); + +#ifdef CONFIG_VMCORE_INFO +void hwerr_log_error_type(enum hwerr_error_type src); +#else +static inline void hwerr_log_error_type(enum hwerr_error_type src) {}; +#endif + #endif /* LINUX_VMCORE_INFO_H */ diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index 27f57eca8cb1..587122e2c29c 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -141,21 +141,7 @@ static inline unsigned long xxhash(const void *input, size_t length, */ /** - * struct xxh32_state - private xxh32 state, do not use members directly - */ -struct xxh32_state { - uint32_t total_len_32; - uint32_t large_len; - uint32_t v1; - uint32_t v2; - uint32_t v3; - uint32_t v4; - uint32_t mem32[4]; - uint32_t memsize; -}; - -/** - * struct xxh32_state - private xxh64 state, do not use members directly + * struct xxh64_state - private xxh64 state, do not use members directly */ struct xxh64_state { uint64_t total_len; @@ -168,16 +154,6 @@ struct xxh64_state { }; /** - * xxh32_reset() - reset the xxh32 state to start a new hashing operation - * - * @state: The xxh32 state to reset. - * @seed: Initialize the hash state with this seed. - * - * Call this function on any xxh32_state to prepare for a new hashing operation. - */ -void xxh32_reset(struct xxh32_state *state, uint32_t seed); - -/** * xxh64_reset() - reset the xxh64 state to start a new hashing operation * * @state: The xxh64 state to reset. @@ -210,24 +186,4 @@ int xxh64_update(struct xxh64_state *state, const void *input, size_t length); */ uint64_t xxh64_digest(const struct xxh64_state *state); -/*-************************** - * Utils - ***************************/ - -/** - * xxh32_copy_state() - copy the source state into the destination state - * - * @src: The source xxh32 state. - * @dst: The destination xxh32 state. - */ -void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src); - -/** - * xxh64_copy_state() - copy the source state into the destination state - * - * @src: The source xxh64 state. - * @dst: The destination xxh64 state. - */ -void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src); - #endif /* XXHASH_H */ diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h new file mode 100644 index 000000000000..30bc66ee9436 --- /dev/null +++ b/include/uapi/linux/liveupdate.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +/* + * Userspace interface for /dev/liveupdate + * Live Update Orchestrator + * + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#ifndef _UAPI_LIVEUPDATE_H +#define _UAPI_LIVEUPDATE_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed in a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. This allows userspace to use the backward + * compatible portion while consistently using the newer, larger, structures. + * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has + * non-zero in a part the kernel does not understand. + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOENT: A provided token does not exist. + * - ENOMEM: Out of memory. + * - EOVERFLOW: Mathematics overflowed. + * + * As well as additional errnos, within specific ioctls. + */ + +/* The ioctl type, documented in ioctl-number.rst */ +#define LIVEUPDATE_IOCTL_TYPE 0xBA + +/* The maximum length of session name including null termination */ +#define LIVEUPDATE_SESSION_NAME_LENGTH 64 + +/* The /dev/liveupdate ioctl commands */ +enum { + LIVEUPDATE_CMD_BASE = 0x00, + LIVEUPDATE_CMD_CREATE_SESSION = LIVEUPDATE_CMD_BASE, + LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01, +}; + +/* ioctl commands for session file descriptors */ +enum { + LIVEUPDATE_CMD_SESSION_BASE = 0x40, + LIVEUPDATE_CMD_SESSION_PRESERVE_FD = LIVEUPDATE_CMD_SESSION_BASE, + LIVEUPDATE_CMD_SESSION_RETRIEVE_FD = 0x41, + LIVEUPDATE_CMD_SESSION_FINISH = 0x42, +}; + +/** + * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION) + * @size: Input; sizeof(struct liveupdate_ioctl_create_session) + * @fd: Output; The new file descriptor for the created session. + * @name: Input; A null-terminated string for the session name, max + * length %LIVEUPDATE_SESSION_NAME_LENGTH including termination + * character. + * + * Creates a new live update session for managing preserved resources. + * This ioctl can only be called on the main /dev/liveupdate device. + * + * Return: 0 on success, negative error code on failure. + */ +struct liveupdate_ioctl_create_session { + __u32 size; + __s32 fd; + __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH]; +}; + +#define LIVEUPDATE_IOCTL_CREATE_SESSION \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_CREATE_SESSION) + +/** + * struct liveupdate_ioctl_retrieve_session - ioctl(LIVEUPDATE_IOCTL_RETRIEVE_SESSION) + * @size: Input; sizeof(struct liveupdate_ioctl_retrieve_session) + * @fd: Output; The new file descriptor for the retrieved session. + * @name: Input; A null-terminated string identifying the session to retrieve. + * The name must exactly match the name used when the session was + * created in the previous kernel. + * + * Retrieves a handle (a new file descriptor) for a preserved session by its + * name. This is the primary mechanism for a userspace agent to regain control + * of its preserved resources after a live update. + * + * The userspace application provides the null-terminated `name` of a session + * it created before the live update. If a preserved session with a matching + * name is found, the kernel instantiates it and returns a new file descriptor + * in the `fd` field. This new session FD can then be used for all file-specific + * operations, such as restoring individual file descriptors with + * LIVEUPDATE_SESSION_RETRIEVE_FD. + * + * It is the responsibility of the userspace application to know the names of + * the sessions it needs to retrieve. If no session with the given name is + * found, the ioctl will fail with -ENOENT. + * + * This ioctl can only be called on the main /dev/liveupdate device when the + * system is in the LIVEUPDATE_STATE_UPDATED state. + */ +struct liveupdate_ioctl_retrieve_session { + __u32 size; + __s32 fd; + __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH]; +}; + +#define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION) + +/* Session specific IOCTLs */ + +/** + * struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD) + * @size: Input; sizeof(struct liveupdate_session_preserve_fd) + * @fd: Input; The user-space file descriptor to be preserved. + * @token: Input; An opaque, unique token for preserved resource. + * + * Holds parameters for preserving a file descriptor. + * + * User sets the @fd field identifying the file descriptor to preserve + * (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type + * and its dependencies are supported for preservation. If validation passes, + * the kernel marks the FD internally and *initiates the process* of preparing + * its state for saving. The actual snapshotting of the state typically occurs + * during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though + * some finalization might occur during freeze. + * On successful validation and initiation, the kernel uses the @token + * field with an opaque identifier representing the resource being preserved. + * This token confirms the FD is targeted for preservation and is required for + * the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after the live update. + * + * Return: 0 on success (validation passed, preservation initiated), negative + * error code on failure (e.g., unsupported FD type, dependency issue, + * validation failed). + */ +struct liveupdate_session_preserve_fd { + __u32 size; + __s32 fd; + __aligned_u64 token; +}; + +#define LIVEUPDATE_SESSION_PRESERVE_FD \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD) + +/** + * struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD) + * @size: Input; sizeof(struct liveupdate_session_retrieve_fd) + * @fd: Output; The new file descriptor representing the fully restored + * kernel resource. + * @token: Input; An opaque, token that was used to preserve the resource. + * + * Retrieve a previously preserved file descriptor. + * + * User sets the @token field to the value obtained from a successful + * %LIVEUPDATE_IOCTL_FD_PRESERVE call before the live update. On success, + * the kernel restores the state (saved during the PREPARE/FREEZE phases) + * associated with the token and populates the @fd field with a new file + * descriptor referencing the restored resource in the current (new) kernel. + * This operation must be performed *before* signaling completion via + * %LIVEUPDATE_IOCTL_FINISH. + * + * Return: 0 on success, negative error code on failure (e.g., invalid token). + */ +struct liveupdate_session_retrieve_fd { + __u32 size; + __s32 fd; + __aligned_u64 token; +}; + +#define LIVEUPDATE_SESSION_RETRIEVE_FD \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD) + +/** + * struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH) + * @size: Input; sizeof(struct liveupdate_session_finish) + * @reserved: Input; Must be zero. Reserved for future use. + * + * Signals the completion of the restoration process for a retrieved session. + * This is the final operation that should be performed on a session file + * descriptor after a live update. + * + * This ioctl must be called once all required file descriptors for the session + * have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and + * are fully restored from the userspace and kernel perspective. + * + * Upon success, the kernel releases its ownership of the preserved resources + * associated with this session. This allows internal resources to be freed, + * typically by decrementing reference counts on the underlying preserved + * objects. + * + * If this operation fails, the resources remain preserved in memory. Userspace + * may attempt to call finish again. The resources will otherwise be reset + * during the next live update cycle. + * + * Return: 0 on success, negative error code on failure. + */ +struct liveupdate_session_finish { + __u32 size; + __u32 reserved; +}; + +#define LIVEUPDATE_SESSION_FINISH \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH) + +#endif /* _UAPI_LIVEUPDATE_H */ diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h index 3e9da91866ff..2ba89fafa518 100644 --- a/include/uapi/linux/vmcore.h +++ b/include/uapi/linux/vmcore.h @@ -15,4 +15,13 @@ struct vmcoredd_header { __u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */ }; +enum hwerr_error_type { + HWERR_RECOV_CPU, + HWERR_RECOV_MEMORY, + HWERR_RECOV_PCI, + HWERR_RECOV_CXL, + HWERR_RECOV_OTHERS, + HWERR_RECOV_MAX, +}; + #endif /* _UAPI_VMCORE_H */ |
