diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-12 12:13:01 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-12 12:13:01 -0800 |
| commit | 136114e0abf03005e182d75761ab694648e6d388 (patch) | |
| tree | 05c61b103fc9cb72a7cae99680a4b524347e9616 /kernel/liveupdate | |
| parent | 4cff5c05e076d2ee4e34122aa956b84a2eaac587 (diff) | |
| parent | 0dddf20b4fd4afd59767acc144ad4da60259f21f (diff) | |
Merge tag 'mm-nonmm-stable-2026-02-12-10-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
- "ocfs2: give ocfs2 the ability to reclaim suballocator free bg" saves
disk space by teaching ocfs2 to reclaim suballocator block group
space (Heming Zhao)
- "Add ARRAY_END(), and use it to fix off-by-one bugs" adds the
ARRAY_END() macro and uses it in various places (Alejandro Colomar)
- "vmcoreinfo: support VMCOREINFO_BYTES larger than PAGE_SIZE" makes
the vmcore code future-safe, if VMCOREINFO_BYTES ever exceeds the
page size (Pnina Feder)
- "kallsyms: Prevent invalid access when showing module buildid" cleans
up kallsyms code related to module buildid and fixes an invalid
access crash when printing backtraces (Petr Mladek)
- "Address page fault in ima_restore_measurement_list()" fixes a
kexec-related crash that can occur when booting the second-stage
kernel on x86 (Harshit Mogalapalli)
- "kho: ABI headers and Documentation updates" updates the kexec
handover ABI documentation (Mike Rapoport)
- "Align atomic storage" adds the __aligned attribute to atomic_t and
atomic64_t definitions to get natural alignment of both types on
csky, m68k, microblaze, nios2, openrisc and sh (Finn Thain)
- "kho: clean up page initialization logic" simplifies the page
initialization logic in kho_restore_page() (Pratyush Yadav)
- "Unload linux/kernel.h" moves several things out of kernel.h and into
more appropriate places (Yury Norov)
- "don't abuse task_struct.group_leader" removes the usage of
->group_leader when it is "obviously unnecessary" (Oleg Nesterov)
- "list private v2 & luo flb" adds some infrastructure improvements to
the live update orchestrator (Pasha Tatashin)
* tag 'mm-nonmm-stable-2026-02-12-10-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (107 commits)
watchdog/hardlockup: simplify perf event probe and remove per-cpu dependency
procfs: fix missing RCU protection when reading real_parent in do_task_stat()
watchdog/softlockup: fix sample ring index wrap in need_counting_irqs()
kcsan, compiler_types: avoid duplicate type issues in BPF Type Format
kho: fix doc for kho_restore_pages()
tests/liveupdate: add in-kernel liveupdate test
liveupdate: luo_flb: introduce File-Lifecycle-Bound global state
liveupdate: luo_file: Use private list
list: add kunit test for private list primitives
list: add primitives for private list manipulations
delayacct: fix uapi timespec64 definition
panic: add panic_force_cpu= parameter to redirect panic to a specific CPU
netclassid: use thread_group_leader(p) in update_classid_task()
RDMA/umem: don't abuse current->group_leader
drm/pan*: don't abuse current->group_leader
drm/amd: kill the outdated "Only the pthreads threading model is supported" checks
drm/amdgpu: don't abuse current->group_leader
android/binder: use same_thread_group(proc->tsk, current) in binder_mmap()
android/binder: don't abuse current->group_leader
kho: skip memoryless NUMA nodes when reserving scratch areas
...
Diffstat (limited to 'kernel/liveupdate')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | kernel/liveupdate/Kconfig | 17 |
| -rw-r--r-- | kernel/liveupdate/Makefile | 1 |
| -rw-r--r-- | kernel/liveupdate/kexec_handover.c | 147 |
| -rw-r--r-- | kernel/liveupdate/luo_core.c | 10 |
| -rw-r--r-- | kernel/liveupdate/luo_file.c | 39 |
| -rw-r--r-- | kernel/liveupdate/luo_flb.c | 654 |
| -rw-r--r-- | kernel/liveupdate/luo_internal.h | 22 |
7 files changed, 801 insertions, 89 deletions
diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig index d2aeaf13c3ac..1a8513f16ef7 100644 --- a/kernel/liveupdate/Kconfig +++ b/kernel/liveupdate/Kconfig @@ -54,7 +54,6 @@ config KEXEC_HANDOVER_ENABLE_DEFAULT config LIVEUPDATE bool "Live Update Orchestrator" depends on KEXEC_HANDOVER - depends on SHMEM help Enable the Live Update Orchestrator. Live Update is a mechanism, typically based on kexec, that allows the kernel to be updated @@ -73,4 +72,20 @@ config LIVEUPDATE If unsure, say N. +config LIVEUPDATE_MEMFD + bool "Live update support for memfd" + depends on LIVEUPDATE + depends on MEMFD_CREATE + depends on SHMEM + default LIVEUPDATE + help + Enable live update support for memfd regions. This allows preserving + memfd-backed memory across kernel live updates. + + This can be used to back VM memory with memfds, allowing the guest + memory to persist, or for other user workloads needing to preserve + pages. + + If unsure, say N. + endmenu diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile index 7cad2eece32d..d2f779cbe279 100644 --- a/kernel/liveupdate/Makefile +++ b/kernel/liveupdate/Makefile @@ -3,6 +3,7 @@ luo-y := \ luo_core.o \ luo_file.o \ + luo_flb.o \ luo_session.o obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 90d411a59f76..fb3a7b67676e 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -15,6 +15,7 @@ #include <linux/count_zeros.h> #include <linux/kexec.h> #include <linux/kexec_handover.h> +#include <linux/kho/abi/kexec_handover.h> #include <linux/libfdt.h> #include <linux/list.h> #include <linux/memblock.h> @@ -24,7 +25,6 @@ #include <asm/early_ioremap.h> -#include "kexec_handover_internal.h" /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. 
@@ -33,10 +33,7 @@ #include "../kexec_internal.h" #include "kexec_handover_internal.h" -#define KHO_FDT_COMPATIBLE "kho-v1" -#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map" -#define PROP_SUB_FDT "fdt" - +/* The magic token for preserved pages */ #define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */ /* @@ -219,10 +216,32 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, return 0; } +/* For physically contiguous 0-order pages. */ +static void kho_init_pages(struct page *page, unsigned long nr_pages) +{ + for (unsigned long i = 0; i < nr_pages; i++) + set_page_count(page + i, 1); +} + +static void kho_init_folio(struct page *page, unsigned int order) +{ + unsigned long nr_pages = (1 << order); + + /* Head page gets refcount of 1. */ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. */ + for (unsigned long i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); +} + static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) { struct page *page = pfn_to_online_page(PHYS_PFN(phys)); - unsigned int nr_pages, ref_cnt; + unsigned long nr_pages; union kho_page_info info; if (!page) @@ -240,20 +259,11 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) /* Clear private to make sure later restores on this page error out. */ page->private = 0; - /* Head page gets refcount of 1. */ - set_page_count(page, 1); - /* - * For higher order folios, tail pages get a page count of zero. - * For physically contiguous order-0 pages every pages gets a page - * count of 1 - */ - ref_cnt = is_folio ? 
0 : 1; - for (unsigned int i = 1; i < nr_pages; i++) - set_page_count(page + i, ref_cnt); - - if (is_folio && info.order) - prep_compound_page(page, info.order); + if (is_folio) + kho_init_folio(page, info.order); + else + kho_init_pages(page, nr_pages); /* Always mark headpage's codetag as empty to avoid accounting mismatch */ clear_page_tag_ref(page); @@ -289,9 +299,9 @@ EXPORT_SYMBOL_GPL(kho_restore_folio); * Restore a contiguous list of order 0 pages that was preserved with * kho_preserve_pages(). * - * Return: 0 on success, error code on failure + * Return: the first page on success, NULL on failure. */ -struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) +struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages) { const unsigned long start_pfn = PHYS_PFN(phys); const unsigned long end_pfn = start_pfn + nr_pages; @@ -386,7 +396,7 @@ static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk) void *ptr; u64 phys; - ptr = fdt_getprop_w(kho_out.fdt, 0, PROP_PRESERVED_MEMORY_MAP, NULL); + ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL); /* Check and discard previous memory map */ phys = get_unaligned((u64 *)ptr); @@ -474,7 +484,7 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt) const void *mem_ptr; int len; - mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len); + mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len); if (!mem_ptr || len != sizeof(u64)) { pr_err("failed to get preserved memory bitmaps\n"); return 0; @@ -645,11 +655,13 @@ static void __init kho_reserve_scratch(void) scratch_size_update(); /* FIXME: deal with node hot-plug/remove */ - kho_scratch_cnt = num_online_nodes() + 2; + kho_scratch_cnt = nodes_weight(node_states[N_MEMORY]) + 2; size = kho_scratch_cnt * sizeof(*kho_scratch); kho_scratch = memblock_alloc(size, PAGE_SIZE); - if (!kho_scratch) + if (!kho_scratch) { + pr_err("Failed to reserve scratch array\n"); goto err_disable_kho; + } 
/* * reserve scratch area in low memory for lowmem allocations in the @@ -658,8 +670,10 @@ static void __init kho_reserve_scratch(void) size = scratch_size_lowmem; addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0, ARCH_LOW_ADDRESS_LIMIT); - if (!addr) + if (!addr) { + pr_err("Failed to reserve lowmem scratch buffer\n"); goto err_free_scratch_desc; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -668,20 +682,28 @@ static void __init kho_reserve_scratch(void) /* reserve large contiguous area for allocations without nid */ size = scratch_size_global; addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES); - if (!addr) + if (!addr) { + pr_err("Failed to reserve global scratch buffer\n"); goto err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; i++; - for_each_online_node(nid) { + /* + * Loop over nodes that have both memory and are online. Skip + * memoryless nodes, as we can not allocate scratch areas there. + */ + for_each_node_state(nid, N_MEMORY) { size = scratch_size_node(nid); addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES, 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid, true); - if (!addr) + if (!addr) { + pr_err("Failed to reserve nid %d scratch buffer\n", nid); goto err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -735,7 +757,8 @@ int kho_add_subtree(const char *name, void *fdt) goto out_pack; } - err = fdt_setprop(root_fdt, off, PROP_SUB_FDT, &phys, sizeof(phys)); + err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, + &phys, sizeof(phys)); if (err < 0) goto out_pack; @@ -766,7 +789,7 @@ void kho_remove_subtree(void *fdt) const u64 *val; int len; - val = fdt_getprop(root_fdt, off, PROP_SUB_FDT, &len); + val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(phys_addr_t)) continue; @@ -831,7 +854,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio); * * Return: 0 on success, error code on failure */ -int 
kho_preserve_pages(struct page *page, unsigned int nr_pages) +int kho_preserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); @@ -875,7 +898,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages); * kho_preserve_pages() call. Unpreserving arbitrary sub-ranges of larger * preserved blocks is not supported. */ -void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) +void kho_unpreserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); @@ -885,21 +908,6 @@ void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) } EXPORT_SYMBOL_GPL(kho_unpreserve_pages); -struct kho_vmalloc_hdr { - DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); -}; - -#define KHO_VMALLOC_SIZE \ - ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ - sizeof(phys_addr_t)) - -struct kho_vmalloc_chunk { - struct kho_vmalloc_hdr hdr; - phys_addr_t phys[KHO_VMALLOC_SIZE]; -}; - -static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); - /* vmalloc flags KHO supports */ #define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) @@ -1315,7 +1323,7 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys) if (offset < 0) return -ENOENT; - val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len); + val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(*val)) return -EINVAL; @@ -1335,7 +1343,7 @@ static __init int kho_out_fdt_setup(void) err |= fdt_finish_reservemap(root); err |= fdt_begin_node(root, ""); err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE); - err |= fdt_property(root, PROP_PRESERVED_MEMORY_MAP, &empty_mem_map, + err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map, sizeof(empty_mem_map)); err |= fdt_end_node(root); err |= fdt_finish(root); @@ -1451,46 +1459,40 @@ void __init kho_memory_init(void) 
void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, u64 scratch_len) { + unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); struct kho_scratch *scratch = NULL; phys_addr_t mem_map_phys; void *fdt = NULL; - int err = 0; - unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); + int err; /* Validate the input FDT */ fdt = early_memremap(fdt_phys, fdt_len); if (!fdt) { pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys); - err = -EFAULT; - goto out; + goto err_report; } err = fdt_check_header(fdt); if (err) { pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n", fdt_phys, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE); if (err) { pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", fdt_phys, KHO_FDT_COMPATIBLE, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } mem_map_phys = kho_get_mem_map_phys(fdt); - if (!mem_map_phys) { - err = -ENOENT; - goto out; - } + if (!mem_map_phys) + goto err_unmap_fdt; scratch = early_memremap(scratch_phys, scratch_len); if (!scratch) { pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n", scratch_phys, scratch_len); - err = -EFAULT; - goto out; + goto err_unmap_fdt; } /* @@ -1507,7 +1509,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, if (WARN_ON(err)) { pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe", &area->addr, &size, ERR_PTR(err)); - goto out; + goto err_unmap_scratch; } pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size); } @@ -1529,13 +1531,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, kho_scratch_cnt = scratch_cnt; pr_info("found kexec handover data.\n"); -out: - if (fdt) - early_memunmap(fdt, fdt_len); - if (scratch) - early_memunmap(scratch, scratch_len); - if (err) - pr_warn("disabling KHO revival: %d\n", err); + return; + +err_unmap_scratch: + early_memunmap(scratch, scratch_len); 
+err_unmap_fdt: + early_memunmap(fdt, fdt_len); +err_report: + pr_warn("disabling KHO revival\n"); } /* Helper functions for kexec_file_load */ diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c index 944663d99dd9..dda7bb57d421 100644 --- a/kernel/liveupdate/luo_core.c +++ b/kernel/liveupdate/luo_core.c @@ -35,8 +35,7 @@ * iommu, interrupts, vfio, participating filesystems, and memory management. * * LUO uses Kexec Handover to transfer memory state from the current kernel to - * the next kernel. For more details see - * Documentation/core-api/kho/concepts.rst. + * the next kernel. For more details see Documentation/core-api/kho/index.rst. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -128,7 +127,9 @@ static int __init luo_early_startup(void) if (err) return err; - return 0; + err = luo_flb_setup_incoming(luo_global.fdt_in); + + return err; } static int __init liveupdate_early_init(void) @@ -165,6 +166,7 @@ static int __init luo_fdt_setup(void) err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); err |= luo_session_setup_outgoing(fdt_out); + err |= luo_flb_setup_outgoing(fdt_out); err |= fdt_end_node(fdt_out); err |= fdt_finish(fdt_out); if (err) @@ -226,6 +228,8 @@ int liveupdate_reboot(void) if (err) return err; + luo_flb_serialize(); + err = kho_finalize(); if (err) { pr_err("kho_finalize failed %d\n", err); diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 9f7283379ebc..4c7df52a6507 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -104,6 +104,7 @@ #include <linux/io.h> #include <linux/kexec_handover.h> #include <linux/kho/abi/luo.h> +#include <linux/list_private.h> #include <linux/liveupdate.h> #include <linux/module.h> #include <linux/sizes.h> @@ -273,7 +274,7 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) goto err_fput; err = -ENOENT; - 
luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (fh->ops->can_preserve(fh, file)) { err = 0; break; @@ -284,10 +285,14 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) if (err) goto err_free_files_mem; + err = luo_flb_file_preserve(fh); + if (err) + goto err_free_files_mem; + luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL); if (!luo_file) { err = -ENOMEM; - goto err_free_files_mem; + goto err_flb_unpreserve; } luo_file->file = file; @@ -311,6 +316,8 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) err_kfree: kfree(luo_file); +err_flb_unpreserve: + luo_flb_file_unpreserve(fh); err_free_files_mem: luo_free_files_mem(file_set); err_fput: @@ -352,6 +359,7 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set) args.serialized_data = luo_file->serialized_data; args.private_data = luo_file->private_data; luo_file->fh->ops->unpreserve(&args); + luo_flb_file_unpreserve(luo_file->fh); list_del(&luo_file->list); file_set->count--; @@ -627,6 +635,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set, args.retrieved = luo_file->retrieved; luo_file->fh->ops->finish(&args); + luo_flb_file_finish(luo_file->fh); } /** @@ -758,7 +767,7 @@ int luo_file_deserialize(struct luo_file_set *file_set, bool handler_found = false; struct luo_file *luo_file; - luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (!strcmp(fh->compatible, file_ser[i].compatible)) { handler_found = true; break; @@ -833,7 +842,7 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) return -EBUSY; /* Check for duplicate compatible strings */ - luo_list_for_each_private(fh_iter, &luo_file_handler_list, list) { + list_private_for_each_entry(fh_iter, &luo_file_handler_list, list) { if (!strcmp(fh_iter->compatible, fh->compatible)) { pr_err("File handler 
registration failed: Compatible string '%s' already registered.\n", fh->compatible); @@ -848,10 +857,13 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) goto err_resume; } + INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, flb_list)); INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list)); list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list); luo_session_resume(); + liveupdate_test_register(fh); + return 0; err_resume: @@ -868,23 +880,38 @@ err_resume: * * It ensures safe removal by checking that: * No live update session is currently in progress. + * No FLB registered with this file handler. * * If the unregistration fails, the internal test state is reverted. * * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live - * update is in progress, can't quiesce live update. + * update is in progress, can't quiesce live update or FLB is registered with + * this file handler. */ int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) { + int err = -EBUSY; + if (!liveupdate_enabled()) return -EOPNOTSUPP; + liveupdate_test_unregister(fh); + if (!luo_session_quiesce()) - return -EBUSY; + goto err_register; + + if (!list_empty(&ACCESS_PRIVATE(fh, flb_list))) + goto err_resume; list_del(&ACCESS_PRIVATE(fh, list)); module_put(fh->ops->owner); luo_session_resume(); return 0; + +err_resume: + luo_session_resume(); +err_register: + liveupdate_test_register(fh); + return err; } diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c new file mode 100644 index 000000000000..4c437de5c0b0 --- /dev/null +++ b/kernel/liveupdate/luo_flb.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +/** + * DOC: LUO File Lifecycle Bound Global Data + * + * File-Lifecycle-Bound (FLB) objects provide a mechanism for managing global + * state that is shared across multiple live-updatable files.
The lifecycle of + * this shared state is tied to the preservation of the files that depend on it. + * + * An FLB represents a global resource, such as the IOMMU core state, that is + * required by multiple file descriptors (e.g., all VFIO fds). + * + * The preservation of the FLB's state is triggered when the *first* file + * depending on it is preserved. The cleanup of this state (unpreserve or + * finish) is triggered when the *last* file depending on it is unpreserved or + * finished. + * + * Handler Dependency: A file handler declares its dependency on one or more + * FLBs by registering them via liveupdate_register_flb(). + * + * Callback Model: Each FLB is defined by a set of operations + * (&struct liveupdate_flb_ops) that LUO invokes at key points: + * + * - .preserve(): Called for the first file. Saves global state. + * - .unpreserve(): Called for the last file (if aborted pre-reboot). + * - .retrieve(): Called on-demand in the new kernel to restore the state. + * - .finish(): Called for the last file in the new kernel for cleanup. + * + * This reference-counted approach ensures that shared state is saved exactly + * once and restored exactly once, regardless of how many files depend on it, + * and that its lifecycle is correctly managed across the kexec transition. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/cleanup.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/kexec_handover.h> +#include <linux/kho/abi/luo.h> +#include <linux/libfdt.h> +#include <linux/list_private.h> +#include <linux/liveupdate.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/unaligned.h> +#include "luo_internal.h" + +#define LUO_FLB_PGCNT 1ul +#define LUO_FLB_MAX (((LUO_FLB_PGCNT << PAGE_SHIFT) - \ + sizeof(struct luo_flb_header_ser)) / sizeof(struct luo_flb_ser)) + +struct luo_flb_header { + struct luo_flb_header_ser *header_ser; + struct luo_flb_ser *ser; + bool active; +}; + +struct luo_flb_global { + struct luo_flb_header incoming; + struct luo_flb_header outgoing; + struct list_head list; + long count; +}; + +static struct luo_flb_global luo_flb_global = { + .list = LIST_HEAD_INIT(luo_flb_global.list), +}; + +/* + * struct luo_flb_link - Links an FLB definition to a file handler's internal + * list of dependencies. + * @flb: A pointer to the registered &struct liveupdate_flb definition. + * @list: The list_head for linking. + */ +struct luo_flb_link { + struct liveupdate_flb *flb; + struct list_head list; +}; + +/* luo_flb_get_private - Access private field, and if needed initialize it. 
*/ +static struct luo_flb_private *luo_flb_get_private(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = &ACCESS_PRIVATE(flb, private); + + if (!private->initialized) { + mutex_init(&private->incoming.lock); + mutex_init(&private->outgoing.lock); + INIT_LIST_HEAD(&private->list); + private->users = 0; + private->initialized = true; + } + + return private; +} + +static int luo_flb_file_preserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + int err; + + args.flb = flb; + err = flb->ops->preserve(&args); + if (err) + return err; + private->outgoing.data = args.data; + private->outgoing.obj = args.obj; + } + private->outgoing.count++; + } + + return 0; +} + +static void luo_flb_file_unpreserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + private->outgoing.count--; + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + + args.flb = flb; + args.data = private->outgoing.data; + args.obj = private->outgoing.obj; + + if (flb->ops->unpreserve) + flb->ops->unpreserve(&args); + + private->outgoing.data = 0; + private->outgoing.obj = NULL; + } + } +} + +static int luo_flb_retrieve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct luo_flb_header *fh = &luo_flb_global.incoming; + struct liveupdate_flb_op_args args = {0}; + bool found = false; + int err; + + guard(mutex)(&private->incoming.lock); + + if (private->incoming.finished) + return -ENODATA; + + if (private->incoming.retrieved) + return 0; + + if (!fh->active) + return -ENODATA; + + for (int i = 0; i < fh->header_ser->count; i++) { + if (!strcmp(fh->ser[i].name, flb->compatible)) { + private->incoming.data = fh->ser[i].data; + 
private->incoming.count = fh->ser[i].count; + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + args.flb = flb; + args.data = private->incoming.data; + + err = flb->ops->retrieve(&args); + if (err) + return err; + + private->incoming.obj = args.obj; + private->incoming.retrieved = true; + + return 0; +} + +static void luo_flb_file_finish_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + u64 count; + + scoped_guard(mutex, &private->incoming.lock) + count = --private->incoming.count; + + if (!count) { + struct liveupdate_flb_op_args args = {0}; + + if (!private->incoming.retrieved) { + int err = luo_flb_retrieve_one(flb); + + if (WARN_ON(err)) + return; + } + + scoped_guard(mutex, &private->incoming.lock) { + args.flb = flb; + args.obj = private->incoming.obj; + flb->ops->finish(&args); + + private->incoming.data = 0; + private->incoming.obj = NULL; + private->incoming.finished = true; + } + } +} + +/** + * luo_flb_file_preserve - Notifies FLBs that a file is about to be preserved. + * @fh: The file handler for the preserved file. + * + * This function iterates through all FLBs associated with the given file + * handler. It increments the reference count for each FLB. If the count becomes + * 1, it triggers the FLB's .preserve() callback to save the global state. + * + * This operation is atomic. If any FLB's .preserve() op fails, it will roll + * back by calling .unpreserve() on any FLBs that were successfully preserved + * during this call. + * + * Context: Called from luo_preserve_file() + * Return: 0 on success, or a negative errno on failure. 
+ */ +int luo_flb_file_preserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = 0; + + list_for_each_entry(iter, flb_list, list) { + err = luo_flb_file_preserve_one(iter->flb); + if (err) + goto exit_err; + } + + return 0; + +exit_err: + list_for_each_entry_continue_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); + + return err; +} + +/** + * luo_flb_file_unpreserve - Notifies FLBs that a dependent file was unpreserved. + * @fh: The file handler for the unpreserved file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the reference count + * for each FLB. If the count becomes 0, it triggers the FLB's .unpreserve() + * callback to clean up the global state. + * + * Context: Called when a preserved file is being cleaned up before reboot + * (e.g., from luo_file_unpreserve_files()). + */ +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); +} + +/** + * luo_flb_file_finish - Notifies FLBs that a dependent file has been finished. + * @fh: The file handler for the finished file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the incoming + * reference count for each FLB. If the count becomes 0, it triggers the FLB's + * .finish() callback for final cleanup in the new kernel. + * + * Context: Called from luo_file_finish() for each file being finished. 
+ */ +void luo_flb_file_finish(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_finish_one(iter->flb); +} + +/** + * liveupdate_register_flb - Associate an FLB with a file handler and register it globally. + * @fh: The file handler that will now depend on the FLB. + * @flb: The File-Lifecycle-Bound object to associate. + * + * Establishes a dependency, informing the LUO core that whenever a file of + * type @fh is preserved, the state of @flb must also be managed. + * + * On the first registration of a given @flb object, it is added to a global + * registry. This function checks for duplicate registrations, both for a + * specific handler and globally, and ensures the total number of unique + * FLBs does not exceed the system limit. + * + * Context: Typically called from a subsystem's module init function after + * both the handler and the FLB have been defined and initialized. + * Return: 0 on success. Returns a negative errno on failure: + * -EINVAL if arguments are NULL or not initialized. + * -ENOMEM on memory allocation failure. + * -EEXIST if this FLB is already registered with this handler. + * -ENOSPC if the maximum number of global FLBs has been reached. + * -EOPNOTSUPP if live update is disabled or not configured. 
+ */ +int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *link __free(kfree) = NULL; + struct liveupdate_flb *gflb; + struct luo_flb_link *iter; + int err; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + if (WARN_ON(!flb->ops->preserve || !flb->ops->unpreserve || + !flb->ops->retrieve || !flb->ops->finish)) { + return -EINVAL; + } + + /* + * File handler must already be registered, as it initializes the + * flb_list + */ + if (WARN_ON(list_empty(&ACCESS_PRIVATE(fh, list)))) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for registration: no other thread can + * be in this section, and no sessions can be creating/using FDs. + */ + if (!luo_session_quiesce()) + return -EBUSY; + + /* Check that this FLB is not already linked to this file handler */ + err = -EEXIST; + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) + goto err_resume; + } + + /* + * If this FLB is not linked to global list it's the first time the FLB + * is registered + */ + if (!private->users) { + if (WARN_ON(!list_empty(&private->list))) { + err = -EINVAL; + goto err_resume; + } + + if (luo_flb_global.count == LUO_FLB_MAX) { + err = -ENOSPC; + goto err_resume; + } + + /* Check that compatible string is unique in global list */ + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + if (!strcmp(gflb->compatible, flb->compatible)) + goto err_resume; + } + + if (!try_module_get(flb->ops->owner)) { + err = -EAGAIN; + goto err_resume; + } + + list_add_tail(&private->list, &luo_flb_global.list); + luo_flb_global.count++; + } + + /* Finally, link the FLB to the file handler */ + private->users++; + link->flb = flb; + 
list_add_tail(&no_free_ptr(link)->list, flb_list); + luo_session_resume(); + + return 0; + +err_resume: + luo_session_resume(); + return err; +} + +/** + * liveupdate_unregister_flb - Remove an FLB dependency from a file handler. + * @fh: The file handler that is currently depending on the FLB. + * @flb: The File-Lifecycle-Bound object to remove. + * + * Removes the association between the specified file handler and the FLB + * previously established by liveupdate_register_flb(). + * + * This function manages the global lifecycle of the FLB. It decrements the + * FLB's usage count. If this was the last file handler referencing this FLB, + * the FLB is removed from the global registry and the reference to its + * owner module (acquired during registration) is released. + * + * Context: This function ensures the session is quiesced (no active FDs + * being created) during the update. It is typically called from a + * subsystem's module exit function. + * Return: 0 on success. + * -EOPNOTSUPP if live update is disabled. + * -EBUSY if the live update session is active and cannot be quiesced. + * -ENOENT if the FLB was not found in the file handler's list. + */ +int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = -ENOENT; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for unregistration. 
+ */ + if (!luo_session_quiesce()) + return -EBUSY; + + /* Find and remove the link from the file handler's list */ + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) { + list_del(&iter->list); + kfree(iter); + err = 0; + break; + } + } + + if (err) + goto err_resume; + + private->users--; + /* + * If this is the last file-handler with which we are registered, remove + * from the global list, and release module reference. + */ + if (!private->users) { + list_del_init(&private->list); + luo_flb_global.count--; + module_put(flb->ops->owner); + } + + luo_session_resume(); + + return 0; + +err_resume: + luo_session_resume(); + return err; +} + +/** + * liveupdate_flb_get_incoming - Retrieve the incoming FLB object. + * @flb: The FLB definition. + * @objp: Output parameter; will be populated with the live shared object. + * + * Returns a pointer to its shared live object for the incoming (post-reboot) + * path. + * + * If this is the first time the object is requested in the new kernel, this + * function will trigger the FLB's .retrieve() callback to reconstruct the + * object from its preserved state. Subsequent calls will return the same + * cached object. + * + * Return: 0 on success, or a negative errno on failure. -ENODATA means no + * incoming FLB data, -ENOENT means specific flb not found in the incoming + * data, and -EOPNOTSUPP when live update is disabled or not configured. + */ +int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + if (!private->incoming.obj) { + int err = luo_flb_retrieve_one(flb); + + if (err) + return err; + } + + guard(mutex)(&private->incoming.lock); + *objp = private->incoming.obj; + + return 0; +} + +/** + * liveupdate_flb_get_outgoing - Retrieve the outgoing FLB object. + * @flb: The FLB definition. + * @objp: Output parameter; will be populated with the live shared object.
+ * + * Returns a pointer to its shared live object for the outgoing (pre-reboot) + * path. + * + * This function assumes the object has already been created by the FLB's + * .preserve() callback, which is triggered when the first dependent file + * is preserved. + * + * Return: 0 on success, or a negative errno on failure. + */ +int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + guard(mutex)(&private->outgoing.lock); + *objp = private->outgoing.obj; + + return 0; +} + +int __init luo_flb_setup_outgoing(void *fdt_out) +{ + struct luo_flb_header_ser *header_ser; + u64 header_ser_pa; + int err; + + header_ser = kho_alloc_preserve(LUO_FLB_PGCNT << PAGE_SHIFT); + if (IS_ERR(header_ser)) + return PTR_ERR(header_ser); + + header_ser_pa = virt_to_phys(header_ser); + + err = fdt_begin_node(fdt_out, LUO_FDT_FLB_NODE_NAME); + err |= fdt_property_string(fdt_out, "compatible", + LUO_FDT_FLB_COMPATIBLE); + err |= fdt_property(fdt_out, LUO_FDT_FLB_HEADER, &header_ser_pa, + sizeof(header_ser_pa)); + err |= fdt_end_node(fdt_out); + + if (err) + goto err_unpreserve; + + header_ser->pgcnt = LUO_FLB_PGCNT; + luo_flb_global.outgoing.header_ser = header_ser; + luo_flb_global.outgoing.ser = (void *)(header_ser + 1); + luo_flb_global.outgoing.active = true; + + return 0; + +err_unpreserve: + kho_unpreserve_free(header_ser); + + return err; +} + +int __init luo_flb_setup_incoming(void *fdt_in) +{ + struct luo_flb_header_ser *header_ser; + int err, header_size, offset; + const void *ptr; + u64 header_ser_pa; + + offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_FLB_NODE_NAME); + if (offset < 0) { + pr_err("Unable to get FLB node [%s]\n", LUO_FDT_FLB_NODE_NAME); + + return -ENOENT; + } + + err = fdt_node_check_compatible(fdt_in, offset, + LUO_FDT_FLB_COMPATIBLE); + if (err) { + pr_err("FLB node is incompatible with '%s' [%d]\n", + 
LUO_FDT_FLB_COMPATIBLE, err); + + return -EINVAL; + } + + header_size = 0; + ptr = fdt_getprop(fdt_in, offset, LUO_FDT_FLB_HEADER, &header_size); + if (!ptr || header_size != sizeof(u64)) { + pr_err("Unable to get FLB header property '%s' [%d]\n", + LUO_FDT_FLB_HEADER, header_size); + + return -EINVAL; + } + + header_ser_pa = get_unaligned((u64 *)ptr); + header_ser = phys_to_virt(header_ser_pa); + + luo_flb_global.incoming.header_ser = header_ser; + luo_flb_global.incoming.ser = (void *)(header_ser + 1); + luo_flb_global.incoming.active = true; + + return 0; +} + +/** + * luo_flb_serialize - Serializes all active FLB objects for KHO. + * + * This function is called from the reboot path. It iterates through all + * registered File-Lifecycle-Bound (FLB) objects. For each FLB that has been + * preserved (i.e., its reference count is greater than zero), it writes its + * metadata into the memory region designated for Kexec Handover. + * + * The serialized data includes the FLB's compatibility string, its opaque + * data handle, and the final reference count. This allows the new kernel to + * find the appropriate handler and reconstruct the FLB's state. + * + * Context: Called from liveupdate_reboot() just before kho_finalize(). 
+ */ +void luo_flb_serialize(void) +{ + struct luo_flb_header *fh = &luo_flb_global.outgoing; + struct liveupdate_flb *gflb; + int i = 0; + + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + struct luo_flb_private *private = luo_flb_get_private(gflb); + + if (private->outgoing.count > 0) { + strscpy(fh->ser[i].name, gflb->compatible, + sizeof(fh->ser[i].name)); + fh->ser[i].data = private->outgoing.data; + fh->ser[i].count = private->outgoing.count; + i++; + } + } + + fh->header_ser->count = i; +} diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index c8973b543d1d..8083d8739b09 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -40,13 +40,6 @@ static inline int luo_ucmd_respond(struct luo_ucmd *ucmd, */ #define luo_restore_fail(__fmt, ...) panic(__fmt, ##__VA_ARGS__) -/* Mimics list_for_each_entry() but for private list head entries */ -#define luo_list_for_each_private(pos, head, member) \ - for (struct list_head *__iter = (head)->next; \ - __iter != (head) && \ - ({ pos = container_of(__iter, typeof(*(pos)), member); 1; }); \ - __iter = __iter->next) - /** * struct luo_file_set - A set of files that belong to the same sessions. 
* @files_list: An ordered list of files associated with this session, it is @@ -107,4 +100,19 @@ int luo_file_deserialize(struct luo_file_set *file_set, void luo_file_set_init(struct luo_file_set *file_set); void luo_file_set_destroy(struct luo_file_set *file_set); +int luo_flb_file_preserve(struct liveupdate_file_handler *fh); +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh); +void luo_flb_file_finish(struct liveupdate_file_handler *fh); +int __init luo_flb_setup_outgoing(void *fdt); +int __init luo_flb_setup_incoming(void *fdt); +void luo_flb_serialize(void); + +#ifdef CONFIG_LIVEUPDATE_TEST +void liveupdate_test_register(struct liveupdate_file_handler *fh); +void liveupdate_test_unregister(struct liveupdate_file_handler *fh); +#else +static inline void liveupdate_test_register(struct liveupdate_file_handler *fh) { } +static inline void liveupdate_test_unregister(struct liveupdate_file_handler *fh) { } +#endif + #endif /* _LINUX_LUO_INTERNAL_H */ |
