diff options
Diffstat (limited to 'kernel/liveupdate')
| -rw-r--r-- | kernel/liveupdate/kexec_handover.c | 837 | ||||
| -rw-r--r-- | kernel/liveupdate/kexec_handover_debugfs.c | 79 | ||||
| -rw-r--r-- | kernel/liveupdate/kexec_handover_internal.h | 18 | ||||
| -rw-r--r-- | kernel/liveupdate/luo_core.c | 23 | ||||
| -rw-r--r-- | kernel/liveupdate/luo_file.c | 112 | ||||
| -rw-r--r-- | kernel/liveupdate/luo_flb.c | 182 | ||||
| -rw-r--r-- | kernel/liveupdate/luo_internal.h | 7 | ||||
| -rw-r--r-- | kernel/liveupdate/luo_session.c | 55 |
8 files changed, 713 insertions, 600 deletions
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index cc68a3692905..94762de1fe5f 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -5,6 +5,7 @@ * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org> * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com> * Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com> + * Copyright (C) 2026 Google LLC, Jason Miu <jasonmiu@google.com> */ #define pr_fmt(fmt) "KHO: " fmt @@ -13,9 +14,13 @@ #include <linux/cma.h> #include <linux/kmemleak.h> #include <linux/count_zeros.h> +#include <linux/kasan.h> #include <linux/kexec.h> #include <linux/kexec_handover.h> +#include <linux/kho_radix_tree.h> +#include <linux/utsname.h> #include <linux/kho/abi/kexec_handover.h> +#include <linux/kho/abi/kexec_metadata.h> #include <linux/libfdt.h> #include <linux/list.h> #include <linux/memblock.h> @@ -64,163 +69,316 @@ static int __init kho_parse_enable(char *p) } early_param("kho", kho_parse_enable); -/* - * Keep track of memory that is to be preserved across KHO. - * - * The serializing side uses two levels of xarrays to manage chunks of per-order - * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order - * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0 - * allocations each bitmap will cover 128M of address space. Thus, for 16G of - * memory at most 512K of bitmap memory will be needed for order 0. - * - * This approach is fully incremental, as the serialization progresses folios - * can continue be aggregated to the tracker. The final step, immediately prior - * to kexec would serialize the xarray information into a linked list for the - * successor kernel to parse. - */ - -#define PRESERVE_BITS (PAGE_SIZE * 8) - -struct kho_mem_phys_bits { - DECLARE_BITMAP(preserve, PRESERVE_BITS); -}; - -static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE); - -struct kho_mem_phys { - /* - * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized - * to order. - */ - struct xarray phys_bits; -}; - -struct kho_mem_track { - /* Points to kho_mem_phys, each order gets its own bitmap tree */ - struct xarray orders; -}; - -struct khoser_mem_chunk; - struct kho_out { void *fdt; - bool finalized; - struct mutex lock; /* protects KHO FDT finalization */ + struct mutex lock; /* protects KHO FDT */ - struct kho_mem_track track; + struct kho_radix_tree radix_tree; struct kho_debugfs dbg; }; static struct kho_out kho_out = { .lock = __MUTEX_INITIALIZER(kho_out.lock), - .track = { - .orders = XARRAY_INIT(kho_out.track.orders, 0), + .radix_tree = { + .lock = __MUTEX_INITIALIZER(kho_out.radix_tree.lock), }, - .finalized = false, }; -static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) +/** + * kho_radix_encode_key - Encodes a physical address and order into a radix key. + * @phys: The physical address of the page. + * @order: The order of the page. + * + * This function combines a page's physical address and its order into a + * single unsigned long, which is used as a key for all radix tree + * operations. + * + * Return: The encoded unsigned long radix key. + */ +static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order) { - void *res = xa_load(xa, index); + /* Order bits part */ + unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order); + /* Shifted physical address part */ + unsigned long l = phys >> (PAGE_SHIFT + order); - if (res) - return res; + return h | l; +} - void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL); +/** + * kho_radix_decode_key - Decodes a radix key back into a physical address and order. + * @key: The unsigned long key to decode. + * @order: An output parameter, a pointer to an unsigned int where the decoded + * page order will be stored. + * + * This function reverses the encoding performed by kho_radix_encode_key(), + * extracting the original physical address and page order from a given key. + * + * Return: The decoded physical address. + */ +static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order) +{ + unsigned int order_bit = fls64(key); + phys_addr_t phys; - if (!elm) - return ERR_PTR(-ENOMEM); + /* order_bit is numbered starting at 1 from fls64 */ + *order = KHO_ORDER_0_LOG2 - order_bit + 1; + /* The order is discarded by the shift */ + phys = key << (PAGE_SHIFT + *order); - if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE))) - return ERR_PTR(-EINVAL); + return phys; +} + +static unsigned long kho_radix_get_bitmap_index(unsigned long key) +{ + return key % (1 << KHO_BITMAP_SIZE_LOG2); +} - res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); - if (xa_is_err(res)) - return ERR_PTR(xa_err(res)); - else if (res) - return res; +static unsigned long kho_radix_get_table_index(unsigned long key, + unsigned int level) +{ + int s; - return no_free_ptr(elm); + s = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2; + return (key >> s) % (1 << KHO_TABLE_SIZE_LOG2); } -static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn, - unsigned int order) +/** + * kho_radix_add_page - Marks a page as preserved in the radix tree. + * @tree: The KHO radix tree. + * @pfn: The page frame number of the page to preserve. + * @order: The order of the page. + * + * This function traverses the radix tree based on the key derived from @pfn + * and @order. It sets the corresponding bit in the leaf bitmap to mark the + * page for preservation. If intermediate nodes do not exist along the path, + * they are allocated and added to the tree. + * + * Return: 0 on success, or a negative error code on failure. + */ +int kho_radix_add_page(struct kho_radix_tree *tree, + unsigned long pfn, unsigned int order) { - struct kho_mem_phys_bits *bits; - struct kho_mem_phys *physxa; - const unsigned long pfn_high = pfn >> order; + /* Newly allocated nodes for error cleanup */ + struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 }; + unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); + struct kho_radix_node *anchor_node = NULL; + struct kho_radix_node *node = tree->root; + struct kho_radix_node *new_node; + unsigned int i, idx, anchor_idx; + struct kho_radix_leaf *leaf; + int err = 0; - physxa = xa_load(&track->orders, order); - if (WARN_ON_ONCE(!physxa)) - return; + if (WARN_ON_ONCE(!tree->root)) + return -EINVAL; - bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS); - if (WARN_ON_ONCE(!bits)) - return; + might_sleep(); + + guard(mutex)(&tree->lock); + + /* Go from high levels to low levels */ + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + idx = kho_radix_get_table_index(key, i); - clear_bit(pfn_high % PRESERVE_BITS, bits->preserve); + if (node->table[idx]) { + node = phys_to_virt(node->table[idx]); + continue; + } + + /* Next node is empty, create a new node for it */ + new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL); + if (!new_node) { + err = -ENOMEM; + goto err_free_nodes; + } + + node->table[idx] = virt_to_phys(new_node); + + /* + * Capture the node where the new branch starts for cleanup + * if allocation fails. + */ + if (!anchor_node) { + anchor_node = node; + anchor_idx = idx; + } + intermediate_nodes[i] = new_node; + + node = new_node; + } + + /* Handle the leaf level bitmap (level 0) */ + idx = kho_radix_get_bitmap_index(key); + leaf = (struct kho_radix_leaf *)node; + __set_bit(idx, leaf->bitmap); + + return 0; + +err_free_nodes: + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + if (intermediate_nodes[i]) + free_page((unsigned long)intermediate_nodes[i]); + } + if (anchor_node) + anchor_node->table[anchor_idx] = 0; + + return err; } +EXPORT_SYMBOL_GPL(kho_radix_add_page); -static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, - unsigned long end_pfn) +/** + * kho_radix_del_page - Removes a page's preservation status from the radix tree. + * @tree: The KHO radix tree. + * @pfn: The page frame number of the page to unpreserve. + * @order: The order of the page. + * + * This function traverses the radix tree and clears the bit corresponding to + * the page, effectively removing its "preserved" status. It does not free + * the tree's intermediate nodes, even if they become empty. + */ +void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn, + unsigned int order) { - unsigned int order; + unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order); + struct kho_radix_node *node = tree->root; + struct kho_radix_leaf *leaf; + unsigned int i, idx; - while (pfn < end_pfn) { - order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); + if (WARN_ON_ONCE(!tree->root)) + return; - __kho_unpreserve_order(track, pfn, order); + might_sleep(); - pfn += 1 << order; + guard(mutex)(&tree->lock); + + /* Go from high levels to low levels */ + for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) { + idx = kho_radix_get_table_index(key, i); + + /* + * Attempting to delete a page that has not been preserved, + * return with a warning. + */ + if (WARN_ON(!node->table[idx])) + return; + + node = phys_to_virt(node->table[idx]); } + + /* Handle the leaf level bitmap (level 0) */ + leaf = (struct kho_radix_leaf *)node; + idx = kho_radix_get_bitmap_index(key); + __clear_bit(idx, leaf->bitmap); } +EXPORT_SYMBOL_GPL(kho_radix_del_page); -static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, - unsigned int order) +static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf, + unsigned long key, + kho_radix_tree_walk_callback_t cb) { - struct kho_mem_phys_bits *bits; - struct kho_mem_phys *physxa, *new_physxa; - const unsigned long pfn_high = pfn >> order; + unsigned long *bitmap = (unsigned long *)leaf; + unsigned int order; + phys_addr_t phys; + unsigned int i; + int err; - might_sleep(); - physxa = xa_load(&track->orders, order); - if (!physxa) { - int err; + for_each_set_bit(i, bitmap, PAGE_SIZE * BITS_PER_BYTE) { + phys = kho_radix_decode_key(key | i, &order); + err = cb(phys, order); + if (err) + return err; + } - new_physxa = kzalloc_obj(*physxa); - if (!new_physxa) - return -ENOMEM; + return 0; +} - xa_init(&new_physxa->phys_bits); - physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa, - GFP_KERNEL); +static int __kho_radix_walk_tree(struct kho_radix_node *root, + unsigned int level, unsigned long start, + kho_radix_tree_walk_callback_t cb) +{ + struct kho_radix_node *node; + struct kho_radix_leaf *leaf; + unsigned long key, i; + unsigned int shift; + int err; + + for (i = 0; i < PAGE_SIZE / sizeof(phys_addr_t); i++) { + if (!root->table[i]) + continue; - err = xa_err(physxa); - if (err || physxa) { - xa_destroy(&new_physxa->phys_bits); - kfree(new_physxa); + shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) + + KHO_BITMAP_SIZE_LOG2; + key = start | (i << shift); - if (err) - return err; + node = phys_to_virt(root->table[i]); + + if (level == 1) { + /* + * we are at level 1, + * node is pointing to the level 0 bitmap. + */ + leaf = (struct kho_radix_leaf *)node; + err = kho_radix_walk_leaf(leaf, key, cb); } else { - physxa = new_physxa; + err = __kho_radix_walk_tree(node, level - 1, + key, cb); } + + if (err) + return err; } - bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS); - if (IS_ERR(bits)) - return PTR_ERR(bits); + return 0; +} - set_bit(pfn_high % PRESERVE_BITS, bits->preserve); +/** + * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page. + * @tree: A pointer to the KHO radix tree to walk. + * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be + * invoked for each preserved page found in the tree. The callback receives + * the physical address and order of the preserved page. + * + * This function walks the radix tree, searching from the specified top level + * down to the lowest level (level 0). For each preserved page found, it invokes + * the provided callback, passing the page's physical address and order. + * + * Return: 0 if the walk completed the specified tree, or the non-zero return + * value from the callback that stopped the walk. + */ +int kho_radix_walk_tree(struct kho_radix_tree *tree, + kho_radix_tree_walk_callback_t cb) +{ + if (WARN_ON_ONCE(!tree->root)) + return -EINVAL; - return 0; + guard(mutex)(&tree->lock); + + return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb); +} +EXPORT_SYMBOL_GPL(kho_radix_walk_tree); + +static void __kho_unpreserve(struct kho_radix_tree *tree, + unsigned long pfn, unsigned long end_pfn) +{ + unsigned int order; + + while (pfn < end_pfn) { + order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); + + kho_radix_del_page(tree, pfn, order); + + pfn += 1 << order; + } } /* For physically contiguous 0-order pages. */ static void kho_init_pages(struct page *page, unsigned long nr_pages) { - for (unsigned long i = 0; i < nr_pages; i++) + for (unsigned long i = 0; i < nr_pages; i++) { set_page_count(page + i, 1); + /* Clear each page's codetag to avoid accounting mismatch. */ + clear_page_tag_ref(page + i); + } } static void kho_init_folio(struct page *page, unsigned int order) @@ -229,6 +387,8 @@ static void kho_init_folio(struct page *page, unsigned int order) /* Head page gets refcount of 1. */ set_page_count(page, 1); + /* Clear head page's codetag to avoid accounting mismatch. */ + clear_page_tag_ref(page); /* For higher order folios, tail pages get a page count of zero. */ for (unsigned long i = 1; i < nr_pages; i++) @@ -253,7 +413,7 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) * check also implicitly makes sure phys is order-aligned since for * non-order-aligned phys addresses, magic will never be set. */ - if (WARN_ON_ONCE(info.magic != KHO_PAGE_MAGIC || info.order > MAX_PAGE_ORDER)) + if (WARN_ON_ONCE(info.magic != KHO_PAGE_MAGIC)) return NULL; nr_pages = (1 << info.order); @@ -265,14 +425,6 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) else kho_init_pages(page, nr_pages); - /* Always mark headpage's codetag as empty to avoid accounting mismatch */ - clear_page_tag_ref(page); - if (!is_folio) { - /* Also do that for the non-compound tail pages */ - for (unsigned int i = 1; i < nr_pages; i++) - clear_page_tag_ref(page + i); - } - adjust_managed_page_count(page, nr_pages); return page; } @@ -321,161 +473,24 @@ struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages) } EXPORT_SYMBOL_GPL(kho_restore_pages); -/* Serialize and deserialize struct kho_mem_phys across kexec - * - * Record all the bitmaps in a linked list of pages for the next kernel to - * process. Each chunk holds bitmaps of the same order and each block of bitmaps - * starts at a given physical address. This allows the bitmaps to be sparse. The - * xarray is used to store them in a tree while building up the data structure, - * but the KHO successor kernel only needs to process them once in order. - * - * All of this memory is normal kmalloc() memory and is not marked for - * preservation. The successor kernel will remain isolated to the scratch space - * until it completes processing this list. Once processed all the memory - * storing these ranges will be marked as free. - */ - -struct khoser_mem_bitmap_ptr { - phys_addr_t phys_start; - DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *); -}; - -struct khoser_mem_chunk_hdr { - DECLARE_KHOSER_PTR(next, struct khoser_mem_chunk *); - unsigned int order; - unsigned int num_elms; -}; - -#define KHOSER_BITMAP_SIZE \ - ((PAGE_SIZE - sizeof(struct khoser_mem_chunk_hdr)) / \ - sizeof(struct khoser_mem_bitmap_ptr)) - -struct khoser_mem_chunk { - struct khoser_mem_chunk_hdr hdr; - struct khoser_mem_bitmap_ptr bitmaps[KHOSER_BITMAP_SIZE]; -}; - -static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE); - -static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, - unsigned long order) -{ - struct khoser_mem_chunk *chunk __free(free_page) = NULL; - - chunk = (void *)get_zeroed_page(GFP_KERNEL); - if (!chunk) - return ERR_PTR(-ENOMEM); - - if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE))) - return ERR_PTR(-EINVAL); - - chunk->hdr.order = order; - if (cur_chunk) - KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk); - return no_free_ptr(chunk); -} - -static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk) +static int __init kho_preserved_memory_reserve(phys_addr_t phys, + unsigned int order) { - struct khoser_mem_chunk *chunk = first_chunk; - - while (chunk) { - struct khoser_mem_chunk *tmp = chunk; - - chunk = KHOSER_LOAD_PTR(chunk->hdr.next); - free_page((unsigned long)tmp); - } -} - -/* - * Update memory map property, if old one is found discard it via - * kho_mem_ser_free(). - */ -static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk) -{ - void *ptr; - u64 phys; - - ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL); - - /* Check and discard previous memory map */ - phys = get_unaligned((u64 *)ptr); - if (phys) - kho_mem_ser_free((struct khoser_mem_chunk *)phys_to_virt(phys)); - - /* Update with the new value */ - phys = first_chunk ? (u64)virt_to_phys(first_chunk) : 0; - put_unaligned(phys, (u64 *)ptr); -} - -static int kho_mem_serialize(struct kho_out *kho_out) -{ - struct khoser_mem_chunk *first_chunk = NULL; - struct khoser_mem_chunk *chunk = NULL; - struct kho_mem_phys *physxa; - unsigned long order; - int err = -ENOMEM; - - xa_for_each(&kho_out->track.orders, order, physxa) { - struct kho_mem_phys_bits *bits; - unsigned long phys; - - chunk = new_chunk(chunk, order); - if (IS_ERR(chunk)) { - err = PTR_ERR(chunk); - goto err_free; - } - - if (!first_chunk) - first_chunk = chunk; + union kho_page_info info; + struct page *page; + u64 sz; - xa_for_each(&physxa->phys_bits, phys, bits) { - struct khoser_mem_bitmap_ptr *elm; + sz = 1 << (order + PAGE_SHIFT); + page = phys_to_page(phys); - if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) { - chunk = new_chunk(chunk, order); - if (IS_ERR(chunk)) { - err = PTR_ERR(chunk); - goto err_free; - } - } - - elm = &chunk->bitmaps[chunk->hdr.num_elms]; - chunk->hdr.num_elms++; - elm->phys_start = (phys * PRESERVE_BITS) - << (order + PAGE_SHIFT); - KHOSER_STORE_PTR(elm->bitmap, bits); - } - } - - kho_update_memory_map(first_chunk); + /* Reserve the memory preserved in KHO in memblock */ + memblock_reserve(phys, sz); + memblock_reserved_mark_noinit(phys, sz); + info.magic = KHO_PAGE_MAGIC; + info.order = order; + page->private = info.page_private; return 0; - -err_free: - kho_mem_ser_free(first_chunk); - return err; -} - -static void __init deserialize_bitmap(unsigned int order, - struct khoser_mem_bitmap_ptr *elm) -{ - struct kho_mem_phys_bits *bitmap = KHOSER_LOAD_PTR(elm->bitmap); - unsigned long bit; - - for_each_set_bit(bit, bitmap->preserve, PRESERVE_BITS) { - int sz = 1 << (order + PAGE_SHIFT); - phys_addr_t phys = - elm->phys_start + (bit << (order + PAGE_SHIFT)); - struct page *page = phys_to_page(phys); - union kho_page_info info; - - memblock_reserve(phys, sz); - memblock_reserved_mark_noinit(phys, sz); - info.magic = KHO_PAGE_MAGIC; - info.order = order; - page->private = info.page_private; - } } /* Returns physical address of the preserved memory map from FDT */ @@ -486,25 +501,13 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt) mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len); if (!mem_ptr || len != sizeof(u64)) { - pr_err("failed to get preserved memory bitmaps\n"); + pr_err("failed to get preserved memory map\n"); return 0; } return get_unaligned((const u64 *)mem_ptr); } -static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk) -{ - while (chunk) { - unsigned int i; - - for (i = 0; i != chunk->hdr.num_elms; i++) - deserialize_bitmap(chunk->hdr.order, - &chunk->bitmaps[i]); - chunk = KHOSER_LOAD_PTR(chunk->hdr.next); - } -} - /* * With KHO enabled, memory can become fragmented because KHO regions may * be anywhere in physical address space. The scratch regions give us a @@ -723,12 +726,13 @@ err_disable_kho: } /** - * kho_add_subtree - record the physical address of a sub FDT in KHO root tree. + * kho_add_subtree - record the physical address of a sub blob in KHO root tree. * @name: name of the sub tree. - * @fdt: the sub tree blob. + * @blob: the sub tree blob. + * @size: size of the blob in bytes. * * Creates a new child node named @name in KHO root FDT and records - * the physical address of @fdt. The pages of @fdt must also be preserved + * the physical address of @blob. The pages of @blob must also be preserved * by KHO for the new kernel to retrieve it after kexec. * * A debugfs blob entry is also created at @@ -737,10 +741,11 @@ err_disable_kho: * * Return: 0 on success, error code on failure */ -int kho_add_subtree(const char *name, void *fdt) +int kho_add_subtree(const char *name, void *blob, size_t size) { - phys_addr_t phys = virt_to_phys(fdt); + phys_addr_t phys = virt_to_phys(blob); void *root_fdt = kho_out.fdt; + u64 size_u64 = size; int err = -ENOMEM; int off, fdt_err; @@ -757,12 +762,18 @@ int kho_add_subtree(const char *name, void *fdt) goto out_pack; } - err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, + err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME, &phys, sizeof(phys)); if (err < 0) goto out_pack; - WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false)); + err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_SIZE_PROP_NAME, + &size_u64, sizeof(size_u64)); + if (err < 0) + goto out_pack; + + WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, name, blob, + size, false)); out_pack: fdt_pack(root_fdt); @@ -771,9 +782,9 @@ out_pack: } EXPORT_SYMBOL_GPL(kho_add_subtree); -void kho_remove_subtree(void *fdt) +void kho_remove_subtree(void *blob) { - phys_addr_t target_phys = virt_to_phys(fdt); + phys_addr_t target_phys = virt_to_phys(blob); void *root_fdt = kho_out.fdt; int off; int err; @@ -789,13 +800,13 @@ void kho_remove_subtree(void *fdt) const u64 *val; int len; - val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len); + val = fdt_getprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(phys_addr_t)) continue; if ((phys_addr_t)*val == target_phys) { fdt_del_node(root_fdt, off); - kho_debugfs_fdt_remove(&kho_out.dbg, fdt); + kho_debugfs_blob_remove(&kho_out.dbg, blob); break; } } @@ -815,14 +826,14 @@ EXPORT_SYMBOL_GPL(kho_remove_subtree); */ int kho_preserve_folio(struct folio *folio) { + struct kho_radix_tree *tree = &kho_out.radix_tree; const unsigned long pfn = folio_pfn(folio); const unsigned int order = folio_order(folio); - struct kho_mem_track *track = &kho_out.track; if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order))) return -EINVAL; - return __kho_preserve_order(track, pfn, order); + return kho_radix_add_page(tree, pfn, order); } EXPORT_SYMBOL_GPL(kho_preserve_folio); @@ -836,11 +847,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio); */ void kho_unpreserve_folio(struct folio *folio) { + struct kho_radix_tree *tree = &kho_out.radix_tree; const unsigned long pfn = folio_pfn(folio); const unsigned int order = folio_order(folio); - struct kho_mem_track *track = &kho_out.track; - __kho_unpreserve_order(track, pfn, order); + kho_radix_del_page(tree, pfn, order); } EXPORT_SYMBOL_GPL(kho_unpreserve_folio); @@ -856,7 +867,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio); */ int kho_preserve_pages(struct page *page, unsigned long nr_pages) { - struct kho_mem_track *track = &kho_out.track; + struct kho_radix_tree *tree = &kho_out.radix_tree; const unsigned long start_pfn = page_to_pfn(page); const unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn = start_pfn; @@ -869,10 +880,18 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages) } while (pfn < end_pfn) { - const unsigned int order = + unsigned int order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); - err = __kho_preserve_order(track, pfn, order); + /* + * Make sure all the pages in a single preservation are in the + * same NUMA node. The restore machinery can not cope with a + * preservation spanning multiple NUMA nodes. + */ + while (pfn_to_nid(pfn) != pfn_to_nid(pfn + (1UL << order) - 1)) + order--; + + err = kho_radix_add_page(tree, pfn, order); if (err) { failed_pfn = pfn; break; @@ -882,7 +901,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages) } if (err) - __kho_unpreserve(track, start_pfn, failed_pfn); + __kho_unpreserve(tree, start_pfn, failed_pfn); return err; } @@ -900,11 +919,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages); */ void kho_unpreserve_pages(struct page *page, unsigned long nr_pages) { - struct kho_mem_track *track = &kho_out.track; + struct kho_radix_tree *tree = &kho_out.radix_tree; const unsigned long start_pfn = page_to_pfn(page); const unsigned long end_pfn = start_pfn + nr_pages; - __kho_unpreserve(track, start_pfn, end_pfn); + __kho_unpreserve(tree, start_pfn, end_pfn); } EXPORT_SYMBOL_GPL(kho_unpreserve_pages); @@ -963,14 +982,14 @@ err_free: static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk, unsigned short order) { - struct kho_mem_track *track = &kho_out.track; + struct kho_radix_tree *tree = &kho_out.radix_tree; unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); - __kho_unpreserve(track, pfn, pfn + 1); + __kho_unpreserve(tree, pfn, pfn + 1); for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { pfn = PHYS_PFN(chunk->phys[i]); - __kho_unpreserve(track, pfn, pfn + (1 << order)); + __kho_unpreserve(tree, pfn, pfn + (1 << order)); } } @@ -1077,6 +1096,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_vmalloc); void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) { struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first); + kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_PROT_NORMAL; unsigned int align, order, shift, vm_flags; unsigned long total_pages, contig_pages; unsigned long addr, size; @@ -1128,7 +1148,8 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) goto err_free_pages_array; area = __get_vm_area_node(total_pages * PAGE_SIZE, align, shift, - vm_flags, VMALLOC_START, VMALLOC_END, + vm_flags | VM_UNINITIALIZED, + VMALLOC_START, VMALLOC_END, NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); if (!area) @@ -1143,6 +1164,13 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) area->nr_pages = total_pages; area->pages = pages; + if (vm_flags & VM_ALLOC) + kasan_flags |= KASAN_VMALLOC_VM_ALLOC; + + area->addr = kasan_unpoison_vmalloc(area->addr, total_pages * PAGE_SIZE, + kasan_flags); + clear_vm_uninitialized_flag(area); + return area->addr; err_free_vm_area: @@ -1239,33 +1267,11 @@ void kho_restore_free(void *mem) } EXPORT_SYMBOL_GPL(kho_restore_free); -int kho_finalize(void) -{ - int ret; - - if (!kho_enable) - return -EOPNOTSUPP; - - guard(mutex)(&kho_out.lock); - ret = kho_mem_serialize(&kho_out); - if (ret) - return ret; - - kho_out.finalized = true; - - return 0; -} - -bool kho_finalized(void) -{ - guard(mutex)(&kho_out.lock); - return kho_out.finalized; -} - struct kho_in { phys_addr_t fdt_phys; phys_addr_t scratch_phys; - phys_addr_t mem_map_phys; + char previous_release[__NEW_UTS_LEN + 1]; + u32 kexec_count; struct kho_debugfs dbg; }; @@ -1298,16 +1304,17 @@ bool is_kho_boot(void) EXPORT_SYMBOL_GPL(is_kho_boot); /** - * kho_retrieve_subtree - retrieve a preserved sub FDT by its name. - * @name: the name of the sub FDT passed to kho_add_subtree(). - * @phys: if found, the physical address of the sub FDT is stored in @phys. + * kho_retrieve_subtree - retrieve a preserved sub blob by its name. + * @name: the name of the sub blob passed to kho_add_subtree(). + * @phys: if found, the physical address of the sub blob is stored in @phys. + * @size: if not NULL and found, the size of the sub blob is stored in @size. * - * Retrieve a preserved sub FDT named @name and store its physical - * address in @phys. + * Retrieve a preserved sub blob named @name and store its physical + * address in @phys and optionally its size in @size. * * Return: 0 on success, error code on failure */ -int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +int kho_retrieve_subtree(const char *name, phys_addr_t *phys, size_t *size) { const void *fdt = kho_get_fdt(); const u64 *val; @@ -1323,46 +1330,181 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys) if (offset < 0) return -ENOENT; - val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len); + val = fdt_getprop(fdt, offset, KHO_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(*val)) return -EINVAL; *phys = (phys_addr_t)*val; + val = fdt_getprop(fdt, offset, KHO_SUB_TREE_SIZE_PROP_NAME, &len); + if (!val || len != sizeof(*val)) { + pr_warn("broken KHO subnode '%s': missing or invalid blob-size property\n", + name); + return -EINVAL; + } + + if (size) + *size = (size_t)*val; + return 0; } EXPORT_SYMBOL_GPL(kho_retrieve_subtree); +static int __init kho_mem_retrieve(const void *fdt) +{ + struct kho_radix_tree tree; + const phys_addr_t *mem; + int len; + + /* Retrieve the KHO radix tree from passed-in FDT. */ + mem = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len); + + if (!mem || len != sizeof(*mem)) { + pr_err("failed to get preserved KHO memory tree\n"); + return -ENOENT; + } + + if (!*mem) + return -EINVAL; + + tree.root = phys_to_virt(*mem); + mutex_init(&tree.lock); + return kho_radix_walk_tree(&tree, kho_preserved_memory_reserve); +} + static __init int kho_out_fdt_setup(void) { + struct kho_radix_tree *tree = &kho_out.radix_tree; void *root = kho_out.fdt; - u64 empty_mem_map = 0; + u64 preserved_mem_tree_pa; int err; err = fdt_create(root, PAGE_SIZE); err |= fdt_finish_reservemap(root); err |= fdt_begin_node(root, ""); err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE); - err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map, - sizeof(empty_mem_map)); + + preserved_mem_tree_pa = virt_to_phys(tree->root); + + err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, + &preserved_mem_tree_pa, + sizeof(preserved_mem_tree_pa)); + err |= fdt_end_node(root); err |= fdt_finish(root); return err; } +static void __init kho_in_kexec_metadata(void) +{ + struct kho_kexec_metadata *metadata; + phys_addr_t metadata_phys; + size_t blob_size; + int err; + + err = kho_retrieve_subtree(KHO_METADATA_NODE_NAME, &metadata_phys, + &blob_size); + if (err) + /* This is fine, previous kernel didn't export metadata */ + return; + + /* Check that, at least, "version" is present */ + if (blob_size < sizeof(u32)) { + pr_warn("kexec-metadata blob too small (%zu bytes)\n", + blob_size); + return; + } + + metadata = phys_to_virt(metadata_phys); + + if (metadata->version != KHO_KEXEC_METADATA_VERSION) { + pr_warn("kexec-metadata version %u not supported (expected %u)\n", + metadata->version, KHO_KEXEC_METADATA_VERSION); + return; + } + + if (blob_size < sizeof(*metadata)) { + pr_warn("kexec-metadata blob too small for v%u (%zu < %zu)\n", + metadata->version, blob_size, sizeof(*metadata)); + return; + } + + /* + * Copy data to the kernel structure that will persist during + * kernel lifetime. + */ + kho_in.kexec_count = metadata->kexec_count; + strscpy(kho_in.previous_release, metadata->previous_release, + sizeof(kho_in.previous_release)); + + pr_info("exec from: %s (count %u)\n", + kho_in.previous_release, kho_in.kexec_count); +} + +/* + * Create kexec metadata to pass kernel version and boot count to the + * next kernel. This keeps the core KHO ABI minimal and allows the + * metadata format to evolve independently. + */ +static __init int kho_out_kexec_metadata(void) +{ + struct kho_kexec_metadata *metadata; + int err; + + metadata = kho_alloc_preserve(sizeof(*metadata)); + if (IS_ERR(metadata)) + return PTR_ERR(metadata); + + metadata->version = KHO_KEXEC_METADATA_VERSION; + strscpy(metadata->previous_release, init_uts_ns.name.release, + sizeof(metadata->previous_release)); + /* kho_in.kexec_count is set to 0 on cold boot */ + metadata->kexec_count = kho_in.kexec_count + 1; + + err = kho_add_subtree(KHO_METADATA_NODE_NAME, metadata, + sizeof(*metadata)); + if (err) + kho_unpreserve_free(metadata); + + return err; +} + +static int __init kho_kexec_metadata_init(const void *fdt) +{ + int err; + + if (fdt) + kho_in_kexec_metadata(); + + /* Populate kexec metadata for the possible next kexec */ + err = kho_out_kexec_metadata(); + if (err) + pr_warn("failed to initialize kexec-metadata subtree: %d\n", + err); + + return err; +} + static __init int kho_init(void) { + struct kho_radix_tree *tree = &kho_out.radix_tree; const void *fdt = kho_get_fdt(); int err = 0; if (!kho_enable) return 0; + tree->root = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!tree->root) { + err = -ENOMEM; + goto err_free_scratch; + } + kho_out.fdt = kho_alloc_preserve(PAGE_SIZE); if (IS_ERR(kho_out.fdt)) { err = PTR_ERR(kho_out.fdt); - goto err_free_scratch; + goto err_free_kho_radix_tree_root; } err = kho_debugfs_init(); @@ -1377,6 +1519,10 @@ static __init int kho_init(void) if (err) goto err_free_fdt; + err = kho_kexec_metadata_init(fdt); + if (err) + goto err_free_fdt; + if (fdt) { kho_in_debugfs_init(&kho_in.dbg, fdt); return 0; @@ -1401,13 +1547,17 @@ static __init int kho_init(void) init_cma_reserved_pageblock(pfn_to_page(pfn)); } - WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt", - kho_out.fdt, true)); + WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, "fdt", + kho_out.fdt, + fdt_totalsize(kho_out.fdt), true)); return 0; err_free_fdt: kho_unpreserve_free(kho_out.fdt); +err_free_kho_radix_tree_root: + kfree(tree->root); + tree->root = NULL; err_free_scratch: kho_out.fdt = NULL; for (int i = 0; i < kho_scratch_cnt; i++) { @@ -1447,10 +1597,12 @@ static void __init kho_release_scratch(void) void __init kho_memory_init(void) { - if (kho_in.mem_map_phys) { + if (kho_in.scratch_phys) { kho_scratch = phys_to_virt(kho_in.scratch_phys); kho_release_scratch(); - kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys)); + + if (kho_mem_retrieve(kho_get_fdt())) + kho_in.fdt_phys = 0; } else { kho_reserve_scratch(); } @@ -1528,7 +1680,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, kho_in.fdt_phys = fdt_phys; kho_in.scratch_phys = scratch_phys; - kho_in.mem_map_phys = mem_map_phys; kho_scratch_cnt = scratch_cnt; populated = true; diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c index 2f93939168ab..257ee8a52be6 100644 --- a/kernel/liveupdate/kexec_handover_debugfs.c +++ b/kernel/liveupdate/kexec_handover_debugfs.c @@ -13,6 +13,7 @@ #include <linux/io.h> #include <linux/libfdt.h> #include <linux/mm.h> +#include <linux/kho/abi/kexec_handover.h> #include "kexec_handover_internal.h" static struct dentry *debugfs_root; @@ -23,8 +24,9 @@ struct fdt_debugfs { struct dentry *file; }; -static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir, - const char *name, const void *fdt) +static int __kho_debugfs_blob_add(struct list_head *list, struct dentry *dir, + const char *name, const void *blob, + size_t size) { struct fdt_debugfs *f; struct dentry *file; @@ -33,8 +35,8 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir, if (!f) return -ENOMEM; - f->wrapper.data = (void *)fdt; - f->wrapper.size = fdt_totalsize(fdt); + f->wrapper.data = (void *)blob; + f->wrapper.size = size; file = debugfs_create_blob(name, 0400, dir, &f->wrapper); if (IS_ERR(file)) { @@ -48,8 +50,8 @@ static int __kho_debugfs_fdt_add(struct list_head *list, struct dentry *dir, return 0; } -int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name, - const void *fdt, bool root) +int kho_debugfs_blob_add(struct kho_debugfs *dbg, const char *name, + const void *blob, size_t size, bool root) { struct dentry *dir; @@ -58,15 +60,15 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name, else dir = dbg->sub_fdt_dir; - return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt); + return __kho_debugfs_blob_add(&dbg->fdt_list, dir, name, blob, size); } -void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt) +void kho_debugfs_blob_remove(struct kho_debugfs *dbg, void *blob) { struct fdt_debugfs *ff; list_for_each_entry(ff, &dbg->fdt_list, list) { - if (ff->wrapper.data == fdt) { + if (ff->wrapper.data == blob) { debugfs_remove(ff->file); list_del(&ff->list); kfree(ff); @@ -75,24 +77,6 @@ void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt) } } -static int kho_out_finalize_get(void *data, u64 *val) -{ - *val = kho_finalized(); - - return 0; -} - -static int kho_out_finalize_set(void *data, u64 val) -{ - if (val) - return kho_finalize(); - else - return -EINVAL; -} - -DEFINE_DEBUGFS_ATTRIBUTE(kho_out_finalize_fops, kho_out_finalize_get, - kho_out_finalize_set, "%llu\n"); - static int scratch_phys_show(struct seq_file *m, void *v) { for (int i = 0; i < kho_scratch_cnt; i++) @@ -130,28 +114,42 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt) goto err_rmdir; } - err = __kho_debugfs_fdt_add(&dbg->fdt_list, dir, "fdt", fdt); + err = __kho_debugfs_blob_add(&dbg->fdt_list, dir, "fdt", fdt, + fdt_totalsize(fdt)); if (err) goto err_rmdir; fdt_for_each_subnode(child, fdt, 0) { int len = 0; const char *name = fdt_get_name(fdt, child, NULL); - const u64 *fdt_phys; + const u64 *blob_phys; + const u64 *blob_size; + void *blob; - fdt_phys = fdt_getprop(fdt, child, "fdt", &len); - if (!fdt_phys) + blob_phys = fdt_getprop(fdt, child, + KHO_SUB_TREE_PROP_NAME, &len); + if (!blob_phys) + continue; + if (len != sizeof(*blob_phys)) { + pr_warn("node %s prop %s has invalid length: %d\n", + name, KHO_SUB_TREE_PROP_NAME, len); continue; - if (len != sizeof(*fdt_phys)) { - pr_warn("node %s prop fdt has invalid length: %d\n", - name, len); + } + + blob_size = fdt_getprop(fdt, child, + KHO_SUB_TREE_SIZE_PROP_NAME, &len); + if (!blob_size || len != sizeof(*blob_size)) { + pr_warn("node %s missing or invalid %s property\n", + name, KHO_SUB_TREE_SIZE_PROP_NAME); continue; } - err = __kho_debugfs_fdt_add(&dbg->fdt_list, sub_fdt_dir, name, - phys_to_virt(*fdt_phys)); + + blob = phys_to_virt(*blob_phys); + err = __kho_debugfs_blob_add(&dbg->fdt_list, sub_fdt_dir, name, + blob, *blob_size); if (err) { - pr_warn("failed to add fdt %s to debugfs: %pe\n", name, - ERR_PTR(err)); + pr_warn("failed to add blob %s to debugfs: %pe\n", + name, ERR_PTR(err)); continue; } } @@ -198,11 +196,6 @@ __init int kho_out_debugfs_init(struct kho_debugfs *dbg) if (IS_ERR(f)) goto err_rmdir; - f = debugfs_create_file("finalize", 0600, dir, NULL, - &kho_out_finalize_fops); - if (IS_ERR(f)) - goto err_rmdir; - dbg->dir = dir; dbg->sub_fdt_dir = sub_fdt_dir; return 0; diff --git a/kernel/liveupdate/kexec_handover_internal.h b/kernel/liveupdate/kexec_handover_internal.h index 0202c85ad14f..0399ff107775 100644 --- a/kernel/liveupdate/kexec_handover_internal.h +++ b/kernel/liveupdate/kexec_handover_internal.h @@ -22,25 +22,23 @@ struct kho_debugfs {}; extern struct kho_scratch *kho_scratch; extern unsigned int kho_scratch_cnt; -bool kho_finalized(void); -int kho_finalize(void); - #ifdef CONFIG_KEXEC_HANDOVER_DEBUGFS int kho_debugfs_init(void); void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt); int kho_out_debugfs_init(struct kho_debugfs *dbg); -int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name, - const void *fdt, bool root); -void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt); +int kho_debugfs_blob_add(struct kho_debugfs *dbg, const char *name, + const void *blob, size_t size, bool root); +void kho_debugfs_blob_remove(struct kho_debugfs *dbg, void *blob); #else static inline int kho_debugfs_init(void) { return 0; } static inline void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt) { } static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; } -static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name, - const void *fdt, bool root) { return 0; } -static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, - void *fdt) { } +static inline int kho_debugfs_blob_add(struct kho_debugfs *dbg, + const char *name, const void *blob, + size_t size, bool root) { return 0; } +static inline void kho_debugfs_blob_remove(struct kho_debugfs *dbg, + void *blob) { } #endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */ #ifdef CONFIG_KEXEC_HANDOVER_DEBUG diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c index dda7bb57d421..803f51c84275 100644 --- a/kernel/liveupdate/luo_core.c +++ b/kernel/liveupdate/luo_core.c @@ -54,6 +54,7 @@ #include <linux/liveupdate.h> #include <linux/miscdevice.h> #include <linux/mm.h> +#include <linux/rwsem.h> #include <linux/sizes.h> #include <linux/string.h> #include <linux/unaligned.h> @@ -68,6 +69,11 @@ static struct { u64 liveupdate_num; } luo_global; +/* + * luo_register_rwlock - Protects registration of file handlers and FLBs. + */ +DECLARE_RWSEM(luo_register_rwlock); + static int __init early_liveupdate_param(char *buf) { return kstrtobool(buf, &luo_global.enabled); @@ -88,7 +94,7 @@ static int __init luo_early_startup(void) } /* Retrieve LUO subtree, and verify its format. */ - err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys); + err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys, NULL); if (err) { if (err != -ENOENT) { pr_err("failed to retrieve FDT '%s' from KHO: %pe\n", @@ -172,7 +178,8 @@ static int __init luo_fdt_setup(void) if (err) goto exit_free; - err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out); + err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out, + fdt_totalsize(fdt_out)); if (err) goto exit_free; luo_global.fdt_out = fdt_out; @@ -230,17 +237,7 @@ int liveupdate_reboot(void) luo_flb_serialize(); - err = kho_finalize(); - if (err) { - pr_err("kho_finalize failed %d\n", err); - /* - * kho_finalize() may return libfdt errors, to aboid passing to - * userspace unknown errors, change this to EAGAIN. - */ - err = -EAGAIN; - } - - return err; + return 0; } /** diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 5acee4174bf0..a0a419085e28 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -108,12 +108,16 @@ #include <linux/liveupdate.h> #include <linux/module.h> #include <linux/sizes.h> +#include <linux/xarray.h> #include <linux/slab.h> #include <linux/string.h> #include "luo_internal.h" static LIST_HEAD(luo_file_handler_list); +/* Keep track of files being preserved by LUO */ +static DEFINE_XARRAY(luo_preserved_files); + /* 2 4K pages, give space for 128 files per file_set */ #define LUO_FILE_PGCNT 2ul #define LUO_FILE_MAX \ @@ -203,6 +207,12 @@ static void luo_free_files_mem(struct luo_file_set *file_set) file_set->files = NULL; } +static unsigned long luo_get_id(struct liveupdate_file_handler *fh, + struct file *file) +{ + return fh->ops->get_id ? fh->ops->get_id(file) : (unsigned long)file; +} + static bool luo_token_is_used(struct luo_file_set *file_set, u64 token) { struct luo_file *iter; @@ -248,6 +258,7 @@ static bool luo_token_is_used(struct luo_file_set *file_set, u64 token) * Context: Can be called from an ioctl handler during normal system operation. * Return: 0 on success. Returns a negative errno on failure: * -EEXIST if the token is already used. + * -EBUSY if the file descriptor is already preserved by another session. * -EBADF if the file descriptor is invalid. * -ENOSPC if the file_set is full. * -ENOENT if no compatible handler is found. @@ -277,20 +288,28 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) goto err_fput; err = -ENOENT; + down_read(&luo_register_rwlock); list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (fh->ops->can_preserve(fh, file)) { - err = 0; + if (try_module_get(fh->ops->owner)) + err = 0; break; } } + up_read(&luo_register_rwlock); /* err is still -ENOENT if no handler was found */ if (err) goto err_free_files_mem; + err = xa_insert(&luo_preserved_files, luo_get_id(fh, file), + file, GFP_KERNEL); + if (err) + goto err_module_put; + err = luo_flb_file_preserve(fh); if (err) - goto err_free_files_mem; + goto err_erase_xa; luo_file = kzalloc_obj(*luo_file); if (!luo_file) { @@ -320,6 +339,10 @@ err_kfree: kfree(luo_file); err_flb_unpreserve: luo_flb_file_unpreserve(fh); +err_erase_xa: + xa_erase(&luo_preserved_files, luo_get_id(fh, file)); +err_module_put: + module_put(fh->ops->owner); err_free_files_mem: luo_free_files_mem(file_set); err_fput: @@ -362,7 +385,10 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set) args.private_data = luo_file->private_data; luo_file->fh->ops->unpreserve(&args); luo_flb_file_unpreserve(luo_file->fh); + module_put(luo_file->fh->ops->owner); + xa_erase(&luo_preserved_files, + luo_get_id(luo_file->fh, luo_file->file)); list_del(&luo_file->list); file_set->count--; @@ -606,6 +632,11 @@ int luo_retrieve_file(struct luo_file_set *file_set, u64 token, luo_file->file = args.file; /* Get reference so we can keep this file in LUO until finish */ get_file(luo_file->file); + + WARN_ON(xa_insert(&luo_preserved_files, + luo_get_id(luo_file->fh, luo_file->file), + luo_file->file, GFP_KERNEL)); + *filep = luo_file->file; luo_file->retrieve_status = 1; @@ -646,6 +677,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set, luo_file->fh->ops->finish(&args); luo_flb_file_finish(luo_file->fh); + module_put(luo_file->fh->ops->owner); } /** @@ -701,8 +733,11 @@ int luo_file_finish(struct luo_file_set *file_set) luo_file_finish_one(file_set, luo_file); - if (luo_file->file) + if (luo_file->file) { + xa_erase(&luo_preserved_files, + luo_get_id(luo_file->fh, luo_file->file)); fput(luo_file->file); + } list_del(&luo_file->list); file_set->count--; mutex_destroy(&luo_file->mutex); @@ -777,22 +812,28 @@ int luo_file_deserialize(struct luo_file_set *file_set, bool handler_found = false; struct luo_file *luo_file; + down_read(&luo_register_rwlock); list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (!strcmp(fh->compatible, file_ser[i].compatible)) { - handler_found = true; + if (try_module_get(fh->ops->owner)) + handler_found = true; break; } } + up_read(&luo_register_rwlock); if (!handler_found) { - pr_warn("No registered handler for compatible '%s'\n", + pr_warn("No registered handler for compatible '%.*s'\n", + (int)sizeof(file_ser[i].compatible), file_ser[i].compatible); return -ENOENT; } luo_file = kzalloc_obj(*luo_file); - if (!luo_file) + if (!luo_file) { + module_put(fh->ops->owner); return -ENOMEM; + } luo_file->fh = fh; luo_file->file = NULL; @@ -842,41 +883,28 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) return -EINVAL; } - /* - * Ensure the system is quiescent (no active sessions). - * This prevents registering new handlers while sessions are active or - * while deserialization is in progress. - */ - if (!luo_session_quiesce()) - return -EBUSY; - + down_write(&luo_register_rwlock); /* Check for duplicate compatible strings */ list_private_for_each_entry(fh_iter, &luo_file_handler_list, list) { if (!strcmp(fh_iter->compatible, fh->compatible)) { pr_err("File handler registration failed: Compatible string '%s' already registered.\n", fh->compatible); err = -EEXIST; - goto err_resume; + goto err_unlock; } } - /* Pin the module implementing the handler */ - if (!try_module_get(fh->ops->owner)) { - err = -EAGAIN; - goto err_resume; - } - INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, flb_list)); INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list)); list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list); - luo_session_resume(); + up_write(&luo_register_rwlock); liveupdate_test_register(fh); return 0; -err_resume: - luo_session_resume(); +err_unlock: + up_write(&luo_register_rwlock); return err; } @@ -886,41 +914,13 @@ err_resume: * * Unregisters the file handler from the liveupdate core. This function * reverses the operations of liveupdate_register_file_handler(). - * - * It ensures safe removal by checking that: - * No live update session is currently in progress. - * No FLB registered with this file handler. - * - * If the unregistration fails, the internal test state is reverted. - * - * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live - * update is in progress, can't quiesce live update or FLB is registred with - * this file handler. */ -int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) +void liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) { - int err = -EBUSY; - if (!liveupdate_enabled()) - return -EOPNOTSUPP; - - liveupdate_test_unregister(fh); - - if (!luo_session_quiesce()) - goto err_register; - - if (!list_empty(&ACCESS_PRIVATE(fh, flb_list))) - goto err_resume; + return; + guard(rwsem_write)(&luo_register_rwlock); + luo_flb_unregister_all(fh); list_del(&ACCESS_PRIVATE(fh, list)); - module_put(fh->ops->owner); - luo_session_resume(); - - return 0; - -err_resume: - luo_session_resume(); -err_register: - liveupdate_test_register(fh); - return err; } diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c index f52e8114837e..00f5494812c4 100644 --- a/kernel/liveupdate/luo_flb.c +++ b/kernel/liveupdate/luo_flb.c @@ -89,13 +89,18 @@ struct luo_flb_link { static struct luo_flb_private *luo_flb_get_private(struct liveupdate_flb *flb) { struct luo_flb_private *private = &ACCESS_PRIVATE(flb, private); + static DEFINE_SPINLOCK(luo_flb_init_lock); + if (smp_load_acquire(&private->initialized)) + return private; + + guard(spinlock)(&luo_flb_init_lock); if (!private->initialized) { mutex_init(&private->incoming.lock); mutex_init(&private->outgoing.lock); INIT_LIST_HEAD(&private->list); private->users = 0; - private->initialized = true; + smp_store_release(&private->initialized, true); } return private; @@ -110,10 +115,15 @@ static int luo_flb_file_preserve_one(struct liveupdate_flb *flb) struct liveupdate_flb_op_args args = {0}; int err; + if (!try_module_get(flb->ops->owner)) + return -ENODEV; + args.flb = flb; err = flb->ops->preserve(&args); - if (err) + if (err) { + module_put(flb->ops->owner); return err; + } private->outgoing.data = args.data; private->outgoing.obj = args.obj; } @@ -141,6 +151,7 @@ static void luo_flb_file_unpreserve_one(struct liveupdate_flb *flb) private->outgoing.data = 0; private->outgoing.obj = NULL; + module_put(flb->ops->owner); } } } @@ -176,12 +187,17 @@ static int luo_flb_retrieve_one(struct liveupdate_flb *flb) if (!found) return -ENOENT; + if (!try_module_get(flb->ops->owner)) + return -ENODEV; + args.flb = flb; args.data = private->incoming.data; err = flb->ops->retrieve(&args); - if (err) + if (err) { + module_put(flb->ops->owner); return err; + } private->incoming.obj = args.obj; private->incoming.retrieved = true; @@ -215,6 +231,7 @@ static void luo_flb_file_finish_one(struct liveupdate_flb *flb) private->incoming.data = 0; private->incoming.obj = NULL; private->incoming.finished = true; + module_put(flb->ops->owner); } } } @@ -240,17 +257,20 @@ int luo_flb_file_preserve(struct liveupdate_file_handler *fh) struct luo_flb_link *iter; int err = 0; + down_read(&luo_register_rwlock); list_for_each_entry(iter, flb_list, list) { err = luo_flb_file_preserve_one(iter->flb); if (err) goto exit_err; } + up_read(&luo_register_rwlock); return 0; exit_err: list_for_each_entry_continue_reverse(iter, flb_list, list) luo_flb_file_unpreserve_one(iter->flb); + up_read(&luo_register_rwlock); return err; } @@ -272,6 +292,7 @@ void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh) struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); struct luo_flb_link *iter; + guard(rwsem_read)(&luo_register_rwlock); list_for_each_entry_reverse(iter, flb_list, list) luo_flb_file_unpreserve_one(iter->flb); } @@ -292,10 +313,67 @@ void luo_flb_file_finish(struct liveupdate_file_handler *fh) struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); struct luo_flb_link *iter; + guard(rwsem_read)(&luo_register_rwlock); list_for_each_entry_reverse(iter, flb_list, list) luo_flb_file_finish_one(iter->flb); } +static void luo_flb_unregister_one(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + bool found = false; + + /* Find and remove the link from the file handler's list */ + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) { + list_del(&iter->list); + kfree(iter); + found = true; + break; + } + } + + if (!found) { + pr_warn("Failed to unregister FLB '%s': not found in file handler '%s'\n", + flb->compatible, fh->compatible); + return; + } + + private->users--; + + /* + * If this is the last file-handler with which we are registred, remove + * from the global list. + */ + if (!private->users) { + list_del_init(&private->list); + luo_flb_global.count--; + } +} + +/** + * luo_flb_unregister_all - Unregister all FLBs associated with a file handler. + * @fh: The file handler whose FLBs should be unregistered. + * + * This function iterates through the list of FLBs associated with the given + * file handler and unregisters them all one by one. + */ +void luo_flb_unregister_all(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter, *tmp; + + if (!liveupdate_enabled()) + return; + + lockdep_assert_held_write(&luo_register_rwlock); + list_for_each_entry_safe(iter, tmp, flb_list, list) + luo_flb_unregister_one(fh, iter->flb); +} + /** * liveupdate_register_flb - Associate an FLB with a file handler and register it globally. * @fh: The file handler that will now depend on the FLB. @@ -326,7 +404,6 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh, struct luo_flb_link *link __free(kfree) = NULL; struct liveupdate_flb *gflb; struct luo_flb_link *iter; - int err; if (!liveupdate_enabled()) return -EOPNOTSUPP; @@ -347,19 +424,12 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh, if (!link) return -ENOMEM; - /* - * Ensure the system is quiescent (no active sessions). - * This acts as a global lock for registration: no other thread can - * be in this section, and no sessions can be creating/using FDs. - */ - if (!luo_session_quiesce()) - return -EBUSY; + guard(rwsem_write)(&luo_register_rwlock); /* Check that this FLB is not already linked to this file handler */ - err = -EEXIST; list_for_each_entry(iter, flb_list, list) { if (iter->flb == flb) - goto err_resume; + return -EEXIST; } /* @@ -367,25 +437,16 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh, * is registered */ if (!private->users) { - if (WARN_ON(!list_empty(&private->list))) { - err = -EINVAL; - goto err_resume; - } + if (WARN_ON(!list_empty(&private->list))) + return -EINVAL; - if (luo_flb_global.count == LUO_FLB_MAX) { - err = -ENOSPC; - goto err_resume; - } + if (luo_flb_global.count == LUO_FLB_MAX) + return -ENOSPC; /* Check that compatible string is unique in global list */ list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { if (!strcmp(gflb->compatible, flb->compatible)) - goto err_resume; - } - - if (!try_module_get(flb->ops->owner)) { - err = -EAGAIN; - goto err_resume; + return -EEXIST; } list_add_tail(&private->list, &luo_flb_global.list); @@ -396,13 +457,8 @@ int liveupdate_register_flb(struct liveupdate_file_handler *fh, private->users++; link->flb = flb; list_add_tail(&no_free_ptr(link)->list, flb_list); - luo_session_resume(); return 0; - -err_resume: - luo_session_resume(); - return err; } /** @@ -418,63 +474,17 @@ err_resume: * the FLB is removed from the global registry and the reference to its * owner module (acquired during registration) is released. * - * Context: This function ensures the session is quiesced (no active FDs - * being created) during the update. It is typically called from a - * subsystem's module exit function. - * Return: 0 on success. - * -EOPNOTSUPP if live update is disabled. - * -EBUSY if the live update session is active and cannot be quiesced. - * -ENOENT if the FLB was not found in the file handler's list. + * Context: It is typically called from a subsystem's module exit function. */ -int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, - struct liveupdate_flb *flb) +void liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) { - struct luo_flb_private *private = luo_flb_get_private(flb); - struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); - struct luo_flb_link *iter; - int err = -ENOENT; - if (!liveupdate_enabled()) - return -EOPNOTSUPP; + return; - /* - * Ensure the system is quiescent (no active sessions). - * This acts as a global lock for unregistration. - */ - if (!luo_session_quiesce()) - return -EBUSY; + guard(rwsem_write)(&luo_register_rwlock); - /* Find and remove the link from the file handler's list */ - list_for_each_entry(iter, flb_list, list) { - if (iter->flb == flb) { - list_del(&iter->list); - kfree(iter); - err = 0; - break; - } - } - - if (err) - goto err_resume; - - private->users--; - /* - * If this is the last file-handler with which we are registred, remove - * from the global list, and relese module reference. - */ - if (!private->users) { - list_del_init(&private->list); - luo_flb_global.count--; - module_put(flb->ops->owner); - } - - luo_session_resume(); - - return 0; - -err_resume: - luo_session_resume(); - return err; + luo_flb_unregister_one(fh, flb); } /** @@ -492,7 +502,8 @@ err_resume: * * Return: 0 on success, or a negative errno on failure. -ENODATA means no * incoming FLB data, -ENOENT means specific flb not found in the incoming - * data, and -EOPNOTSUPP when live update is disabled or not configured. + * data, -ENODEV if the FLB's module is unloading, and -EOPNOTSUPP when + * live update is disabled or not configured. */ int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp) { @@ -638,6 +649,7 @@ void luo_flb_serialize(void) struct liveupdate_flb *gflb; int i = 0; + guard(rwsem_read)(&luo_register_rwlock); list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { struct luo_flb_private *private = luo_flb_get_private(gflb); diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index 8083d8739b09..875844d7a41d 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -77,14 +77,14 @@ struct luo_session { struct mutex mutex; }; +extern struct rw_semaphore luo_register_rwlock; + int luo_session_create(const char *name, struct file **filep); int luo_session_retrieve(const char *name, struct file **filep); int __init luo_session_setup_outgoing(void *fdt); int __init luo_session_setup_incoming(void *fdt); int luo_session_serialize(void); int luo_session_deserialize(void); -bool luo_session_quiesce(void); -void luo_session_resume(void); int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd); void luo_file_unpreserve_files(struct luo_file_set *file_set); @@ -103,16 +103,15 @@ void luo_file_set_destroy(struct luo_file_set *file_set); int luo_flb_file_preserve(struct liveupdate_file_handler *fh); void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh); void luo_flb_file_finish(struct liveupdate_file_handler *fh); +void luo_flb_unregister_all(struct liveupdate_file_handler *fh); int __init luo_flb_setup_outgoing(void *fdt); int __init luo_flb_setup_incoming(void *fdt); void luo_flb_serialize(void); #ifdef CONFIG_LIVEUPDATE_TEST void liveupdate_test_register(struct liveupdate_file_handler *fh); -void liveupdate_test_unregister(struct liveupdate_file_handler *fh); #else static inline void liveupdate_test_register(struct liveupdate_file_handler *fh) { } -static inline void liveupdate_test_unregister(struct liveupdate_file_handler *fh) { } #endif #endif /* _LINUX_LUO_INTERNAL_H */ diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c index 783677295640..a3327a28fc1f 100644 --- a/kernel/liveupdate/luo_session.c +++ b/kernel/liveupdate/luo_session.c @@ -544,7 +544,8 @@ int luo_session_deserialize(void) session = luo_session_alloc(sh->ser[i].name); if (IS_ERR(session)) { - pr_warn("Failed to allocate session [%s] during deserialization %pe\n", + pr_warn("Failed to allocate session [%.*s] during deserialization %pe\n", + (int)sizeof(sh->ser[i].name), sh->ser[i].name, session); return PTR_ERR(session); } @@ -558,8 +559,13 @@ int luo_session_deserialize(void) } scoped_guard(mutex, &session->mutex) { - luo_file_deserialize(&session->file_set, - &sh->ser[i].file_set_ser); + err = luo_file_deserialize(&session->file_set, + &sh->ser[i].file_set_ser); + } + if (err) { + pr_warn("Failed to deserialize files for session [%s] %pe\n", + session->name, ERR_PTR(err)); + return err; } } @@ -601,46 +607,3 @@ err_undo: return err; } -/** - * luo_session_quiesce - Ensure no active sessions exist and lock session lists. - * - * Acquires exclusive write locks on both incoming and outgoing session lists. - * It then validates no sessions exist in either list. - * - * This mechanism is used during file handler un/registration to ensure that no - * sessions are currently using the handler, and no new sessions can be created - * while un/registration is in progress. - * - * This prevents registering new handlers while sessions are active or - * while deserialization is in progress. - * - * Return: - * true - System is quiescent (0 sessions) and locked. - * false - Active sessions exist. The locks are released internally. - */ -bool luo_session_quiesce(void) -{ - down_write(&luo_session_global.incoming.rwsem); - down_write(&luo_session_global.outgoing.rwsem); - - if (luo_session_global.incoming.count || - luo_session_global.outgoing.count) { - up_write(&luo_session_global.outgoing.rwsem); - up_write(&luo_session_global.incoming.rwsem); - return false; - } - - return true; -} - -/** - * luo_session_resume - Unlock session lists and resume normal activity. - * - * Releases the exclusive locks acquired by a successful call to - * luo_session_quiesce(). - */ -void luo_session_resume(void) -{ - up_write(&luo_session_global.outgoing.rwsem); - up_write(&luo_session_global.incoming.rwsem); -} |
