| author | Mike Rapoport (Microsoft) <rppt@kernel.org> | 2025-11-01 10:23:18 -0400 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-27 14:24:32 -0800 |
| commit | 70f9133096c833922c3b63461480248cefa7bb0f | |
| tree | 6771ec1f0c950e656f539733d361eab63481b83e /kernel | |
| parent | 03d3963464a43654703938a66503cd686c5fc54e | |
kho: drop notifiers
The KHO framework uses a notifier chain as the mechanism for clients to
participate in the finalization process. While this works for a single,
central state machine, it is too restrictive for kernel-internal
components like pstore/reserve_mem or IMA. These components need a
simpler, direct way to register their state for preservation (e.g., during
their initcall) without being part of a complex, shutdown-time notifier
sequence. The notifier model forces all participants into a single
finalization flow and makes direct preservation from an arbitrary context
difficult.

This patch refactors the client participation model by removing the
notifier chain and introducing a direct API for managing FDT subtrees.

The core kho_finalize() and kho_abort() state machine remains, but clients
now register their data with KHO beforehand.
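
To make the new flow concrete, here is a minimal sketch of what client
registration could look like with this API. The "example" client, its
compatible string and its initcall are hypothetical; only kho_add_subtree(),
kho_remove_subtree() and kho_preserve_folio() are the KHO interfaces touched
here, and the client is assumed to build and preserve its own sub-FDT blob:

	#include <linux/gfp.h>
	#include <linux/init.h>
	#include <linux/kexec_handover.h>
	#include <linux/libfdt.h>
	#include <linux/mm.h>

	static void *example_fdt;

	static int __init example_kho_register(void)
	{
		int err = 0;

		example_fdt = (void *)get_zeroed_page(GFP_KERNEL);
		if (!example_fdt)
			return -ENOMEM;

		/* Build a self-contained sub-FDT describing the state to hand over. */
		err |= fdt_create(example_fdt, PAGE_SIZE);
		err |= fdt_finish_reservemap(example_fdt);
		err |= fdt_begin_node(example_fdt, "");
		err |= fdt_property_string(example_fdt, "compatible", "example-v1");
		/* ...properties describing the preserved state would go here... */
		err |= fdt_end_node(example_fdt);
		err |= fdt_finish(example_fdt);
		if (err)
			goto err_free;

		/* Link the blob into the KHO root tree under an "example" node. */
		err = kho_add_subtree("example", example_fdt);
		if (err)
			goto err_free;

		/* Make sure the page backing the blob itself survives kexec. */
		err = kho_preserve_folio(virt_to_folio(example_fdt));
		if (err)
			goto err_remove;

		return 0;

	err_remove:
		kho_remove_subtree(example_fdt);
	err_free:
		free_page((unsigned long)example_fdt);
		example_fdt = NULL;
		return err;
	}
	late_initcall(example_kho_register);

Unlike the notifier model, nothing here runs at finalization time:
kho_finalize() simply walks the registered sub-FDT list and links each blob's
physical address into the root tree.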
Link: https://lkml.kernel.org/r/20251101142325.1326536-3-pasha.tatashin@soleen.com
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Pratyush Yadav <pratyush@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Simon Horman <horms@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/kexec_handover.c | 166 |
| -rw-r--r-- | kernel/kexec_handover_debugfs.c | 17 |
| -rw-r--r-- | kernel/kexec_handover_internal.h | 5 |
3 files changed, 104 insertions, 84 deletions
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index befa6ceab574..3dd917bfedcc 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -16,7 +16,6 @@
 #include <linux/libfdt.h>
 #include <linux/list.h>
 #include <linux/memblock.h>
-#include <linux/notifier.h>
 #include <linux/page-isolation.h>
 #include <linux/vmalloc.h>
 
@@ -103,29 +102,34 @@ struct kho_mem_track {
 
 struct khoser_mem_chunk;
 
-struct kho_serialization {
-	struct page *fdt;
-	struct kho_mem_track track;
-	/* First chunk of serialized preserved memory map */
-	struct khoser_mem_chunk *preserved_mem_map;
+struct kho_sub_fdt {
+	struct list_head l;
+	const char *name;
+	void *fdt;
 };
 
 struct kho_out {
-	struct blocking_notifier_head chain_head;
-	struct mutex lock; /* protects KHO FDT finalization */
-	struct kho_serialization ser;
+	void *fdt;
 	bool finalized;
+	struct mutex lock; /* protects KHO FDT finalization */
+
+	struct list_head sub_fdts;
+	struct mutex fdts_lock;
+
+	struct kho_mem_track track;
+	/* First chunk of serialized preserved memory map */
+	struct khoser_mem_chunk *preserved_mem_map;
+
 	struct kho_debugfs dbg;
 };
 
 static struct kho_out kho_out = {
-	.chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
 	.lock = __MUTEX_INITIALIZER(kho_out.lock),
-	.ser = {
-		.track = {
-			.orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
-		},
+	.track = {
+		.orders = XARRAY_INIT(kho_out.track.orders, 0),
 	},
+	.sub_fdts = LIST_HEAD_INIT(kho_out.sub_fdts),
+	.fdts_lock = __MUTEX_INITIALIZER(kho_out.fdts_lock),
 	.finalized = false,
 };
 
@@ -369,7 +373,7 @@ static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
 	}
 }
 
-static int kho_mem_serialize(struct kho_serialization *ser)
+static int kho_mem_serialize(struct kho_out *kho_out)
 {
 	struct khoser_mem_chunk *first_chunk = NULL;
 	struct khoser_mem_chunk *chunk = NULL;
@@ -377,7 +381,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
 	unsigned long order;
 	int err = -ENOMEM;
 
-	xa_for_each(&ser->track.orders, order, physxa) {
+	xa_for_each(&kho_out->track.orders, order, physxa) {
 		struct kho_mem_phys_bits *bits;
 		unsigned long phys;
 
@@ -409,7 +413,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
 		}
 	}
 
-	ser->preserved_mem_map = first_chunk;
+	kho_out->preserved_mem_map = first_chunk;
 
 	return 0;
 
@@ -670,7 +674,6 @@ err_disable_kho:
 
 /**
  * kho_add_subtree - record the physical address of a sub FDT in KHO root tree.
- * @ser: serialization control object passed by KHO notifiers.
  * @name: name of the sub tree.
  * @fdt: the sub tree blob.
  *
@@ -684,34 +687,41 @@ err_disable_kho:
  *
  * Return: 0 on success, error code on failure
  */
-int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
+int kho_add_subtree(const char *name, void *fdt)
 {
-	int err = 0;
-	u64 phys = (u64)virt_to_phys(fdt);
-	void *root = page_to_virt(ser->fdt);
+	struct kho_sub_fdt *sub_fdt;
 
-	err |= fdt_begin_node(root, name);
-	err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
-	err |= fdt_end_node(root);
+	sub_fdt = kmalloc(sizeof(*sub_fdt), GFP_KERNEL);
+	if (!sub_fdt)
+		return -ENOMEM;
 
-	if (err)
-		return err;
+	INIT_LIST_HEAD(&sub_fdt->l);
+	sub_fdt->name = name;
+	sub_fdt->fdt = fdt;
 
-	return kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false);
-}
-EXPORT_SYMBOL_GPL(kho_add_subtree);
+	guard(mutex)(&kho_out.fdts_lock);
+	list_add_tail(&sub_fdt->l, &kho_out.sub_fdts);
+	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, name, fdt, false));
 
-int register_kho_notifier(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&kho_out.chain_head, nb);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(register_kho_notifier);
+EXPORT_SYMBOL_GPL(kho_add_subtree);
 
-int unregister_kho_notifier(struct notifier_block *nb)
+void kho_remove_subtree(void *fdt)
 {
-	return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
+	struct kho_sub_fdt *sub_fdt;
+
+	guard(mutex)(&kho_out.fdts_lock);
+	list_for_each_entry(sub_fdt, &kho_out.sub_fdts, l) {
+		if (sub_fdt->fdt == fdt) {
+			list_del(&sub_fdt->l);
+			kfree(sub_fdt);
+			kho_debugfs_fdt_remove(&kho_out.dbg, fdt);
+			break;
+		}
+	}
 }
-EXPORT_SYMBOL_GPL(unregister_kho_notifier);
+EXPORT_SYMBOL_GPL(kho_remove_subtree);
 
 /**
  * kho_preserve_folio - preserve a folio across kexec.
@@ -726,7 +736,7 @@ int kho_preserve_folio(struct folio *folio)
 {
 	const unsigned long pfn = folio_pfn(folio);
 	const unsigned int order = folio_order(folio);
-	struct kho_mem_track *track = &kho_out.ser.track;
+	struct kho_mem_track *track = &kho_out.track;
 
 	if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
 		return -EINVAL;
@@ -747,7 +757,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
  */
 int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 {
-	struct kho_mem_track *track = &kho_out.ser.track;
+	struct kho_mem_track *track = &kho_out.track;
 	const unsigned long start_pfn = page_to_pfn(page);
 	const unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long pfn = start_pfn;
@@ -849,7 +859,7 @@ err_free:
 static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
					 unsigned short order)
 {
-	struct kho_mem_track *track = &kho_out.ser.track;
+	struct kho_mem_track *track = &kho_out.track;
 	unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
 
 	__kho_unpreserve(track, pfn, pfn + 1);
@@ -1031,11 +1041,11 @@ EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
 
 static int __kho_abort(void)
 {
-	int err;
+	int err = 0;
 	unsigned long order;
 	struct kho_mem_phys *physxa;
 
-	xa_for_each(&kho_out.ser.track.orders, order, physxa) {
+	xa_for_each(&kho_out.track.orders, order, physxa) {
 		struct kho_mem_phys_bits *bits;
 		unsigned long phys;
 
@@ -1045,17 +1055,13 @@ static int __kho_abort(void)
 			xa_destroy(&physxa->phys_bits);
 		kfree(physxa);
 	}
-	xa_destroy(&kho_out.ser.track.orders);
+	xa_destroy(&kho_out.track.orders);
 
-	if (kho_out.ser.preserved_mem_map) {
-		kho_mem_ser_free(kho_out.ser.preserved_mem_map);
-		kho_out.ser.preserved_mem_map = NULL;
+	if (kho_out.preserved_mem_map) {
+		kho_mem_ser_free(kho_out.preserved_mem_map);
+		kho_out.preserved_mem_map = NULL;
 	}
 
-	err = blocking_notifier_call_chain(&kho_out.chain_head, KEXEC_KHO_ABORT,
-					   NULL);
-	err = notifier_to_errno(err);
-
 	if (err)
 		pr_err("Failed to abort KHO finalization: %d\n", err);
 
@@ -1078,7 +1084,8 @@ int kho_abort(void)
 		return ret;
 
 	kho_out.finalized = false;
-	kho_debugfs_cleanup(&kho_out.dbg);
+
+	kho_debugfs_fdt_remove(&kho_out.dbg, kho_out.fdt);
 
 	return 0;
 }
@@ -1087,41 +1094,46 @@ static int __kho_finalize(void)
 {
 	int err = 0;
 	u64 *preserved_mem_map;
-	void *fdt = page_to_virt(kho_out.ser.fdt);
+	void *root = kho_out.fdt;
+	struct kho_sub_fdt *fdt;
 
-	err |= fdt_create(fdt, PAGE_SIZE);
-	err |= fdt_finish_reservemap(fdt);
-	err |= fdt_begin_node(fdt, "");
-	err |= fdt_property_string(fdt, "compatible", KHO_FDT_COMPATIBLE);
+	err |= fdt_create(root, PAGE_SIZE);
+	err |= fdt_finish_reservemap(root);
+	err |= fdt_begin_node(root, "");
+	err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
 	/**
	 * Reserve the preserved-memory-map property in the root FDT, so
	 * that all property definitions will precede subnodes created by
	 * KHO callers.
	 */
-	err |= fdt_property_placeholder(fdt, PROP_PRESERVED_MEMORY_MAP,
+	err |= fdt_property_placeholder(root, PROP_PRESERVED_MEMORY_MAP,
					sizeof(*preserved_mem_map),
					(void **)&preserved_mem_map);
 	if (err)
 		goto abort;
 
-	err = kho_preserve_folio(page_folio(kho_out.ser.fdt));
+	err = kho_preserve_folio(virt_to_folio(kho_out.fdt));
 	if (err)
 		goto abort;
 
-	err = blocking_notifier_call_chain(&kho_out.chain_head,
-					   KEXEC_KHO_FINALIZE, &kho_out.ser);
-	err = notifier_to_errno(err);
+	err = kho_mem_serialize(&kho_out);
 	if (err)
 		goto abort;
 
-	err = kho_mem_serialize(&kho_out.ser);
-	if (err)
-		goto abort;
+	*preserved_mem_map = (u64)virt_to_phys(kho_out.preserved_mem_map);
+
+	mutex_lock(&kho_out.fdts_lock);
+	list_for_each_entry(fdt, &kho_out.sub_fdts, l) {
+		phys_addr_t phys = virt_to_phys(fdt->fdt);
 
-	*preserved_mem_map = (u64)virt_to_phys(kho_out.ser.preserved_mem_map);
+		err |= fdt_begin_node(root, fdt->name);
+		err |= fdt_property(root, PROP_SUB_FDT, &phys, sizeof(phys));
+		err |= fdt_end_node(root);
+	}
+	mutex_unlock(&kho_out.fdts_lock);
 
-	err |= fdt_end_node(fdt);
-	err |= fdt_finish(fdt);
+	err |= fdt_end_node(root);
+	err |= fdt_finish(root);
 
 abort:
 	if (err) {
@@ -1149,8 +1161,10 @@ int kho_finalize(void)
 
 	kho_out.finalized = true;
 
-	return kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
-				   page_to_virt(kho_out.ser.fdt), true);
+	WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
+					 kho_out.fdt, true));
+
+	return 0;
 }
 
 bool kho_finalized(void)
@@ -1233,15 +1247,17 @@ static __init int kho_init(void)
 {
 	int err = 0;
 	const void *fdt = kho_get_fdt();
+	struct page *fdt_page;
 
 	if (!kho_enable)
 		return 0;
 
-	kho_out.ser.fdt = alloc_page(GFP_KERNEL);
-	if (!kho_out.ser.fdt) {
+	fdt_page = alloc_page(GFP_KERNEL);
+	if (!fdt_page) {
 		err = -ENOMEM;
 		goto err_free_scratch;
 	}
+	kho_out.fdt = page_to_virt(fdt_page);
 
 	err = kho_debugfs_init();
 	if (err)
@@ -1269,8 +1285,8 @@ static __init int kho_init(void)
 	return 0;
 
 err_free_fdt:
-	put_page(kho_out.ser.fdt);
-	kho_out.ser.fdt = NULL;
+	put_page(fdt_page);
+	kho_out.fdt = NULL;
 err_free_scratch:
 	for (int i = 0; i < kho_scratch_cnt; i++) {
 		void *start = __va(kho_scratch[i].addr);
@@ -1281,7 +1297,7 @@ err_free_scratch:
 	kho_enable = false;
 	return err;
 }
-late_initcall(kho_init);
+fs_initcall(kho_init);
 
 static void __init kho_release_scratch(void)
 {
@@ -1417,7 +1433,7 @@ int kho_fill_kimage(struct kimage *image)
 	if (!kho_out.finalized)
 		return 0;
 
-	image->kho.fdt = page_to_phys(kho_out.ser.fdt);
+	image->kho.fdt = virt_to_phys(kho_out.fdt);
 
 	scratch_size = sizeof(*kho_scratch) * kho_scratch_cnt;
 	scratch = (struct kexec_buf){
diff --git a/kernel/kexec_handover_debugfs.c b/kernel/kexec_handover_debugfs.c
index a91b279f1b23..46e9e6c0791f 100644
--- a/kernel/kexec_handover_debugfs.c
+++ b/kernel/kexec_handover_debugfs.c
@@ -61,14 +61,17 @@ int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
 	return __kho_debugfs_fdt_add(&dbg->fdt_list, dir, name, fdt);
 }
 
-void kho_debugfs_cleanup(struct kho_debugfs *dbg)
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt)
 {
-	struct fdt_debugfs *ff, *tmp;
-
-	list_for_each_entry_safe(ff, tmp, &dbg->fdt_list, list) {
-		debugfs_remove(ff->file);
-		list_del(&ff->list);
-		kfree(ff);
+	struct fdt_debugfs *ff;
+
+	list_for_each_entry(ff, &dbg->fdt_list, list) {
+		if (ff->wrapper.data == fdt) {
+			debugfs_remove(ff->file);
+			list_del(&ff->list);
+			kfree(ff);
+			break;
+		}
 	}
 }
 
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
index 217b8b25a542..52ed73659fe6 100644
--- a/kernel/kexec_handover_internal.h
+++ b/kernel/kexec_handover_internal.h
@@ -32,7 +32,7 @@ void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt);
 int kho_out_debugfs_init(struct kho_debugfs *dbg);
 int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
			const void *fdt, bool root);
-void kho_debugfs_cleanup(struct kho_debugfs *dbg);
+void kho_debugfs_fdt_remove(struct kho_debugfs *dbg, void *fdt);
 #else
 static inline int kho_debugfs_init(void) { return 0; }
 static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
@@ -40,7 +40,8 @@ static inline void kho_in_debugfs_init(struct kho_debugfs *dbg,
 static inline int kho_out_debugfs_init(struct kho_debugfs *dbg) { return 0; }
 static inline int kho_debugfs_fdt_add(struct kho_debugfs *dbg, const char *name,
				       const void *fdt, bool root) { return 0; }
-static inline void kho_debugfs_cleanup(struct kho_debugfs *dbg) {}
+static inline void kho_debugfs_fdt_remove(struct kho_debugfs *dbg,
+					  void *fdt) { }
 #endif /* CONFIG_KEXEC_HANDOVER_DEBUGFS */
 
 #ifdef CONFIG_KEXEC_HANDOVER_DEBUG
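
For the restore side, which this patch does not change, the same hypothetical
client would locate its blob in the new kernel. The sketch below assumes the
pre-existing kho_retrieve_subtree() helper from the KHO API (not part of this
diff) to look up the "example" node that __kho_finalize() recorded;
fdt_node_check_compatible() is standard libfdt:

	#include <linux/init.h>
	#include <linux/io.h>
	#include <linux/kexec_handover.h>
	#include <linux/libfdt.h>

	/* Hypothetical second-kernel counterpart of the example client above. */
	static int __init example_kho_restore(void)
	{
		phys_addr_t fdt_phys;
		const void *fdt;
		int err;

		/* Look up the "example" subnode recorded in the KHO root tree. */
		err = kho_retrieve_subtree("example", &fdt_phys);
		if (err)
			return 0;	/* nothing handed over: start fresh */

		fdt = phys_to_virt(fdt_phys);
		if (fdt_node_check_compatible(fdt, 0, "example-v1"))
			return -EINVAL;

		/* ...parse the blob and re-create the client's state... */
		return 0;
	}
	late_initcall(example_kho_restore);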
