From 1aece821004f67f46ef4db7199bbeca87cf22bdd Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:32 -0500 Subject: liveupdate: luo_core: integrate with KHO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrate the LUO with the KHO framework to enable passing LUO state across a kexec reboot. This patch implements the lifecycle integration with KHO: 1. Incoming State: During early boot (`early_initcall`), LUO checks if KHO is active. If so, it retrieves the "LUO" subtree, verifies the "luo-v1" compatibility string, and reads the `liveupdate-number` to track the update count. 2. Outgoing State: During late initialization (`late_initcall`), LUO allocates a new FDT for the next kernel, populates it with the basic header (compatible string and incremented update number), and registers it with KHO (`kho_add_subtree`). 3. Finalization: The `liveupdate_reboot()` notifier is updated to invoke `kho_finalize()`. This ensures that all memory segments marked for preservation are properly serialized before the kexec jump. Link: https://lkml.kernel.org/r/20251125165850.3389713-3-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Tested-by: David Matlack Reviewed-by: Mike Rapoport (Microsoft) Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/kho/abi/luo.h | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 include/linux/kho/abi/luo.h (limited to 'include/linux/kho') diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h new file mode 100644 index 000000000000..2099b51929e5 --- /dev/null +++ b/include/linux/kho/abi/luo.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ + +/** + * DOC: Live Update Orchestrator ABI + * + * This header defines the stable Application Binary Interface used by the + * Live Update Orchestrator to pass state from a pre-update kernel to a + * post-update kernel. The ABI is built upon the Kexec HandOver framework + * and uses a Flattened Device Tree to describe the preserved data. + * + * This interface is a contract. Any modification to the FDT structure, node + * properties, compatible strings, or the layout of the `__packed` serialization + * structures defined here constitutes a breaking change. Such changes require + * incrementing the version number in the relevant `_COMPATIBLE` string to + * prevent a new kernel from misinterpreting data from an old kernel. + * + * Changes are allowed provided the compatibility version is incremented; + * however, backward/forward compatibility is only guaranteed for kernels + * supporting the same ABI version. + * + * FDT Structure Overview: + * The entire LUO state is encapsulated within a single KHO entry named "LUO". + * This entry contains an FDT with the following layout: + * + * .. code-block:: none + * + * / { + * compatible = "luo-v1"; + * liveupdate-number = <...>; + * }; + * + * Main LUO Node (/): + * + * - compatible: "luo-v1" + * Identifies the overall LUO ABI version. + * - liveupdate-number: u64 + * A counter tracking the number of successful live updates performed. + */ + +#ifndef _LINUX_KHO_ABI_LUO_H +#define _LINUX_KHO_ABI_LUO_H + +/* + * The LUO FDT hooks all LUO state for sessions, fds, etc. + * In the root it also carries "liveupdate-number" 64-bit property that + * corresponds to the number of live-updates performed on this machine. + */ +#define LUO_FDT_SIZE PAGE_SIZE +#define LUO_FDT_KHO_ENTRY_NAME "LUO" +#define LUO_FDT_COMPATIBLE "luo-v1" +#define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number" + +#endif /* _LINUX_KHO_ABI_LUO_H */ -- cgit v1.2.3 From 0153094d03df5a2e834a19c59b255649a258ae46 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:34 -0500 Subject: liveupdate: luo_session: add sessions support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce concept of "Live Update Sessions" within the LUO framework. LUO sessions provide a mechanism to group and manage `struct file *` instances (representing file descriptors) that need to be preserved across a kexec-based live update. Each session is identified by a unique name and acts as a container for file objects whose state is critical to a userspace workload, such as a virtual machine or a high-performance database, aiming to maintain their functionality across a kernel transition. This groundwork establishes the framework for preserving file-backed state across kernel updates, with the actual file data preservation mechanisms to be implemented in subsequent patches. [dan.carpenter@linaro.org: fix use after free in luo_session_deserialize()] Link: https://lkml.kernel.org/r/c5dd637d7eed3a3be48c5e9fedb881596a3b1f5a.1764163896.git.dan.carpenter@linaro.org Link: https://lkml.kernel.org/r/20251125165850.3389713-5-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Signed-off-by: Dan Carpenter Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/kho/abi/luo.h | 71 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'include/linux/kho') diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index 2099b51929e5..bf1ab2910959 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -32,6 +32,11 @@ * / { * compatible = "luo-v1"; * liveupdate-number = <...>; + * + * luo-session { + * compatible = "luo-session-v1"; + * luo-session-header = ; + * }; * }; * * Main LUO Node (/): @@ -40,11 +45,37 @@ * Identifies the overall LUO ABI version. * - liveupdate-number: u64 * A counter tracking the number of successful live updates performed. + * + * Session Node (luo-session): + * This node describes all preserved user-space sessions. + * + * - compatible: "luo-session-v1" + * Identifies the session ABI version. + * - luo-session-header: u64 + * The physical address of a `struct luo_session_header_ser`. This structure + * is the header for a contiguous block of memory containing an array of + * `struct luo_session_ser`, one for each preserved session. + * + * Serialization Structures: + * The FDT properties point to memory regions containing arrays of simple, + * `__packed` structures. These structures contain the actual preserved state. + * + * - struct luo_session_header_ser: + * Header for the session array. Contains the total page count of the + * preserved memory block and the number of `struct luo_session_ser` + * entries that follow. + * + * - struct luo_session_ser: + * Metadata for a single session, including its name and a physical pointer + * to another preserved memory block containing an array of + * `struct luo_file_ser` for all files in that session. */ #ifndef _LINUX_KHO_ABI_LUO_H #define _LINUX_KHO_ABI_LUO_H +#include + /* * The LUO FDT hooks all LUO state for sessions, fds, etc. * In the root it also carries "liveupdate-number" 64-bit property that @@ -55,4 +86,44 @@ #define LUO_FDT_COMPATIBLE "luo-v1" #define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number" +/* + * LUO FDT session node + * LUO_FDT_SESSION_HEADER: is a u64 physical address of struct + * luo_session_header_ser + */ +#define LUO_FDT_SESSION_NODE_NAME "luo-session" +#define LUO_FDT_SESSION_COMPATIBLE "luo-session-v1" +#define LUO_FDT_SESSION_HEADER "luo-session-header" + +/** + * struct luo_session_header_ser - Header for the serialized session data block. + * @count: The number of `struct luo_session_ser` entries that immediately + * follow this header in the memory block. + * + * This structure is located at the beginning of a contiguous block of + * physical memory preserved across the kexec. It provides the necessary + * metadata to interpret the array of session entries that follow. + * + * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated. + */ +struct luo_session_header_ser { + u64 count; +} __packed; + +/** + * struct luo_session_ser - Represents the serialized metadata for a LUO session. + * @name: The unique name of the session, provided by the userspace at + * the time of session creation. + * + * This structure is used to package session-specific metadata for transfer + * between kernels via Kexec Handover. An array of these structures (one per + * session) is created and passed to the new kernel, allowing it to reconstruct + * the session context. + * + * If this structure is modified, `LUO_FDT_SESSION_COMPATIBLE` must be updated. + */ +struct luo_session_ser { + char name[LIVEUPDATE_SESSION_NAME_LENGTH]; +} __packed; + #endif /* _LINUX_KHO_ABI_LUO_H */ -- cgit v1.2.3 From 7c722a7f44e0c1f9714084152226bc7bd644b7e3 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:36 -0500 Subject: liveupdate: luo_file: implement file systems callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the core mechanism for managing preserved files throughout the live update lifecycle. It provides the logic to invoke the file handler callbacks (preserve, unpreserve, freeze, unfreeze, retrieve, and finish) at the appropriate stages. During the reboot phase, luo_file_freeze() serializes the final metadata for each file (handler compatible string, token, and data handle) into a memory region preserved by KHO. In the new kernel, luo_file_deserialize() reconstructs the in-memory file list from this data, preparing the session for retrieval. Link: https://lkml.kernel.org/r/20251125165850.3389713-7-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/kho/abi/luo.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux/kho') diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index bf1ab2910959..bb099c92e469 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -69,6 +69,11 @@ * Metadata for a single session, including its name and a physical pointer * to another preserved memory block containing an array of * `struct luo_file_ser` for all files in that session. + * + * - struct luo_file_ser: + * Metadata for a single preserved file. Contains the `compatible` string to + * find the correct handler in the new kernel, a user-provided `token` for + * identification, and an opaque `data` handle for the handler to use. */ #ifndef _LINUX_KHO_ABI_LUO_H @@ -86,13 +91,43 @@ #define LUO_FDT_COMPATIBLE "luo-v1" #define LUO_FDT_LIVEUPDATE_NUM "liveupdate-number" +#define LIVEUPDATE_HNDL_COMPAT_LENGTH 48 + +/** + * struct luo_file_ser - Represents the serialized preserves files. + * @compatible: File handler compatible string. + * @data: Private data + * @token: User provided token for this file + * + * If this structure is modified, LUO_SESSION_COMPATIBLE must be updated. + */ +struct luo_file_ser { + char compatible[LIVEUPDATE_HNDL_COMPAT_LENGTH]; + u64 data; + u64 token; +} __packed; + +/** + * struct luo_file_set_ser - Represents the serialized metadata for file set + * @files: The physical address of a contiguous memory block that holds + * the serialized state of files (array of luo_file_ser) in this file + * set. + * @count: The total number of files that were part of this session during + * serialization. Used for iteration and validation during + * restoration. + */ +struct luo_file_set_ser { + u64 files; + u64 count; +} __packed; + /* * LUO FDT session node * LUO_FDT_SESSION_HEADER: is a u64 physical address of struct * luo_session_header_ser */ #define LUO_FDT_SESSION_NODE_NAME "luo-session" -#define LUO_FDT_SESSION_COMPATIBLE "luo-session-v1" +#define LUO_FDT_SESSION_COMPATIBLE "luo-session-v2" #define LUO_FDT_SESSION_HEADER "luo-session-header" /** @@ -114,6 +149,7 @@ struct luo_session_header_ser { * struct luo_session_ser - Represents the serialized metadata for a LUO session. * @name: The unique name of the session, provided by the userspace at * the time of session creation. + * @file_set_ser: Serialized files belonging to this session, * * This structure is used to package session-specific metadata for transfer * between kernels via Kexec Handover. An array of these structures (one per @@ -124,6 +160,7 @@ struct luo_session_header_ser { */ struct luo_session_ser { char name[LIVEUPDATE_SESSION_NAME_LENGTH]; + struct luo_file_set_ser file_set_ser; } __packed; #endif /* _LINUX_KHO_ABI_LUO_H */ -- cgit v1.2.3 From b3749f174d686627f702234e64bad976dc432dbc Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Tue, 25 Nov 2025 11:58:44 -0500 Subject: mm: memfd_luo: allow preserving memfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ability to preserve a memfd allows userspace to use KHO and LUO to transfer its memory contents to the next kernel. This is useful in many ways. For one, it can be used with IOMMUFD as the backing store for IOMMU page tables. Preserving IOMMUFD is essential for performing a hypervisor live update with passthrough devices. memfd support provides the first building block for making that possible. For another, applications with a large amount of memory that takes time to reconstruct, reboots to consume kernel upgrades can be very expensive. memfd with LUO gives those applications reboot-persistent memory that they can use to quickly save and reconstruct that state. While memfd is backed by either hugetlbfs or shmem, currently only support on shmem is added. To be more precise, support for anonymous shmem files is added. The handover to the next kernel is not transparent. All the properties of the file are not preserved; only its memory contents, position, and size. The recreated file gets the UID and GID of the task doing the restore, and the task's cgroup gets charged with the memory. Once preserved, the file cannot grow or shrink, and all its pages are pinned to avoid migrations and swapping. The file can still be read from or written to. Use vmalloc to get the buffer to hold the folios, and preserve it using kho_preserve_vmalloc(). This doesn't have the size limit. Link: https://lkml.kernel.org/r/20251125165850.3389713-15-pasha.tatashin@soleen.com Signed-off-by: Pratyush Yadav Co-developed-by: Pasha Tatashin Signed-off-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/linux/kho/abi/memfd.h | 77 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/linux/kho/abi/memfd.h (limited to 'include/linux/kho') diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h new file mode 100644 index 000000000000..da7d063474a1 --- /dev/null +++ b/include/linux/kho/abi/memfd.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + * + * Copyright (C) 2025 Amazon.com Inc. or its affiliates. + * Pratyush Yadav + */ + +#ifndef _LINUX_KHO_ABI_MEMFD_H +#define _LINUX_KHO_ABI_MEMFD_H + +#include +#include + +/** + * DOC: memfd Live Update ABI + * + * This header defines the ABI for preserving the state of a memfd across a + * kexec reboot using the LUO. + * + * The state is serialized into a packed structure `struct memfd_luo_ser` + * which is handed over to the next kernel via the KHO mechanism. + * + * This interface is a contract. Any modification to the structure layout + * constitutes a breaking change. Such changes require incrementing the + * version number in the MEMFD_LUO_FH_COMPATIBLE string. + */ + +/** + * MEMFD_LUO_FOLIO_DIRTY - The folio is dirty. + * + * This flag indicates the folio contains data from user. A non-dirty folio is + * one that was allocated (say using fallocate(2)) but not written to. + */ +#define MEMFD_LUO_FOLIO_DIRTY BIT(0) + +/** + * MEMFD_LUO_FOLIO_UPTODATE - The folio is up-to-date. + * + * An up-to-date folio has been zeroed out. shmem zeroes out folios on first + * use. This flag tracks which folios need zeroing. + */ +#define MEMFD_LUO_FOLIO_UPTODATE BIT(1) + +/** + * struct memfd_luo_folio_ser - Serialized state of a single folio. + * @pfn: The page frame number of the folio. + * @flags: Flags to describe the state of the folio. + * @index: The page offset (pgoff_t) of the folio within the original file. + */ +struct memfd_luo_folio_ser { + u64 pfn:52; + u64 flags:12; + u64 index; +} __packed; + +/** + * struct memfd_luo_ser - Main serialization structure for a memfd. + * @pos: The file's current position (f_pos). + * @size: The total size of the file in bytes (i_size). + * @nr_folios: Number of folios in the folios array. + * @folios: KHO vmalloc descriptor pointing to the array of + * struct memfd_luo_folio_ser. + */ +struct memfd_luo_ser { + u64 pos; + u64 size; + u64 nr_folios; + struct kho_vmalloc folios; +} __packed; + +/* The compatibility string for memfd file handler */ +#define MEMFD_LUO_FH_COMPATIBLE "memfd-v1" + +#endif /* _LINUX_KHO_ABI_MEMFD_H */ -- cgit v1.2.3