summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2025-11-20 03:19:17 +0000
committerDave Jiang <dave.jiang@intel.com>2026-01-16 09:02:36 -0700
commitbc62f5b308cbdedf29132fe96e9d591e526527e1 (patch)
tree56c7ebfbe7d47906e747b4cc601cb17b3f7feb38
parent29317f8dc6ed601ec54575689c2cd55cc470bcce (diff)
dax/hmem, e820, resource: Defer Soft Reserved insertion until hmem is ready
Insert Soft Reserved memory into a dedicated soft_reserve_resource tree instead of the iomem_resource tree at boot. Delay publishing these ranges into the iomem hierarchy until ownership is resolved and the HMEM path is ready to consume them. Publishing Soft Reserved ranges into iomem too early conflicts with CXL hotplug and prevents region assembly when those ranges overlap CXL windows. Follow up patches will reinsert Soft Reserved ranges into iomem after CXL window publication is complete and HMEM is ready to claim the memory. This provides a cleaner handoff between EFI-defined memory ranges and CXL resource management without trimming or deleting resources later. In the meantime "Soft Reserved" resources will no longer appear in /proc/iomem, only their results. I.e. with "memmap=4G%4G+0xefffffff" Before: 100000000-1ffffffff : Soft Reserved 100000000-1ffffffff : dax1.0 100000000-1ffffffff : System RAM (kmem) After: 100000000-1ffffffff : dax1.0 100000000-1ffffffff : System RAM (kmem) The expectation is that this does not lead to a user visible regression because the dax1.0 device is created in both instances. Co-developed-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> [Smita: incorporate feedback from x86 maintainer review] Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> Link: https://patch.msgid.link/20251120031925.87762-2-Smita.KoralahalliChannabasappa@amd.com [djbw: cleanups and clarifications] Link: https://lore.kernel.org/69443f707b025_1cee10022@dwillia2-mobl4.notmuch Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
-rw-r--r--arch/x86/kernel/e820.c15
-rw-r--r--drivers/dax/hmem/device.c3
-rw-r--r--drivers/dax/hmem/hmem.c5
-rw-r--r--include/linux/ioport.h5
-rw-r--r--kernel/resource.c71
5 files changed, 80 insertions, 19 deletions
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b15b97d3cb52..69c050f50e18 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1151,11 +1151,18 @@ void __init e820__reserve_resources_late(void)
int i;
struct resource *res;
- res = e820_res;
- for (i = 0; i < e820_table->nr_entries; i++) {
- if (!res->parent && res->end)
+ for (i = 0, res = e820_res; i < e820_table->nr_entries; i++, res++) {
+ /* skip added or uninitialized resources */
+ if (res->parent || !res->end)
+ continue;
+
+ /* set aside soft-reserved resources for driver consideration */
+ if (res->desc == IORES_DESC_SOFT_RESERVED) {
+ insert_resource_expand_to_fit(&soft_reserve_resource, res);
+ } else {
+ /* publish the rest immediately */
insert_resource_expand_to_fit(&iomem_resource, res);
- res++;
+ }
}
/*
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index f9e1a76a04a9..56e3cbd181b5 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -83,8 +83,7 @@ static __init int hmem_register_one(struct resource *res, void *data)
static __init int hmem_init(void)
{
- walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
- IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
+ walk_soft_reserve_res(0, -1, NULL, hmem_register_one);
return 0;
}
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index c18451a37e4f..1cf7c2a0ee1c 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -73,11 +73,12 @@ static int hmem_register_device(struct device *host, int target_nid,
return 0;
}
- rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
- IORES_DESC_SOFT_RESERVED);
+ rc = region_intersects_soft_reserve(res->start, resource_size(res));
if (rc != REGION_INTERSECTS)
return 0;
+ /* TODO: Add Soft-Reserved memory back to iomem */
+
id = memregion_alloc(GFP_KERNEL);
if (id < 0) {
dev_err(host, "memregion allocation failure for %pr\n", res);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 9afa30f9346f..95662b2fb458 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,7 @@ struct resource_constraint {
/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
extern struct resource ioport_resource;
extern struct resource iomem_resource;
+extern struct resource soft_reserve_resource;
extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
extern int request_resource(struct resource *root, struct resource *new);
@@ -418,6 +419,10 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
extern int
walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
void *arg, int (*func)(struct resource *, void *));
+extern int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+ int (*func)(struct resource *, void *));
+extern int
+region_intersects_soft_reserve(resource_size_t start, size_t size);
struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size);
diff --git a/kernel/resource.c b/kernel/resource.c
index e4e9bac12e6e..b40ac7615d55 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -48,6 +48,14 @@ struct resource iomem_resource = {
};
EXPORT_SYMBOL(iomem_resource);
+struct resource soft_reserve_resource = {
+ .name = "Soft Reserved",
+ .start = 0,
+ .end = -1,
+ .desc = IORES_DESC_SOFT_RESERVED,
+ .flags = IORESOURCE_MEM,
+};
+
static DEFINE_RWLOCK(resource_lock);
/*
@@ -321,13 +329,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
}
/**
- * find_next_iomem_res - Finds the lowest iomem resource that covers part of
- * [@start..@end].
+ * find_next_res - Finds the lowest resource that covers part of
+ * [@start..@end].
*
* If a resource is found, returns 0 and @*res is overwritten with the part
* of the resource that's within [@start..@end]; if none is found, returns
* -ENODEV. Returns -EINVAL for invalid parameters.
*
+ * @parent: resource tree root to search
* @start: start address of the resource searched for
* @end: end address of same resource
* @flags: flags which the resource must have
@@ -337,9 +346,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
* The caller must specify @start, @end, @flags, and @desc
* (which may be IORES_DESC_NONE).
*/
-static int find_next_iomem_res(resource_size_t start, resource_size_t end,
- unsigned long flags, unsigned long desc,
- struct resource *res)
+static int find_next_res(struct resource *parent, resource_size_t start,
+ resource_size_t end, unsigned long flags,
+ unsigned long desc, struct resource *res)
{
/* Skip children until we find a top level range that matches */
bool skip_children = true;
@@ -353,7 +362,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
read_lock(&resource_lock);
- for_each_resource(&iomem_resource, p, skip_children) {
+ for_each_resource(parent, p, skip_children) {
/* If we passed the resource we are looking for, stop */
if (p->start > end) {
p = NULL;
@@ -390,16 +399,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
return p ? 0 : -ENODEV;
}
-static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
- unsigned long flags, unsigned long desc,
- void *arg,
- int (*func)(struct resource *, void *))
+static int find_next_iomem_res(resource_size_t start, resource_size_t end,
+ unsigned long flags, unsigned long desc,
+ struct resource *res)
+{
+ return find_next_res(&iomem_resource, start, end, flags, desc, res);
+}
+
+static int walk_res_desc(struct resource *parent, resource_size_t start,
+ resource_size_t end, unsigned long flags,
+ unsigned long desc, void *arg,
+ int (*func)(struct resource *, void *))
{
struct resource res;
int ret = -EINVAL;
while (start < end &&
- !find_next_iomem_res(start, end, flags, desc, &res)) {
+ !find_next_res(parent, start, end, flags, desc, &res)) {
ret = (*func)(&res, arg);
if (ret)
break;
@@ -410,6 +426,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
return ret;
}
+static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
+ unsigned long flags, unsigned long desc,
+ void *arg,
+ int (*func)(struct resource *, void *))
+{
+ return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
+}
+
+
/**
* walk_iomem_res_desc - Walks through iomem resources and calls func()
* with matching resource ranges.
@@ -435,6 +460,18 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
/*
+ * In support of device drivers claiming Soft Reserved resources, walk the Soft
+ * Reserved resource deferral tree.
+ */
+int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+ int (*func)(struct resource *, void *))
+{
+ return walk_res_desc(&soft_reserve_resource, start, end, IORESOURCE_MEM,
+ IORES_DESC_SOFT_RESERVED, arg, func);
+}
+EXPORT_SYMBOL_GPL(walk_soft_reserve_res);
+
+/*
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
* Now, this function is only for System RAM, it deals with full ranges and
@@ -656,6 +693,18 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
}
EXPORT_SYMBOL_GPL(region_intersects);
+/*
+ * Check if the provided range is registered in the Soft Reserved resource
+ * deferral tree for driver consideration.
+ */
+int region_intersects_soft_reserve(resource_size_t start, size_t size)
+{
+ guard(read_lock)(&resource_lock);
+ return __region_intersects(&soft_reserve_resource, start, size,
+ IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED);
+}
+EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
+
void __weak arch_remove_reservations(struct resource *avail)
{
}