summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86_64/mm/numa.c1
-rw-r--r--arch/x86_64/mm/srat.c76
-rw-r--r--drivers/acpi/numa.c11
-rw-r--r--include/acpi/acpi_numa.h1
-rw-r--r--include/asm-x86_64/acpi.h11
-rw-r--r--include/linux/acpi.h3
6 files changed, 96 insertions, 7 deletions
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 51548947ad3b..30bf8043984d 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -484,6 +484,7 @@ out:
nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
+ acpi_fake_nodes(nodes, num_nodes);
numa_init_array();
return 0;
}
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 0e0725db20b7..7ac8ff333e84 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -350,7 +350,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
/* Sanity check to catch more bad SRATs (they are amazingly common).
Make sure the PXMs cover all memory. */
-static int nodes_cover_memory(void)
+static int __init nodes_cover_memory(const struct bootnode *nodes)
{
int i;
unsigned long pxmram, e820ram;
@@ -406,7 +406,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
}
}
- if (!nodes_cover_memory()) {
+ if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}
@@ -440,6 +440,75 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return 0;
}
+#ifdef CONFIG_NUMA_EMU
+static int __init find_node_by_addr(unsigned long addr)
+{
+ int ret = NUMA_NO_NODE;
+ int i;
+
+ for_each_node_mask(i, nodes_parsed) {
+ /*
+ * Find the real node that this emulated node appears on. For
+ * the sake of simplicity, we only use a real node's starting
+ * address to determine which emulated node it appears on.
+ */
+ if (addr >= nodes[i].start && addr < nodes[i].end) {
+ ret = i;
+ break;
+ }
+ }
+ return i;
+}
+
+/*
+ * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
+ * mappings that respect the real ACPI topology but reflect our emulated
+ * environment. For each emulated node, we find which real node it appears on
+ * and create PXM to NID mappings for those fake nodes which mirror that
+ * locality. SLIT will now represent the correct distances between emulated
+ * nodes as a result of the real topology.
+ */
+void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
+{
+ int i;
+ int fake_node_to_pxm_map[MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES-1] = PXM_INVAL
+ };
+
+ printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
+ "topology.\n");
+ for (i = 0; i < num_nodes; i++) {
+ int nid, pxm;
+
+ nid = find_node_by_addr(fake_nodes[i].start);
+ if (nid == NUMA_NO_NODE)
+ continue;
+ pxm = node_to_pxm(nid);
+ if (pxm == PXM_INVAL)
+ continue;
+ fake_node_to_pxm_map[i] = pxm;
+ }
+ for (i = 0; i < num_nodes; i++)
+ __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
+
+ nodes_clear(nodes_parsed);
+ for (i = 0; i < num_nodes; i++)
+ if (fake_nodes[i].start != fake_nodes[i].end)
+ node_set(i, nodes_parsed);
+ WARN_ON(!nodes_cover_memory(fake_nodes));
+}
+
+static int null_slit_node_compare(int a, int b)
+{
+ return node_to_pxm(a) == node_to_pxm(b);
+}
+#else
+static int null_slit_node_compare(int a, int b)
+{
+ return a == b;
+}
+#endif /* CONFIG_NUMA_EMU */
+
void __init srat_reserve_add_area(int nodeid)
{
if (found_add_area && nodes_add[nodeid].end) {
@@ -464,7 +533,8 @@ int __node_distance(int a, int b)
int index;
if (!acpi_slit)
- return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
+ REMOTE_DISTANCE;
index = acpi_slit->locality_count * node_to_pxm(a);
return acpi_slit->entry[index + node_to_pxm(b)];
}
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 6c44b522f4d3..ab04d848b19d 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -36,8 +36,6 @@
ACPI_MODULE_NAME("numa");
static nodemask_t nodes_found_map = NODE_MASK_NONE;
-#define PXM_INVAL -1
-#define NID_INVAL -1
/* maps to convert between proximity domain and logical node ID */
static int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]
@@ -59,6 +57,12 @@ int node_to_pxm(int node)
return node_to_pxm_map[node];
}
+void __acpi_map_pxm_to_node(int pxm, int node)
+{
+ pxm_to_node_map[pxm] = node;
+ node_to_pxm_map[node] = pxm;
+}
+
int acpi_map_pxm_to_node(int pxm)
{
int node = pxm_to_node_map[pxm];
@@ -67,8 +71,7 @@ int acpi_map_pxm_to_node(int pxm)
if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
return NID_INVAL;
node = first_unset_node(nodes_found_map);
- pxm_to_node_map[pxm] = node;
- node_to_pxm_map[node] = pxm;
+ __acpi_map_pxm_to_node(pxm, node);
node_set(node, nodes_found_map);
}
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index e2fcee2b340d..62c5ee4311da 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -13,6 +13,7 @@
extern int pxm_to_node(int);
extern int node_to_pxm(int);
+extern void __acpi_map_pxm_to_node(int, int);
extern int acpi_map_pxm_to_node(int);
extern void __cpuinit acpi_unmap_pxm_to_node(int);
diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h
index a29f05087a31..1da8f49c0fe2 100644
--- a/include/asm-x86_64/acpi.h
+++ b/include/asm-x86_64/acpi.h
@@ -29,6 +29,7 @@
#ifdef __KERNEL__
#include <acpi/pdc_intel.h>
+#include <asm/numa.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -141,6 +142,16 @@ extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
+#ifdef CONFIG_ACPI_NUMA
+extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
+ int num_nodes);
+#else
+static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
+ int num_nodes)
+{
+}
+#endif
+
#endif /*__KERNEL__*/
#endif /*_ASM_ACPI_H*/
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e88b62e6b3aa..d5680cd7746a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -231,6 +231,9 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
extern int pnpacpi_disabled;
+#define PXM_INVAL (-1)
+#define NID_INVAL (-1)
+
#else /* CONFIG_ACPI */
static inline int acpi_boot_init(void)