summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/ia64/kernel/efi.c7
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/pgtable.h3
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/mm/dump_pagetables.c84
-rw-r--r--arch/x86/mm/pageattr.c44
-rw-r--r--arch/x86/platform/efi/efi.c278
-rw-r--r--arch/x86/platform/efi/efi_32.c7
-rw-r--r--arch/x86/platform/efi/efi_64.c41
-rw-r--r--drivers/firmware/efi/efi.c5
-rw-r--r--drivers/firmware/efi/efivars.c2
-rw-r--r--fs/efivarfs/file.c13
-rw-r--r--include/linux/efi.h16
15 files changed, 357 insertions, 165 deletions
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index da5b462e6de6..741b99c1a0b1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -477,6 +477,9 @@ efi_init (void)
char *cp, vendor[100] = "unknown";
int i;
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
+
/*
* It's too early to be able to use the standard kernel command line
* support...
@@ -529,6 +532,8 @@ efi_init (void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
palo_phys = EFI_INVALID_TABLE_ADDR;
if (efi_config_init(arch_tables) != 0)
@@ -657,6 +662,8 @@ efi_enter_virtual_mode (void)
return;
}
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
/*
* Now that EFI is in virtual mode, we call the EFI functions more
* efficiently:
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 321a52ccf63a..61bd2ad94281 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -81,6 +81,15 @@ config X86_PTDUMP
kernel.
If in doubt, say "N"
+config EFI_PGT_DUMP
+ bool "Dump the EFI pagetable"
+ depends on EFI && X86_PTDUMP
+ ---help---
+ Enable this if you want to dump the EFI page table before
+ enabling virtual mode. This can be used to debug miscellaneous
+ issues with the mapping of the EFI runtime regions into that
+ table.
+
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
default y
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index acd86c850414..86d1fd4bf24c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -119,7 +119,6 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern int add_efi_memmap;
-extern unsigned long x86_efi_facility;
extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
@@ -130,10 +129,12 @@ extern void efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
-extern void efi_setup_page_tables(void);
+extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init old_map_region(efi_memory_desc_t *md);
extern void __init runtime_code_page_mkexec(void);
extern void __init efi_runtime_mkexec(void);
+extern void __init efi_dump_pagetable(void);
extern void __init efi_apply_memmap_quirks(void);
struct efi_setup_data {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5ad38ad07890..938ef1d0458e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,9 +15,10 @@
: (prot))
#ifndef __ASSEMBLY__
-
#include <asm/x86_init.h>
+void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 1aa9ccd43223..94e40f1efdfd 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags);
+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
+ unsigned numpages);
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ce72964b2f46..fa511acff7e6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -926,11 +926,11 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- set_bit(EFI_BOOT, &x86_efi_facility);
- set_bit(EFI_64BIT, &x86_efi_facility);
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
}
if (efi_enabled(EFI_BOOT))
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0002a3a33081..20621d753d5f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -30,6 +30,7 @@ struct pg_state {
unsigned long start_address;
unsigned long current_address;
const struct addr_marker *marker;
+ bool to_dmesg;
};
struct addr_marker {
@@ -88,10 +89,28 @@ static struct addr_marker address_markers[] = {
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+#define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \
+({ \
+ if (to_dmesg) \
+ printk(KERN_INFO fmt, ##args); \
+ else \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_cont_printf(m, to_dmesg, fmt, args...) \
+({ \
+ if (to_dmesg) \
+ printk(KERN_CONT fmt, ##args); \
+ else \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
/*
* Print a readable form of a pgprot_t to the seq_file
*/
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
+static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
{
pgprotval_t pr = pgprot_val(prot);
static const char * const level_name[] =
@@ -99,47 +118,47 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
if (!pgprot_val(prot)) {
/* Not present */
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
} else {
if (pr & _PAGE_USER)
- seq_printf(m, "USR ");
+ pt_dump_cont_printf(m, dmsg, "USR ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_RW)
- seq_printf(m, "RW ");
+ pt_dump_cont_printf(m, dmsg, "RW ");
else
- seq_printf(m, "ro ");
+ pt_dump_cont_printf(m, dmsg, "ro ");
if (pr & _PAGE_PWT)
- seq_printf(m, "PWT ");
+ pt_dump_cont_printf(m, dmsg, "PWT ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_PCD)
- seq_printf(m, "PCD ");
+ pt_dump_cont_printf(m, dmsg, "PCD ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
/* Bit 9 has a different meaning on level 3 vs 4 */
if (level <= 3) {
if (pr & _PAGE_PSE)
- seq_printf(m, "PSE ");
+ pt_dump_cont_printf(m, dmsg, "PSE ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
} else {
if (pr & _PAGE_PAT)
- seq_printf(m, "pat ");
+ pt_dump_cont_printf(m, dmsg, "pat ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
}
if (pr & _PAGE_GLOBAL)
- seq_printf(m, "GLB ");
+ pt_dump_cont_printf(m, dmsg, "GLB ");
else
- seq_printf(m, " ");
+ pt_dump_cont_printf(m, dmsg, " ");
if (pr & _PAGE_NX)
- seq_printf(m, "NX ");
+ pt_dump_cont_printf(m, dmsg, "NX ");
else
- seq_printf(m, "x ");
+ pt_dump_cont_printf(m, dmsg, "x ");
}
- seq_printf(m, "%s\n", level_name[level]);
+ pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
}
/*
@@ -178,7 +197,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->current_prot = new_prot;
st->level = level;
st->marker = address_markers;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
+ st->marker->name);
} else if (prot != cur || level != st->level ||
st->current_address >= st->marker[1].start_address) {
const char *unit = units;
@@ -188,17 +208,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
/*
* Now print the actual finished series
*/
- seq_printf(m, "0x%0*lx-0x%0*lx ",
- width, st->start_address,
- width, st->current_address);
+ pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ",
+ width, st->start_address,
+ width, st->current_address);
delta = (st->current_address - st->start_address) >> 10;
while (!(delta & 1023) && unit[1]) {
delta >>= 10;
unit++;
}
- seq_printf(m, "%9lu%c ", delta, *unit);
- printk_prot(m, st->current_prot, st->level);
+ pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit);
+ printk_prot(m, st->current_prot, st->level, st->to_dmesg);
/*
* We print markers for special areas of address space,
@@ -207,7 +227,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
*/
if (st->current_address >= st->marker[1].start_address) {
st->marker++;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
+ st->marker->name);
}
st->start_address = st->current_address;
@@ -296,7 +317,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-static void walk_pgd_level(struct seq_file *m)
+void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_level4_pgt;
@@ -304,9 +325,12 @@ static void walk_pgd_level(struct seq_file *m)
pgd_t *start = swapper_pg_dir;
#endif
int i;
- struct pg_state st;
+ struct pg_state st = {};
- memset(&st, 0, sizeof(st));
+ if (pgd) {
+ start = pgd;
+ st.to_dmesg = true;
+ }
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
@@ -331,7 +355,7 @@ static void walk_pgd_level(struct seq_file *m)
static int ptdump_show(struct seq_file *m, void *v)
{
- walk_pgd_level(m);
+ ptdump_walk_pgd_level(m, NULL);
return 0;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b3b19f46c016..a3488689e301 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
return true;
}
+static bool try_to_free_pud_page(pud_t *pud)
+{
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ if (!pud_none(pud[i]))
+ return false;
+
+ free_page((unsigned long)pud);
+ return true;
+}
+
static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, start);
@@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
*/
}
+static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
+{
+ pgd_t *pgd_entry = root + pgd_index(addr);
+
+ unmap_pud_range(pgd_entry, addr, end);
+
+ if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
+ pgd_clear(pgd_entry);
+}
+
static int alloc_pte_page(pmd_t *pmd)
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
{
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
- bool allocd_pgd = false;
- pgd_t *pgd_entry;
pud_t *pud = NULL; /* shut up gcc */
+ pgd_t *pgd_entry;
int ret;
pgd_entry = cpa->pgd + pgd_index(addr);
@@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
return -1;
set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
- allocd_pgd = true;
}
pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
@@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
ret = populate_pud(cpa, addr, pgd_entry, pgprot);
if (ret < 0) {
- unmap_pud_range(pgd_entry, addr,
+ unmap_pgd_range(cpa->pgd, addr,
addr + (cpa->numpages << PAGE_SHIFT));
-
- if (allocd_pgd) {
- /*
- * If I allocated this PUD page, I can just as well
- * free it in this error path.
- */
- pgd_clear(pgd_entry);
- free_page((unsigned long)pud);
- }
return ret;
}
+
cpa->numpages = ret;
return 0;
}
@@ -1861,6 +1873,12 @@ out:
return retval;
}
+void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
+ unsigned numpages)
+{
+ unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
+}
+
/*
* The testcases use internal knowledge of the implementation that shouldn't
* be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b97acecf3fd9..45d4f7674678 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -68,9 +68,7 @@ struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
-unsigned long x86_efi_facility;
-
-static __initdata efi_config_table_type_t arch_tables[] = {
+static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
#endif
@@ -79,16 +77,7 @@ static __initdata efi_config_table_type_t arch_tables[] = {
u64 efi_setup; /* efi setup_data physical address */
-/*
- * Returns 1 if 'facility' is enabled, 0 otherwise.
- */
-int efi_enabled(int facility)
-{
- return test_bit(facility, &x86_efi_facility) != 0;
-}
-EXPORT_SYMBOL(efi_enabled);
-
-static bool __initdata disable_runtime = false;
+static bool disable_runtime __initdata = false;
static int __init setup_noefi(char *arg)
{
disable_runtime = true;
@@ -275,9 +264,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
int efi_set_rtc_mmss(const struct timespec *now)
{
unsigned long nowtime = now->tv_sec;
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
struct rtc_time tm;
status = efi.get_time(&eft, &cap);
@@ -295,9 +284,8 @@ int efi_set_rtc_mmss(const struct timespec *now)
eft.second = tm.tm_sec;
eft.nanosecond = 0;
} else {
- printk(KERN_ERR
- "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
- __FUNCTION__, nowtime);
+ pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
+ __func__, nowtime);
return -1;
}
@@ -413,8 +401,7 @@ static void __init print_efi_memmap(void)
p < memmap.map_end;
p += memmap.desc_size, i++) {
md = p;
- pr_info("mem%02u: type=%u, attr=0x%llx, "
- "range=[0x%016llx-0x%016llx) (%lluMB)\n",
+ pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
i, md->type, md->attribute, md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
@@ -446,9 +433,8 @@ void __init efi_reserve_boot_services(void)
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
md->num_pages = 0;
- memblock_dbg("Could not reserve boot range "
- "[0x%010llx-0x%010llx]\n",
- start, start+size-1);
+ memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
+ start, start+size-1);
} else
memblock_reserve(start, size);
}
@@ -456,7 +442,7 @@ void __init efi_reserve_boot_services(void)
void __init efi_unmap_memmap(void)
{
- clear_bit(EFI_MEMMAP, &x86_efi_facility);
+ clear_bit(EFI_MEMMAP, &efi.flags);
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
@@ -584,11 +570,12 @@ static int __init efi_systab_init(void *phys)
return -EINVAL;
}
if ((efi.systab->hdr.revision >> 16) == 0)
- pr_err("Warning: System table version "
- "%d.%02d, expected 1.00 or greater!\n",
+ pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
return 0;
}
@@ -624,6 +611,8 @@ static int __init efi_runtime_init(void)
efi.get_time = phys_efi_get_time;
early_iounmap(runtime, sizeof(efi_runtime_services_t));
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
return 0;
}
@@ -641,6 +630,8 @@ static int __init efi_memmap_init(void)
if (add_efi_memmap)
do_add_efi_memmap();
+ set_bit(EFI_MEMMAP, &efi.flags);
+
return 0;
}
@@ -723,7 +714,7 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
- set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
efi.config_table = (unsigned long)efi.systab->tables;
efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
@@ -751,8 +742,6 @@ void __init efi_init(void)
if (efi_config_init(arch_tables))
return;
- set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
-
/*
* Note: We currently don't support runtime services on an EFI
* that doesn't match the kernel 32/64-bit mode.
@@ -763,12 +752,11 @@ void __init efi_init(void)
else {
if (disable_runtime || efi_runtime_init())
return;
- set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
}
if (efi_memmap_init())
return;
- set_bit(EFI_MEMMAP, &x86_efi_facility);
+ set_bit(EFI_MEMMAP, &efi.flags);
print_efi_memmap();
}
@@ -892,8 +880,9 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
}
}
-static int __init save_runtime_map(void)
+static void __init save_runtime_map(void)
{
+#ifdef CONFIG_KEXEC
efi_memory_desc_t *md;
void *tmp, *p, *q = NULL;
int count = 0;
@@ -915,38 +904,44 @@ static int __init save_runtime_map(void)
}
efi_runtime_map_setup(q, count, memmap.desc_size);
+ return;
- return 0;
out:
kfree(q);
- return -ENOMEM;
+ pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+#endif
}
-/*
- * Map efi regions which were passed via setup_data. The virt_addr is a fixed
- * addr which was used in first kernel of a kexec boot.
- */
-static void __init efi_map_regions_fixed(void)
+static void *realloc_pages(void *old_memmap, int old_shift)
{
- void *p;
- efi_memory_desc_t *md;
+ void *ret;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- efi_map_region_fixed(md); /* FIXME: add error handling */
- get_systab_virt_addr(md);
- }
+ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
+ if (!ret)
+ goto out;
+ /*
+ * A first-time allocation doesn't have anything to copy.
+ */
+ if (!old_memmap)
+ return ret;
+
+ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
+
+out:
+ free_pages((unsigned long)old_memmap, old_shift);
+ return ret;
}
/*
- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
- * addresses.
+ * Map the efi memory ranges of the runtime services and update new_mmap with
+ * virtual addresses.
*/
-static void * __init efi_map_regions(int *count)
+static void * __init efi_map_regions(int *count, int *pg_shift)
{
+ void *p, *new_memmap = NULL;
+ unsigned long left = 0;
efi_memory_desc_t *md;
- void *p, *tmp, *new_memmap = NULL;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
@@ -961,20 +956,89 @@ static void * __init efi_map_regions(int *count)
efi_map_region(md);
get_systab_virt_addr(md);
- tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
- GFP_KERNEL);
- if (!tmp)
- goto out;
- new_memmap = tmp;
+ if (left < memmap.desc_size) {
+ new_memmap = realloc_pages(new_memmap, *pg_shift);
+ if (!new_memmap)
+ return NULL;
+
+ left += PAGE_SIZE << *pg_shift;
+ (*pg_shift)++;
+ }
+
memcpy(new_memmap + (*count * memmap.desc_size), md,
memmap.desc_size);
+
+ left -= memmap.desc_size;
(*count)++;
}
return new_memmap;
-out:
- kfree(new_memmap);
- return NULL;
+}
+
+static void __init kexec_enter_virtual_mode(void)
+{
+#ifdef CONFIG_KEXEC
+ efi_memory_desc_t *md;
+ void *p;
+
+ efi.systab = NULL;
+
+ /*
+ * We don't do virtual mode, since we don't do runtime services, on
+ * non-native EFI
+ */
+ if (!efi_is_native()) {
+ efi_unmap_memmap();
+ return;
+ }
+
+ /*
+ * Map efi regions which were passed via setup_data. The virt_addr is a
+ * fixed addr which was used in first kernel of a kexec boot.
+ */
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+ efi_map_region_fixed(md); /* FIXME: add error handling */
+ get_systab_virt_addr(md);
+ }
+
+ save_runtime_map();
+
+ BUG_ON(!efi.systab);
+
+ efi_sync_low_kernel_mappings();
+
+ /*
+ * Now that EFI is in virtual mode, update the function
+ * pointers in the runtime service table to the new virtual addresses.
+ *
+ * Call EFI services through wrapper functions.
+ */
+ efi.runtime_version = efi_systab.hdr.revision;
+ efi.get_time = virt_efi_get_time;
+ efi.set_time = virt_efi_set_time;
+ efi.get_wakeup_time = virt_efi_get_wakeup_time;
+ efi.set_wakeup_time = virt_efi_set_wakeup_time;
+ efi.get_variable = virt_efi_get_variable;
+ efi.get_next_variable = virt_efi_get_next_variable;
+ efi.set_variable = virt_efi_set_variable;
+ efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
+ efi.reset_system = virt_efi_reset_system;
+ efi.set_virtual_address_map = NULL;
+ efi.query_variable_info = virt_efi_query_variable_info;
+ efi.update_capsule = virt_efi_update_capsule;
+ efi.query_capsule_caps = virt_efi_query_capsule_caps;
+
+ if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
+ runtime_code_page_mkexec();
+
+ /* clean DUMMY object */
+ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS,
+ 0, NULL);
+#endif
}
/*
@@ -996,13 +1060,14 @@ out:
*
* Specially for kexec boot, efi runtime maps in previous kernel should
* be passed in via setup_data. In that case runtime ranges will be mapped
- * to the same virtual addresses as the first kernel.
+ * to the same virtual addresses as the first kernel, see
+ * kexec_enter_virtual_mode().
*/
-void __init efi_enter_virtual_mode(void)
+static void __init __efi_enter_virtual_mode(void)
{
- efi_status_t status;
+ int count = 0, pg_shift = 0;
void *new_memmap = NULL;
- int err, count = 0;
+ efi_status_t status;
efi.systab = NULL;
@@ -1015,38 +1080,33 @@ void __init efi_enter_virtual_mode(void)
return;
}
- if (efi_setup) {
- efi_map_regions_fixed();
- } else {
- efi_merge_regions();
- new_memmap = efi_map_regions(&count);
- if (!new_memmap) {
- pr_err("Error reallocating memory, EFI runtime non-functional!\n");
- return;
- }
+ efi_merge_regions();
+ new_memmap = efi_map_regions(&count, &pg_shift);
+ if (!new_memmap) {
+ pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+ return;
}
- err = save_runtime_map();
- if (err)
- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+ save_runtime_map();
BUG_ON(!efi.systab);
- efi_setup_page_tables();
+ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+ return;
+
efi_sync_low_kernel_mappings();
+ efi_dump_pagetable();
- if (!efi_setup) {
- status = phys_efi_set_virtual_address_map(
+ status = phys_efi_set_virtual_address_map(
memmap.desc_size * count,
memmap.desc_size,
memmap.desc_version,
(efi_memory_desc_t *)__pa(new_memmap));
- if (status != EFI_SUCCESS) {
- pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
- status);
- panic("EFI call to SetVirtualAddressMap() failed!");
- }
+ if (status != EFI_SUCCESS) {
+ pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
+ status);
+ panic("EFI call to SetVirtualAddressMap() failed!");
}
/*
@@ -1072,7 +1132,33 @@ void __init efi_enter_virtual_mode(void)
efi_runtime_mkexec();
- kfree(new_memmap);
+ /*
+ * We mapped the descriptor array into the EFI pagetable above but we're
+ * not unmapping it here. Here's why:
+ *
+ * We're copying select PGDs from the kernel page table to the EFI page
+ * table and when we do so and make changes to those PGDs like unmapping
+ * stuff from them, those changes appear in the kernel page table and we
+ * go boom.
+ *
+ * From setup_real_mode():
+ *
+ * ...
+ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+ *
+ * In this particular case, our allocation is in PGD 0 of the EFI page
+ * table but we've copied that PGD from PGD[272] of the EFI page table:
+ *
+ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
+ *
+ * where the direct memory mapping in kernel space is.
+ *
+ * new_memmap's VA comes from that direct mapping and thus clearing it,
+ * it would get cleared in the kernel page table too.
+ *
+ * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
+ */
+ free_pages((unsigned long)new_memmap, pg_shift);
/* clean DUMMY object */
efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
@@ -1082,6 +1168,14 @@ void __init efi_enter_virtual_mode(void)
0, NULL);
}
+void __init efi_enter_virtual_mode(void)
+{
+ if (efi_setup)
+ kexec_enter_virtual_mode();
+ else
+ __efi_enter_virtual_mode();
+}
+
/*
* Convenience functions to obtain memory types and attributes
*/
@@ -1119,9 +1213,8 @@ u64 efi_mem_attributes(unsigned long phys_addr)
}
/*
- * Some firmware has serious problems when using more than 50% of the EFI
- * variable store, i.e. it triggers bugs that can brick machines. Ensure that
- * we never use more than this safe limit.
+ * Some firmware implementations refuse to boot if there's insufficient space
+ * in the variable store. Ensure that we never use more than a safe limit.
*
* Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
* store.
@@ -1140,10 +1233,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
return status;
/*
- * Some firmware implementations refuse to boot if there's insufficient
- * space in the variable store. We account for that by refusing the
- * write if permitting it would reduce the available space to under
- * 5KB. This figure was provided by Samsung, so should be safe.
+ * We account for that by refusing the write if permitting it would
+ * reduce the available space to under 5KB. This figure was provided by
+ * Samsung, so should be safe.
*/
if ((remaining_size - size < EFI_MIN_RESERVE) &&
!efi_no_storage_paranoia) {
@@ -1206,7 +1298,7 @@ static int __init parse_efi_cmdline(char *str)
str++;
if (!strncmp(str, "old_map", 7))
- set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
+ set_bit(EFI_OLD_MEMMAP, &efi.flags);
return 0;
}
@@ -1228,5 +1320,5 @@ void __init efi_apply_memmap_quirks(void)
* UV doesn't support the new EFI pagetable mapping yet.
*/
if (is_uv_system())
- set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
+ set_bit(EFI_OLD_MEMMAP, &efi.flags);
}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 0b74cdf7f816..9ee3491e31fb 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -40,7 +40,12 @@
static unsigned long efi_rt_eflags;
void efi_sync_low_kernel_mappings(void) {}
-void efi_setup_page_tables(void) {}
+void __init efi_dump_pagetable(void) {}
+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+ return 0;
+}
+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
void __init efi_map_region(efi_memory_desc_t *md)
{
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 0c2a234fef1e..19280900ec25 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
sizeof(pgd_t) * num_pgds);
}
-void efi_setup_page_tables(void)
+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
+ pgd_t *pgd;
+
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return 0;
+
efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+ pgd = __va(efi_scratch.efi_pgt);
- if (!efi_enabled(EFI_OLD_MEMMAP))
- efi_scratch.use_pgd = true;
+ /*
+ * It can happen that the physical address of new_memmap lands in memory
+ * which is not mapped in the EFI page table. Therefore we need to go
+ * and ident-map those pages containing the map before calling
+ * phys_efi_set_virtual_address_map().
+ */
+ if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
+ pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
+ return 1;
+ }
+
+ efi_scratch.use_pgd = true;
+
+
+ return 0;
+}
+
+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+ kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
}
static void __init __map_region(efi_memory_desc_t *md, u64 va)
@@ -242,3 +268,12 @@ void __init efi_runtime_mkexec(void)
if (__supported_pte_mask & _PAGE_NX)
runtime_code_page_mkexec();
}
+
+void __init efi_dump_pagetable(void)
+{
+#ifdef CONFIG_EFI_PGT_DUMP
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+ ptdump_walk_pgd_level(NULL, pgd);
+#endif
+}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4753bac65279..af20f1712337 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -233,7 +233,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
{SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
{SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
- {NULL_GUID, NULL, 0},
+ {NULL_GUID, NULL, NULL},
};
static __init int match_config_table(efi_guid_t *guid,
@@ -313,5 +313,8 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
}
pr_cont("\n");
early_iounmap(config_tables, efi.systab->nr_tables * sz);
+
+ set_bit(EFI_CONFIG_TABLES, &efi.flags);
+
return 0;
}
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 3dc248239197..50ea412a25e6 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -227,7 +227,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
memcpy(&entry->var, new_var, count);
err = efivar_entry_set(entry, new_var->Attributes,
- new_var->DataSize, new_var->Data, false);
+ new_var->DataSize, new_var->Data, NULL);
if (err) {
printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err);
return -EIO;
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 8dd524f32284..cdb2971192a5 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file,
u32 attributes;
struct inode *inode = file->f_mapping->host;
unsigned long datasize = count - sizeof(attributes);
- ssize_t bytes = 0;
+ ssize_t bytes;
bool set = false;
if (count < sizeof(attributes))
@@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file,
if (attributes & ~(EFI_VARIABLE_MASK))
return -EINVAL;
- data = kmalloc(datasize, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
- bytes = -EFAULT;
- goto out;
- }
+ data = memdup_user(userbuf + sizeof(attributes), datasize);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
bytes = efivar_entry_set_get_size(var, attributes, &datasize,
data, &set);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0a819e7a60c9..64d532ca890a 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -573,6 +573,7 @@ extern struct efi {
efi_reset_system_t *reset_system;
efi_set_virtual_address_map_t *set_virtual_address_map;
struct efi_memory_map *memmap;
+ unsigned long flags;
} efi;
static inline int
@@ -659,18 +660,17 @@ extern int __init efi_setup_pcdp_console(char *);
#define EFI_ARCH_1 6 /* First arch-specific bit */
#ifdef CONFIG_EFI
-# ifdef CONFIG_X86
-extern int efi_enabled(int facility);
-# else
-static inline int efi_enabled(int facility)
+/*
+ * Test whether the above EFI_* bits are enabled.
+ */
+static inline bool efi_enabled(int feature)
{
- return 1;
+ return test_bit(feature, &efi.flags) != 0;
}
-# endif
#else
-static inline int efi_enabled(int facility)
+static inline bool efi_enabled(int feature)
{
- return 0;
+ return false;
}
#endif