diff options
author | Hugh Dickins <hughd@google.com> | 2017-09-05 12:05:01 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-01-05 15:46:32 +0100 |
commit | ac2f1018ac210cfedcfab82dbafbda4e2db7ed08 (patch) | |
tree | 4d73a20ff84a99331693931a0e3d60c33f5cc352 /arch/x86/include/asm/pgtable_64.h | |
parent | 8f0baadf2bea3861217763734b57e1dd2db703dd (diff) |
kaiser: do not set _PAGE_NX on pgd_none
native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must
avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry:
usually that just generated a warning on exit, but sometimes
more mysterious and damaging failures (our production machines
could not complete booting).
The original fix to this just avoided adding _PAGE_NX to
an empty entry; but eventually more problems surfaced with kexec,
and EFI mapping expected to be a problem too. So now instead
change native_set_pgd() to update shadow only if _PAGE_USER:
A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure)
use set_pgd() to set up a temporary internal virtual address space, with
physical pages remapped at what Kaiser regards as userspace addresses:
Kaiser then assumes a shadow pgd follows, which it will try to corrupt.
This appears to be responsible for the recent kexec and kdump failures;
though it's unclear how those did not manifest as a problem before.
Ah, the shadow pgd will only be assumed to "follow" if the requested
pgd is on an even-numbered page: so I suppose it was going wrong 50%
of the time all along.
What we need is a flag to set_pgd(), to tell it we're dealing with
userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying?
Add a test for that. But we cannot do the same for pgd_clear()
(which may be called to clear corrupted entries - set aside the
question of "corrupt in which pgd?" until later), so there just
rely on pgd_clear() not being called in the problematic cases -
with a WARN_ON_ONCE() which should fire half the time if it is.
But this is getting too big for an inline function: move it into
arch/x86/mm/kaiser.c (which then demands a boot/compressed mod);
and de-void and de-space native_get_shadow/normal_pgd() while here.
Also make an unnecessary change to KASLR's init_trampoline(): it was
using set_pgd() to assign a pgd-value to a global variable (not in a
pg directory page), which was rather scary given Kaiser's previous
set_pgd() implementation: not a problem now, but too scary to leave
as was, it could easily blow up if we have to change set_pgd() again.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch/x86/include/asm/pgtable_64.h')
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 51 |
1 files changed, 13 insertions, 38 deletions
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 000265c8b74e..177caf3f7ab1 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud) } #ifdef CONFIG_KAISER -static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { - return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE); + return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); } -static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) { - return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE); + return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); } #else -static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) +static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { BUILD_BUG_ON(1); return NULL; } -static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) +static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) { return pgdp; } #endif /* CONFIG_KAISER */ -/* - * Page table pages are page-aligned. The lower half of the top - * level is used for userspace and the top half for the kernel. - * This returns true for user pages that need to get copied into - * both the user and kernel copies of the page tables, and false - * for kernel pages that should only be in the kernel copy. - */ -static inline bool is_userspace_pgd(void *__ptr) -{ - unsigned long ptr = (unsigned long)__ptr; - - return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2)); -} - static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_KAISER - pteval_t extra_kern_pgd_flags = 0; - /* Do we need to also populate the shadow pgd? */ - if (is_userspace_pgd(pgdp)) { - native_get_shadow_pgd(pgdp)->pgd = pgd.pgd; - /* - * Even if the entry is *mapping* userspace, ensure - * that userspace can not use it. This way, if we - * get out to userspace running on the kernel CR3, - * userspace will crash instead of running. - */ - extra_kern_pgd_flags = _PAGE_NX; - } - pgdp->pgd = pgd.pgd; - pgdp->pgd |= extra_kern_pgd_flags; -#else /* CONFIG_KAISER */ - *pgdp = pgd; -#endif + *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); } static inline void native_pgd_clear(pgd_t *pgd) |