summaryrefslogtreecommitdiff
path: root/arch/x86/boot
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/boot')
-rw-r--r--arch/x86/boot/bitops.h8
-rw-r--r--arch/x86/boot/boot.h18
-rw-r--r--arch/x86/boot/compressed/Makefile18
-rw-r--r--arch/x86/boot/compressed/eboot.c2
-rw-r--r--arch/x86/boot/compressed/kaslr.c251
-rw-r--r--arch/x86/boot/compressed/misc.c49
-rw-r--r--arch/x86/boot/compressed/misc.h25
-rw-r--r--arch/x86/boot/compressed/pagetable.c29
-rw-r--r--arch/x86/boot/cpu.c2
-rw-r--r--arch/x86/boot/cpucheck.c33
-rw-r--r--arch/x86/boot/cpuflags.c1
-rw-r--r--arch/x86/boot/cpuflags.h1
-rw-r--r--arch/x86/boot/string.c2
13 files changed, 243 insertions, 196 deletions
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 878e4b9940d9..0d41d68131cc 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -16,14 +16,16 @@
#define BOOT_BITOPS_H
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
-static inline int constant_test_bit(int nr, const void *addr)
+#include <linux/types.h>
+
+static inline bool constant_test_bit(int nr, const void *addr)
{
const u32 *p = (const u32 *)addr;
return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
}
-static inline int variable_test_bit(int nr, const void *addr)
+static inline bool variable_test_bit(int nr, const void *addr)
{
- u8 v;
+ bool v;
const u32 *p = (const u32 *)addr;
asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 9011a88353de..e5612f3e3b57 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -24,6 +24,7 @@
#include <linux/types.h>
#include <linux/edd.h>
#include <asm/setup.h>
+#include <asm/asm.h>
#include "bitops.h"
#include "ctype.h"
#include "cpuflags.h"
@@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr)
}
/* Note: these only return true/false, not a signed return value! */
-static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
{
- u8 diff;
- asm volatile("fs; repe; cmpsb; setnz %0"
- : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ bool diff;
+ asm volatile("fs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
-static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
{
- u8 diff;
- asm volatile("gs; repe; cmpsb; setnz %0"
- : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ bool diff;
+ asm volatile("gs; repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
@@ -294,6 +295,7 @@ static inline int cmdline_find_option_bool(const char *option)
/* cpu.c, cpucheck.c */
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
+int check_knl_erratum(void);
int validate_cpu(void);
/* early_serial_console.c */
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f1356889204e..536ccfcc01c6 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
$(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
+# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
+# can place it anywhere in memory and it will still run. However, since
+# it is executed as-is without any ELF relocation processing performed
+# (and has already had all relocation sections stripped from the binary),
+# none of the code can use data relocations (e.g. static assignments of
+# pointer values), since they will be meaningless at runtime. This check
+# will refuse to link the vmlinux if any of these relocations are found.
+quiet_cmd_check_data_rel = DATAREL $@
+define cmd_check_data_rel
+ for obj in $(filter %.o,$^); do \
+ readelf -S $$obj | grep -qF .rel.local && { \
+ echo "error: $$obj has data relocations!" >&2; \
+ exit 1; \
+ } || true; \
+ done
+endef
+
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+ $(call if_changed,check_data_rel)
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 52fef606bc54..ff574dad95cc 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
struct boot_params *boot_params;
struct apm_bios_info *bi;
struct setup_header *hdr;
- struct efi_info *efi;
efi_loaded_image_t *image;
void *options, *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
@@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
memset(boot_params, 0x0, 0x4000);
hdr = &boot_params->hdr;
- efi = &boot_params->efi_info;
bi = &boot_params->apm_bios_info;
/* Copy the second sector to boot_params */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index cfeb0259ed81..a66854d99ee1 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -12,10 +12,6 @@
#include "misc.h"
#include "error.h"
-#include <asm/msr.h>
-#include <asm/archrandom.h>
-#include <asm/e820.h>
-
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
@@ -26,26 +22,6 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-#define I8254_PORT_CONTROL 0x43
-#define I8254_PORT_COUNTER0 0x40
-#define I8254_CMD_READBACK 0xC0
-#define I8254_SELECT_COUNTER0 0x02
-#define I8254_STATUS_NOTREADY 0x40
-static inline u16 i8254(void)
-{
- u16 status, timer;
-
- do {
- outb(I8254_PORT_CONTROL,
- I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
- status = inb(I8254_PORT_COUNTER0);
- timer = inb(I8254_PORT_COUNTER0);
- timer |= inb(I8254_PORT_COUNTER0) << 8;
- } while (status & I8254_STATUS_NOTREADY);
-
- return timer;
-}
-
static unsigned long rotate_xor(unsigned long hash, const void *area,
size_t size)
{
@@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area,
}
/* Attempt to create a simple but unpredictable starting entropy. */
-static unsigned long get_random_boot(void)
+static unsigned long get_boot_seed(void)
{
unsigned long hash = 0;
@@ -72,50 +48,8 @@ static unsigned long get_random_boot(void)
return hash;
}
-static unsigned long get_random_long(const char *purpose)
-{
-#ifdef CONFIG_X86_64
- const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
-#else
- const unsigned long mix_const = 0x3f39e593UL;
-#endif
- unsigned long raw, random = get_random_boot();
- bool use_i8254 = true;
-
- debug_putstr(purpose);
- debug_putstr(" KASLR using");
-
- if (has_cpuflag(X86_FEATURE_RDRAND)) {
- debug_putstr(" RDRAND");
- if (rdrand_long(&raw)) {
- random ^= raw;
- use_i8254 = false;
- }
- }
-
- if (has_cpuflag(X86_FEATURE_TSC)) {
- debug_putstr(" RDTSC");
- raw = rdtsc();
-
- random ^= raw;
- use_i8254 = false;
- }
-
- if (use_i8254) {
- debug_putstr(" i8254");
- random ^= i8254();
- }
-
- /* Circular multiply for better bit diffusion */
- asm("mul %3"
- : "=a" (random), "=d" (raw)
- : "a" (random), "rm" (mix_const));
- random += raw;
-
- debug_putstr("...\n");
-
- return random;
-}
+#define KASLR_COMPRESSED_BOOT
+#include "../../lib/kaslr.c"
struct mem_vector {
unsigned long start;
@@ -132,17 +66,6 @@ enum mem_avoid_index {
static struct mem_vector mem_avoid[MEM_AVOID_MAX];
-static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
-{
- /* Item at least partially before region. */
- if (item->start < region->start)
- return false;
- /* Item at least partially after region. */
- if (item->start + item->size > region->start + region->size)
- return false;
- return true;
-}
-
static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
/* Item one is entirely before item two. */
@@ -296,6 +219,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
if (mem_overlaps(img, &mem_avoid[i]) &&
mem_avoid[i].start < earliest) {
*overlap = mem_avoid[i];
+ earliest = overlap->start;
is_overlapping = true;
}
}
@@ -310,6 +234,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
*overlap = avoid;
+ earliest = overlap->start;
is_overlapping = true;
}
@@ -319,8 +244,6 @@ static bool mem_avoid_overlap(struct mem_vector *img,
return is_overlapping;
}
-static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];
-
struct slot_area {
unsigned long addr;
int num;
@@ -351,36 +274,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
}
}
-static void slots_append(unsigned long addr)
-{
- /* Overflowing the slots list should be impossible. */
- if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
- return;
-
- slots[slot_max++] = addr;
-}
-
static unsigned long slots_fetch_random(void)
{
+ unsigned long slot;
+ int i;
+
/* Handle case of no slots stored. */
if (slot_max == 0)
return 0;
- return slots[get_random_long("Physical") % slot_max];
+ slot = kaslr_get_random_long("Physical") % slot_max;
+
+ for (i = 0; i < slot_area_index; i++) {
+ if (slot >= slot_areas[i].num) {
+ slot -= slot_areas[i].num;
+ continue;
+ }
+ return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
+ }
+
+ if (i == slot_area_index)
+ debug_putstr("slots_fetch_random() failed!?\n");
+ return 0;
}
static void process_e820_entry(struct e820entry *entry,
unsigned long minimum,
unsigned long image_size)
{
- struct mem_vector region, img, overlap;
+ struct mem_vector region, overlap;
+ struct slot_area slot_area;
+ unsigned long start_orig;
/* Skip non-RAM entries. */
if (entry->type != E820_RAM)
return;
- /* Ignore entries entirely above our maximum. */
- if (entry->addr >= KERNEL_IMAGE_SIZE)
+ /* On 32-bit, ignore entries entirely above our maximum. */
+ if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
return;
/* Ignore entries entirely below our minimum. */
@@ -390,31 +321,55 @@ static void process_e820_entry(struct e820entry *entry,
region.start = entry->addr;
region.size = entry->size;
- /* Potentially raise address to minimum location. */
- if (region.start < minimum)
- region.start = minimum;
+ /* Give up if slot area array is full. */
+ while (slot_area_index < MAX_SLOT_AREA) {
+ start_orig = region.start;
- /* Potentially raise address to meet alignment requirements. */
- region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+ /* Potentially raise address to minimum location. */
+ if (region.start < minimum)
+ region.start = minimum;
- /* Did we raise the address above the bounds of this e820 region? */
- if (region.start > entry->addr + entry->size)
- return;
+ /* Potentially raise address to meet alignment needs. */
+ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
- /* Reduce size by any delta from the original address. */
- region.size -= region.start - entry->addr;
+ /* Did we raise the address above this e820 region? */
+ if (region.start > entry->addr + entry->size)
+ return;
- /* Reduce maximum size to fit end of image within maximum limit. */
- if (region.start + region.size > KERNEL_IMAGE_SIZE)
- region.size = KERNEL_IMAGE_SIZE - region.start;
+ /* Reduce size by any delta from the original address. */
+ region.size -= region.start - start_orig;
- /* Walk each aligned slot and check for avoided areas. */
- for (img.start = region.start, img.size = image_size ;
- mem_contains(&region, &img) ;
- img.start += CONFIG_PHYSICAL_ALIGN) {
- if (mem_avoid_overlap(&img, &overlap))
- continue;
- slots_append(img.start);
+ /* On 32-bit, reduce region size to fit within max size. */
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ region.start + region.size > KERNEL_IMAGE_SIZE)
+ region.size = KERNEL_IMAGE_SIZE - region.start;
+
+ /* Return if region can't contain decompressed kernel */
+ if (region.size < image_size)
+ return;
+
+ /* If nothing overlaps, store the region and return. */
+ if (!mem_avoid_overlap(&region, &overlap)) {
+ store_slot_info(&region, image_size);
+ return;
+ }
+
+ /* Store beginning of region if holds at least image_size. */
+ if (overlap.start > region.start + image_size) {
+ struct mem_vector beginning;
+
+ beginning.start = region.start;
+ beginning.size = overlap.start - region.start;
+ store_slot_info(&beginning, image_size);
+ }
+
+ /* Return if overlap extends to or past end of region. */
+ if (overlap.start + overlap.size >= region.start + region.size)
+ return;
+
+ /* Clip off the overlapping region and start over. */
+ region.size -= overlap.start - region.start + overlap.size;
+ region.start = overlap.start + overlap.size;
}
}
@@ -431,6 +386,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
for (i = 0; i < boot_params->e820_entries; i++) {
process_e820_entry(&boot_params->e820_map[i], minimum,
image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820 scan (slot_areas full)!\n");
+ break;
+ }
}
return slots_fetch_random();
@@ -454,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
CONFIG_PHYSICAL_ALIGN + 1;
- random_addr = get_random_long("Virtual") % slots;
+ random_addr = kaslr_get_random_long("Virtual") % slots;
return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
@@ -463,48 +422,54 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
* Since this function examines addresses much more numerically,
* it takes the input and output pointers as 'unsigned long'.
*/
-unsigned char *choose_random_location(unsigned long input,
- unsigned long input_size,
- unsigned long output,
- unsigned long output_size)
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
{
- unsigned long choice = output;
- unsigned long random_addr;
+ unsigned long random_addr, min_addr;
+
+ /* By default, keep output position unchanged. */
+ *virt_addr = *output;
-#ifdef CONFIG_HIBERNATION
- if (!cmdline_find_option_bool("kaslr")) {
- warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
- goto out;
- }
-#else
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
- goto out;
+ return;
}
-#endif
boot_params->hdr.loadflags |= KASLR_FLAG;
+ /* Prepare to add new identity pagetables on demand. */
+ initialize_identity_maps();
+
/* Record the various known unsafe memory ranges. */
- mem_avoid_init(input, input_size, output);
+ mem_avoid_init(input, input_size, *output);
+
+ /*
+ * Low end of the randomization range should be the
+ * smaller of 512M or the initial kernel image
+ * location:
+ */
+ min_addr = min(*output, 512UL << 20);
/* Walk e820 and find a random address. */
- random_addr = find_random_phys_addr(output, output_size);
+ random_addr = find_random_phys_addr(min_addr, output_size);
if (!random_addr) {
warn("KASLR disabled: could not find suitable E820 region!");
- goto out;
+ } else {
+ /* Update the new physical address location. */
+ if (*output != random_addr) {
+ add_identity_map(random_addr, output_size);
+ *output = random_addr;
+ }
}
- /* Always enforce the minimum. */
- if (random_addr < choice)
- goto out;
-
- choice = random_addr;
-
- add_identity_map(choice, output_size);
-
/* This actually loads the identity pagetable on x86_64. */
finalize_identity_maps();
-out:
- return (unsigned char *)choice;
+
+ /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
+ if (IS_ENABLED(CONFIG_X86_64))
+ random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
+ *virt_addr = random_addr;
}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index f14db4e21654..b3c5a5f030ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -170,7 +170,8 @@ void __puthex(unsigned long value)
}
#if CONFIG_X86_NEED_RELOCS
-static void handle_relocations(void *output, unsigned long output_len)
+static void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
{
int *reloc;
unsigned long delta, map, ptr;
@@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len)
* and where it was actually loaded.
*/
delta = min_addr - LOAD_PHYSICAL_ADDR;
- if (!delta) {
- debug_putstr("No relocation needed... ");
- return;
- }
- debug_putstr("Performing relocations... ");
/*
* The kernel contains a table of relocation addresses. Those
@@ -198,6 +194,20 @@ static void handle_relocations(void *output, unsigned long output_len)
map = delta - __START_KERNEL_map;
/*
+ * 32-bit always performs relocations. 64-bit relocations are only
+ * needed if KASLR has chosen a different starting address offset
+ * from __START_KERNEL_map.
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ delta = virt_addr - LOAD_PHYSICAL_ADDR;
+
+ if (!delta) {
+ debug_putstr("No relocation needed... ");
+ return;
+ }
+ debug_putstr("Performing relocations... ");
+
+ /*
* Process relocations: 32 bit relocations first then 64 bit after.
* Three sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
@@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len)
#endif
}
#else
-static inline void handle_relocations(void *output, unsigned long output_len)
+static inline void handle_relocations(void *output, unsigned long output_len,
+ unsigned long virt_addr)
{ }
#endif
@@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
- unsigned char *output_orig = output;
+ unsigned long virt_addr = (unsigned long)output;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
- output = choose_random_location((unsigned long)input_data, input_len,
- (unsigned long)output,
- max(output_len, kernel_total_size));
+ choose_random_location((unsigned long)input_data, input_len,
+ (unsigned long *)&output,
+ max(output_len, kernel_total_size),
+ &virt_addr);
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
- error("Destination address inappropriately aligned");
+ error("Destination physical address inappropriately aligned");
+ if (virt_addr & (MIN_KERNEL_ALIGN - 1))
+ error("Destination virtual address inappropriately aligned");
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
@@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
- error("Wrong destination address");
+ error("Destination address does not match LOAD_PHYSICAL_ADDR");
+ if ((unsigned long)output != virt_addr)
+ error("Destination virtual address changed when not relocatable");
#endif
debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
parse_elf(output);
- /*
- * 32-bit always performs relocations. 64-bit relocations are only
- * needed if kASLR has chosen a different load address.
- */
- if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
- handle_relocations(output, output_len);
+ handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index b6fec1ff10e4..1c8355eadbd1 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -67,28 +67,33 @@ int cmdline_find_option_bool(const char *option);
#if CONFIG_RANDOMIZE_BASE
/* kaslr.c */
-unsigned char *choose_random_location(unsigned long input_ptr,
- unsigned long input_size,
- unsigned long output_ptr,
- unsigned long output_size);
+void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
-static inline
-unsigned char *choose_random_location(unsigned long input_ptr,
- unsigned long input_size,
- unsigned long output_ptr,
- unsigned long output_size)
+static inline void choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long *output,
+ unsigned long output_size,
+ unsigned long *virt_addr)
{
- return (unsigned char *)output_ptr;
+ /* No change from existing output location. */
+ *virt_addr = *output;
}
#endif
#ifdef CONFIG_X86_64
+void initialize_identity_maps(void);
void add_identity_map(unsigned long start, unsigned long size);
void finalize_identity_maps(void);
extern unsigned char _pgtable[];
#else
+static inline void initialize_identity_maps(void)
+{ }
static inline void add_identity_map(unsigned long start, unsigned long size)
{ }
static inline void finalize_identity_maps(void)
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 34b95df14e69..56589d0a804b 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -2,6 +2,9 @@
* This code is used on x86_64 to create page table identity mappings on
* demand by building up a new set of page tables (or appending to the
* existing ones), and then switching over to them when ready.
+ *
+ * Copyright (C) 2015-2016 Yinghai Lu
+ * Copyright (C) 2016 Kees Cook
*/
/*
@@ -17,6 +20,9 @@
/* These actually do the work of building the kernel identity maps. */
#include <asm/init.h>
#include <asm/pgtable.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"
/* Used by pgtable.h asm code to force instruction serialization. */
@@ -59,9 +65,21 @@ static struct alloc_pgt_data pgt_data;
/* The top level page table entry pointer. */
static unsigned long level4p;
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info = {
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+};
+
/* Locates and clears a region for a new top level page table. */
-static void prepare_level4(void)
+void initialize_identity_maps(void)
{
+ /* Init mapping_info with run-time function/buffer pointers. */
+ mapping_info.alloc_pgt_page = alloc_pgt_page;
+ mapping_info.context = &pgt_data;
+
/*
* It should be impossible for this not to already be true,
* but since calling this a second time would rewind the other
@@ -96,17 +114,8 @@ static void prepare_level4(void)
*/
void add_identity_map(unsigned long start, unsigned long size)
{
- struct x86_mapping_info mapping_info = {
- .alloc_pgt_page = alloc_pgt_page,
- .context = &pgt_data,
- .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
- };
unsigned long end = start + size;
- /* Make sure we have a top level page table ready to use. */
- if (!level4p)
- prepare_level4();
-
/* Align boundary to 2M. */
start = round_down(start, PMD_SIZE);
end = round_up(end, PMD_SIZE);
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 29207f69ae8c..26240dde081e 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -93,6 +93,8 @@ int validate_cpu(void)
show_cap_strs(err_flags);
putchar('\n');
return -1;
+ } else if (check_knl_erratum()) {
+ return -1;
} else {
return 0;
}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 1fd7d575092e..4ad7d70e8739 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -24,6 +24,7 @@
# include "boot.h"
#endif
#include <linux/types.h>
+#include <asm/intel-family.h>
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
@@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
}
}
+ if (!err)
+ err = check_knl_erratum();
if (err_flags_ptr)
*err_flags_ptr = err ? err_flags : NULL;
@@ -185,3 +188,33 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
return (cpu.level < req_level || err) ? -1 : 0;
}
+
+int check_knl_erratum(void)
+{
+ /*
+ * First check for the affected model/family:
+ */
+ if (!is_intel() ||
+ cpu.family != 6 ||
+ cpu.model != INTEL_FAM6_XEON_PHI_KNL)
+ return 0;
+
+ /*
+ * This erratum affects the Accessed/Dirty bits, and can
+ * cause stray bits to be set in !Present PTEs. We have
+ * enough bits in our 64-bit PTEs (which we have on real
+ * 64-bit mode or PAE) to avoid using these troublesome
+ * bits. But, we do not have enough space in our 32-bit
+ * PTEs. So, refuse to run on 32-bit non-PAE kernels.
+ */
+ if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE))
+ return 0;
+
+ puts("This 32-bit kernel can not run on this Xeon Phi x200\n"
+ "processor due to a processor erratum. Use a 64-bit\n"
+ "kernel, or enable PAE in this 32-bit kernel.\n\n");
+
+ return -1;
+}
+
+
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index 431fa5f84537..6687ab953257 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -102,6 +102,7 @@ void get_cpuflags(void)
cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
&cpu.flags[0]);
cpu.level = (tfms >> 8) & 15;
+ cpu.family = cpu.level;
cpu.model = (tfms >> 4) & 15;
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index 4cb404fd45ce..15ad56a3f905 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -6,6 +6,7 @@
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
+ int family; /* Family, always */
int model;
u32 flags[NCAPINTS];
};
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 318b8465d302..cc3bd583dce1 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -17,7 +17,7 @@
int memcmp(const void *s1, const void *s2, size_t len)
{
- u8 diff;
+ bool diff;
asm("repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;