diff options
author | Tom Rini <trini@konsulko.com> | 2025-04-03 11:43:38 -0600 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2025-04-03 11:43:38 -0600 |
commit | 1f2a3d066c99f57675162ce09586e9de30407f1b (patch) | |
tree | 40bfceab01cc1c6a035eb66792638149400db1ef /arch/x86/cpu | |
parent | 39ff722b3ee0bd569388a3b89c59899511ac1a24 (diff) | |
parent | a3d255d996b346c527962926ff80343e02ae8f00 (diff) |
Merge patch series "x86: Improve operation under QEMU"
Simon Glass <sjg@chromium.org> says:
U-Boot can start and boot an OS in both qemu-x86 and qemu-x86_64 but it
is not perfect.
With both builds, executing the VESA ROM causes an intermittent hang, at
least on some AMD CPUs.
With qemu-x86_64 kvm cannot be used since the move to long mode (64-bit)
is done in a way that works on real hardware but not with QEMU. This
means that performance is 4-5x slower than it could be, at least on my
CPU.
We can work around the first problem by using Bochs, which is anyway a
better choice than VESA for QEMU. The second can be addressed by using
the same descriptor across the jump to long mode.
With an MTRR fix this allows booting into Ubuntu on qemu-x86_64
In v3 some e820 patches are included to make booting reliable and avoid
ACPI tables being dropped. Also, several MTTR problems are addressed, to
support memory sizes above 4GB reliably.
Link: https://lore.kernel.org/all/20250315142643.2600605-1-sjg@chromium.org/
Diffstat (limited to 'arch/x86/cpu')
-rw-r--r-- | arch/x86/cpu/cpu.c | 24 | ||||
-rw-r--r-- | arch/x86/cpu/i386/call64.S | 35 | ||||
-rw-r--r-- | arch/x86/cpu/i386/cpu.c | 41 | ||||
-rw-r--r-- | arch/x86/cpu/mtrr.c | 115 | ||||
-rw-r--r-- | arch/x86/cpu/qemu/dram.c | 18 | ||||
-rw-r--r-- | arch/x86/cpu/qemu/e820.c | 62 | ||||
-rw-r--r-- | arch/x86/cpu/qemu/qemu.c | 20 | ||||
-rw-r--r-- | arch/x86/cpu/start.S | 4 | ||||
-rw-r--r-- | arch/x86/cpu/start16.S | 3 | ||||
-rw-r--r-- | arch/x86/cpu/x86_64/cpu.c | 5 |
10 files changed, 205 insertions, 122 deletions
diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c index a8b21406ac0..c373b14df30 100644 --- a/arch/x86/cpu/cpu.c +++ b/arch/x86/cpu/cpu.c @@ -364,3 +364,27 @@ long locate_coreboot_table(void) return addr; } + +static bool has_cpuid(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static uint cpu_cpuid_extended_level(void) +{ + return cpuid_eax(0x80000000); +} + +int cpu_phys_address_size(void) +{ + if (!has_cpuid()) + return 32; + + if (cpu_cpuid_extended_level() >= 0x80000008) + return cpuid_eax(0x80000008) & 0xff; + + if (cpuid_edx(1) & (CPUID_FEATURE_PAE | CPUID_FEATURE_PSE36)) + return 36; + + return 32; +} diff --git a/arch/x86/cpu/i386/call64.S b/arch/x86/cpu/i386/call64.S index 424732fa3fa..a9d3f16a6ad 100644 --- a/arch/x86/cpu/i386/call64.S +++ b/arch/x86/cpu/i386/call64.S @@ -7,6 +7,7 @@ */ #include <asm/msr-index.h> +#include <asm/processor.h> #include <asm/processor-flags.h> .code32 @@ -21,17 +22,19 @@ cpu_call64: * ecx - target */ cli + pushl $0 /* top 64-bits of target */ push %ecx /* arg2 = target */ push %edx /* arg1 = setup_base */ mov %eax, %ebx - /* Load new GDT with the 64bit segments using 32bit descriptor */ - leal gdt, %eax - movl %eax, gdt+2 - lgdt gdt + # disable paging + movl %cr0, %eax + andl $~X86_CR0_PG, %eax + movl %eax, %cr0 /* Enable PAE mode */ - movl $(X86_CR4_PAE), %eax + movl %cr4, %eax + orl $X86_CR4_PAE, %eax movl %eax, %cr4 /* Enable the boot page tables */ @@ -44,12 +47,6 @@ cpu_call64: btsl $_EFER_LME, %eax wrmsr - /* After gdt is loaded */ - xorl %eax, %eax - lldt %ax - movl $0x20, %eax - ltr %ax - /* * Setup for the jump to 64bit mode * @@ -62,22 +59,18 @@ cpu_call64: */ pop %esi /* setup_base */ - pushl $0x10 - leal lret_target, %eax - pushl %eax - /* Enter paged protected Mode, activating Long Mode */ - movl $(X86_CR0_PG | X86_CR0_PE), %eax + movl %cr0, %eax + orl $X86_CR0_PG, %eax movl %eax, %cr0 /* Jump from 32bit compatibility mode into 64bit mode. */ - lret + ljmp $(X86_GDT_ENTRY_64BIT_CS * X86_GDT_ENTRY_SIZE), $lret_target -code64: +.code64 lret_target: - pop %eax /* target */ - mov %eax, %eax /* Clear bits 63:32 */ - jmp *%eax /* Jump to the 64-bit target */ + pop %rax /* target */ + jmp *%rax /* Jump to the 64-bit target */ .globl call64_stub_size call64_stub_size: diff --git a/arch/x86/cpu/i386/cpu.c b/arch/x86/cpu/i386/cpu.c index a51a24498a7..ee6dbeb5c48 100644 --- a/arch/x86/cpu/i386/cpu.c +++ b/arch/x86/cpu/i386/cpu.c @@ -35,10 +35,6 @@ DECLARE_GLOBAL_DATA_PTR; -#define CPUID_FEATURE_PAE BIT(6) -#define CPUID_FEATURE_PSE36 BIT(17) -#define CPUID_FEAURE_HTT BIT(28) - /* * Constructor for a conventional segment GDT (or LDT) entry * This is a macro so it can be used in initialisers @@ -160,6 +156,9 @@ void arch_setup_gd(gd_t *new_gd) gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_CS] = GDT_ENTRY(0x809b, 0, 0xfffff); gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_DS] = GDT_ENTRY(0x8093, 0, 0xfffff); + gdt_addr[X86_GDT_ENTRY_64BIT_CS] = GDT_ENTRY(0xaf9b, 0, 0xfffff); + gdt_addr[X86_GDT_ENTRY_64BIT_TS1] = GDT_ENTRY(0x8980, 0, 0xfffff); + gdt_addr[X86_GDT_ENTRY_64BIT_TS2] = 0; load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES); load_ds(X86_GDT_ENTRY_32BIT_DS); @@ -409,25 +408,6 @@ static void setup_identity(void) } } -static uint cpu_cpuid_extended_level(void) -{ - return cpuid_eax(0x80000000); -} - -int cpu_phys_address_size(void) -{ - if (!has_cpuid()) - return 32; - - if (cpu_cpuid_extended_level() >= 0x80000008) - return cpuid_eax(0x80000008) & 0xff; - - if (cpuid_edx(1) & (CPUID_FEATURE_PAE | CPUID_FEATURE_PSE36)) - return 36; - - return 32; -} - static void setup_mtrr(void) { u64 mtrr_cap; @@ -589,6 +569,13 @@ int cpu_has_64bit(void) #define PAGETABLE_BASE 0x80000 #define PAGETABLE_SIZE (6 * 4096) +#define _PRES BIT(0) /* present */ +#define _RW BIT(1) /* write allowed */ +#define _US BIT(2) /* user-access allowed */ +#define _A BIT(5) /* has been accessed */ +#define _DT BIT(6) /* has been written to */ +#define _PS BIT(7) /* indicates 2MB page size here */ + /** * build_pagetable() - build a flat 4GiB page table structure for 64-bti mode * @@ -601,15 +588,17 @@ static void build_pagetable(uint32_t *pgtable) memset(pgtable, '\0', PAGETABLE_SIZE); /* Level 4 needs a single entry */ - pgtable[0] = (ulong)&pgtable[1024] + 7; + pgtable[0] = (ulong)&pgtable[1024] + _PRES + _RW + _US + _A; /* Level 3 has one 64-bit entry for each GiB of memory */ for (i = 0; i < 4; i++) - pgtable[1024 + i * 2] = (ulong)&pgtable[2048] + 0x1000 * i + 7; + pgtable[1024 + i * 2] = (ulong)&pgtable[2048] + 0x1000 * i + + _PRES + _RW + _US + _A; /* Level 2 has 2048 64-bit entries, each repesenting 2MiB */ for (i = 0; i < 2048; i++) - pgtable[2048 + i * 2] = 0x183 + (i << 21UL); + pgtable[2048 + i * 2] = _PRES + _RW + _US + _PS + _A + _DT + + (i << 21UL); } int cpu_jump_to_64bit(ulong setup_base, ulong target) diff --git a/arch/x86/cpu/mtrr.c b/arch/x86/cpu/mtrr.c index 07ea89162de..7a0f00b9b8f 100644 --- a/arch/x86/cpu/mtrr.c +++ b/arch/x86/cpu/mtrr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ /* * (C) Copyright 2014 Google, Inc + * Portions added from coreboot * * Memory Type Range Regsters - these are used to tell the CPU whether * memory is cacheable and if so the cache write mode to use. @@ -16,6 +17,7 @@ * since the MTRR registers are sometimes in flux. */ +#include <cpu.h> #include <cpu_func.h> #include <log.h> #include <sort.h> @@ -39,6 +41,27 @@ static const char *const mtrr_type_name[MTRR_TYPE_COUNT] = { "Back", }; +u64 mtrr_to_size(u64 mask) +{ + u64 size; + + size = ~mask & ((1ULL << cpu_phys_address_size()) - 1); + size |= (1 << 12) - 1; + size += 1; + + return size; +} + +u64 mtrr_to_mask(u64 size) +{ + u64 mask; + + mask = ~(size - 1); + mask &= (1ull << cpu_phys_address_size()) - 1; + + return mask; +} + /* Prepare to adjust MTRRs */ void mtrr_open(struct mtrr_state *state, bool do_caches) { @@ -68,11 +91,9 @@ void mtrr_close(struct mtrr_state *state, bool do_caches) static void set_var_mtrr(uint reg, uint type, uint64_t start, uint64_t size) { - u64 mask; + u64 mask = mtrr_to_mask(size); wrmsrl(MTRR_PHYS_BASE_MSR(reg), start | type); - mask = ~(size - 1); - mask &= (1ULL << CONFIG_CPU_ADDR_BITS) - 1; wrmsrl(MTRR_PHYS_MASK_MSR(reg), mask | MTRR_PHYS_MASK_VALID); } @@ -184,30 +205,80 @@ int mtrr_commit(bool do_caches) return 0; } -int mtrr_add_request(int type, uint64_t start, uint64_t size) +/* fms: find most significant bit set (from Linux) */ +static inline uint fms(uint val) +{ + uint ret; + + __asm__("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $0,%0\n" + "1:" : "=r" (ret) : "mr" (val)); + + return ret; +} + +/* + * fms64: find most significant bit set in a 64-bit word + * As samples, fms64(0x0) = 0; fms64(0x4400) = 14; + * fms64(0x40400000000) = 42. + */ +static uint fms64(uint64_t val) +{ + u32 hi = (u32)(val >> 32); + + if (!hi) + return fms((u32)val); + + return fms(hi) + 32; +} + +int mtrr_add_request(int type, u64 base, uint64_t size) { struct mtrr_request *req; - uint64_t mask; + u64 mask; debug("%s: count=%d\n", __func__, gd->arch.mtrr_req_count); if (!gd->arch.has_mtrr) return -ENOSYS; - if (!is_power_of_2(size)) - return -EINVAL; - - if (gd->arch.mtrr_req_count == MAX_MTRR_REQUESTS) - return -ENOSPC; - req = &gd->arch.mtrr_req[gd->arch.mtrr_req_count++]; - req->type = type; - req->start = start; - req->size = size; - debug("%d: type=%d, %08llx %08llx\n", gd->arch.mtrr_req_count - 1, - req->type, req->start, req->size); - mask = ~(req->size - 1); - mask &= (1ULL << CONFIG_CPU_ADDR_BITS) - 1; - mask |= MTRR_PHYS_MASK_VALID; - debug(" %016llx %016llx\n", req->start | req->type, mask); + while (size) { + uint addr_lsb; + uint size_msb; + u64 mtrr_size; + + addr_lsb = fls64(base); + size_msb = fms64(size); + + /* + * All MTRR entries need to have their base aligned to the + * mask size. The maximum size is calculated by a function of + * the min base bit set and maximum size bit set. + * Algorithm is from coreboot + */ + if (!addr_lsb || addr_lsb > size_msb) + mtrr_size = 1ull << size_msb; + else + mtrr_size = 1ull << addr_lsb; + log_debug("addr_lsb %x size_msb %x mtrr_size %llx\n", + addr_lsb, size_msb, mtrr_size); + + if (gd->arch.mtrr_req_count == MAX_MTRR_REQUESTS) + return -ENOSPC; + req = &gd->arch.mtrr_req[gd->arch.mtrr_req_count++]; + req->type = type; + req->start = base; + req->size = mtrr_size; + log_debug("%d: type=%d, %08llx %08llx ", + gd->arch.mtrr_req_count - 1, req->type, req->start, + req->size); + mask = mtrr_to_mask(req->size); + mask |= MTRR_PHYS_MASK_VALID; + log_debug(" %016llx %016llx\n", req->start | req->type, mask); + + size -= mtrr_size; + base += mtrr_size; + } return 0; } @@ -360,9 +431,7 @@ int mtrr_list(int reg_count, int cpu_select) base = info.mtrr[i].base; mask = info.mtrr[i].mask; - size = ~mask & ((1ULL << CONFIG_CPU_ADDR_BITS) - 1); - size |= (1 << 12) - 1; - size += 1; + size = mtrr_to_size(mask); valid = mask & MTRR_PHYS_MASK_VALID; type = mtrr_type_name[base & MTRR_BASE_TYPE_MASK]; printf("%d %-5s %-12s %016llx %016llx %016llx\n", i, diff --git a/arch/x86/cpu/qemu/dram.c b/arch/x86/cpu/qemu/dram.c index 62a301c0fd3..ba3638e6acc 100644 --- a/arch/x86/cpu/qemu/dram.c +++ b/arch/x86/cpu/qemu/dram.c @@ -4,7 +4,9 @@ */ #include <init.h> +#include <spl.h> #include <asm/global_data.h> +#include <asm/mtrr.h> #include <asm/post.h> #include <asm/arch/qemu.h> #include <linux/sizes.h> @@ -44,6 +46,22 @@ int dram_init(void) gd->ram_size += qemu_get_high_memory_size(); post_code(POST_DRAM); + if (xpl_phase() == PHASE_BOARD_F) { + u64 total = gd->ram_size; + int ret; + + if (total > SZ_2G + SZ_1G) + total += SZ_1G; + ret = mtrr_add_request(MTRR_TYPE_WRBACK, 0, total); + if (ret != -ENOSYS) { + if (ret) + return log_msg_ret("mta", ret); + ret = mtrr_commit(false); + if (ret) + return log_msg_ret("mtc", ret); + } + } + return 0; } diff --git a/arch/x86/cpu/qemu/e820.c b/arch/x86/cpu/qemu/e820.c index 17a04f86479..078d1d86b02 100644 --- a/arch/x86/cpu/qemu/e820.c +++ b/arch/x86/cpu/qemu/e820.c @@ -6,6 +6,7 @@ * (C) Copyright 2019 Bin Meng <bmeng.cn@gmail.com> */ +#include <bloblist.h> #include <env_internal.h> #include <malloc.h> #include <asm/e820.h> @@ -19,51 +20,34 @@ unsigned int install_e820_map(unsigned int max_entries, struct e820_entry *entries) { u64 high_mem_size; - int n = 0; + struct e820_ctx ctx; - entries[n].addr = 0; - entries[n].size = ISA_START_ADDRESS; - entries[n].type = E820_RAM; - n++; + e820_init(&ctx, entries, max_entries); - entries[n].addr = ISA_START_ADDRESS; - entries[n].size = ISA_END_ADDRESS - ISA_START_ADDRESS; - entries[n].type = E820_RESERVED; - n++; + e820_next(&ctx, E820_RAM, ISA_START_ADDRESS); + e820_next(&ctx, E820_RESERVED, ISA_END_ADDRESS); /* - * since we use memalign(malloc) to allocate high memory for - * storing ACPI tables, we need to reserve them in e820 tables, - * otherwise kernel will reclaim them and data will be corrupted + * if we use bloblist to allocate high memory for storing ACPI tables, + * we need to reserve that region in e820 tables, otherwise the kernel + * will reclaim them and data will be corrupted. The ACPI tables may not + * have been written yet, so use the whole bloblist size */ - entries[n].addr = ISA_END_ADDRESS; - entries[n].size = gd->relocaddr - TOTAL_MALLOC_LEN - ISA_END_ADDRESS; - entries[n].type = E820_RAM; - n++; - - /* for simplicity, reserve entire malloc space */ - entries[n].addr = gd->relocaddr - TOTAL_MALLOC_LEN; - entries[n].size = TOTAL_MALLOC_LEN; - entries[n].type = E820_RESERVED; - n++; - - entries[n].addr = gd->relocaddr; - entries[n].size = qemu_get_low_memory_size() - gd->relocaddr; - entries[n].type = E820_RESERVED; - n++; - - entries[n].addr = CONFIG_PCIE_ECAM_BASE; - entries[n].size = CONFIG_PCIE_ECAM_SIZE; - entries[n].type = E820_RESERVED; - n++; + if (IS_ENABLED(CONFIG_BLOBLIST_TABLES)) { + e820_to_addr(&ctx, E820_RAM, (ulong)gd->bloblist); + e820_next(&ctx, E820_ACPI, bloblist_get_total_size()); + } else { + /* If using memalign() reserve that whole region instead */ + e820_to_addr(&ctx, E820_RAM, gd->relocaddr - TOTAL_MALLOC_LEN); + e820_next(&ctx, E820_ACPI, TOTAL_MALLOC_LEN); + } + e820_to_addr(&ctx, E820_RAM, qemu_get_low_memory_size()); + e820_add(&ctx, E820_RESERVED, CONFIG_PCIE_ECAM_BASE, + CONFIG_PCIE_ECAM_SIZE); high_mem_size = qemu_get_high_memory_size(); - if (high_mem_size) { - entries[n].addr = SZ_4G; - entries[n].size = high_mem_size; - entries[n].type = E820_RAM; - n++; - } + if (high_mem_size) + e820_add(&ctx, E820_RAM, SZ_4G, high_mem_size); - return n; + return e820_finish(&ctx); } diff --git a/arch/x86/cpu/qemu/qemu.c b/arch/x86/cpu/qemu/qemu.c index 563f63e2bc8..e846ccd44aa 100644 --- a/arch/x86/cpu/qemu/qemu.c +++ b/arch/x86/cpu/qemu/qemu.c @@ -15,14 +15,21 @@ #include <asm/arch/qemu.h> #include <asm/u-boot-x86.h> -static bool i440fx; - #if CONFIG_IS_ENABLED(QFW_PIO) U_BOOT_DRVINFO(x86_qfw_pio) = { .name = "qfw_pio", }; #endif +static bool is_i440fx(void) +{ + u16 device; + + pci_read_config16(PCI_BDF(0, 0, 0), PCI_DEVICE_ID, &device); + + return device == PCI_DEVICE_ID_INTEL_82441; +} + static void enable_pm_piix(void) { u8 en; @@ -50,16 +57,17 @@ static void enable_pm_ich9(void) void qemu_chipset_init(void) { - u16 device, xbcs; + bool i440fx; + u16 xbcs; int pam, i; + i440fx = is_i440fx(); + /* * i440FX and Q35 chipset have different PAM register offset, but with * the same bitfield layout. Here we determine the offset based on its * PCI device ID. */ - pci_read_config16(PCI_BDF(0, 0, 0), PCI_DEVICE_ID, &device); - i440fx = (device == PCI_DEVICE_ID_INTEL_82441); pam = i440fx ? I440FX_PAM : Q35_PAM; /* @@ -123,7 +131,7 @@ int mp_determine_pci_dstirq(int bus, int dev, int func, int pirq) { u8 irq; - if (i440fx) { + if (is_i440fx()) { /* * Not like most x86 platforms, the PIRQ[A-D] on PIIX3 are not * connected to I/O APIC INTPIN#16-19. Instead they are routed diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S index 0ef27cc5a00..385a691265e 100644 --- a/arch/x86/cpu/start.S +++ b/arch/x86/cpu/start.S @@ -254,7 +254,7 @@ multiboot_header: * GDT is setup in a safe location in RAM */ gdt_ptr2: - .word 0x1f /* limit (31 bytes = 4 GDT entries - 1) */ + .word gdt2_end - gdt_ptr2 - 1 .long gdt_rom2 /* base */ /* Some CPUs are picky about GDT alignment... */ @@ -313,4 +313,6 @@ gdt_rom2: .byte 0x93 /* access */ .byte 0xcf /* flags + limit_high */ .byte 0x00 /* base_high */ +gdt2_end: + #endif diff --git a/arch/x86/cpu/start16.S b/arch/x86/cpu/start16.S index 865a49731e5..8d9acb193e0 100644 --- a/arch/x86/cpu/start16.S +++ b/arch/x86/cpu/start16.S @@ -61,7 +61,7 @@ idt_ptr: * GDT is setup in a safe location in RAM */ gdt_ptr: - .word 0x1f /* limit (31 bytes = 4 GDT entries - 1) */ + .word gdt_end - gdt_rom - 1 .long BOOT_SEG + gdt_rom /* base */ /* Some CPUs are picky about GDT alignment... */ @@ -120,3 +120,4 @@ gdt_rom: .byte 0x93 /* access */ .byte 0xcf /* flags + limit_high */ .byte 0x00 /* base_high */ +gdt_end: diff --git a/arch/x86/cpu/x86_64/cpu.c b/arch/x86/cpu/x86_64/cpu.c index 71bc07f872a..25ae92c702f 100644 --- a/arch/x86/cpu/x86_64/cpu.c +++ b/arch/x86/cpu/x86_64/cpu.c @@ -59,11 +59,6 @@ int x86_cpu_reinit_f(void) return 0; } -int cpu_phys_address_size(void) -{ - return CONFIG_CPU_ADDR_BITS; -} - int x86_cpu_init_f(void) { return 0; |