From 8280c0c58e9762a9fe29d550a9db81410de77691 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: fix GDT's number of quadwords in comment Fix comments to represent the true number of quadwords in GDT. Signed-off-by: Ahmed S. Darwish Signed-off-by: Andi Kleen Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- arch/i386/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 3fa7f9389afe..cb185f40c282 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -612,7 +612,7 @@ ENTRY(boot_gdt_table) .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ /* - * The Global Descriptor Table contains 28 quadwords, per-CPU. + * The Global Descriptor Table contains 32 quadwords, per-CPU. */ .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) @@ -639,7 +639,7 @@ ENTRY(cpu_gdt_table) /* * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, + * The code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ .quad 0x00409a000000ffff /* 0x90 32-bit code */ -- cgit v1.2.3 From bf50467204b435421d8de33ad080fa46c6f3d50b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Use per-cpu GDT immediately upon boot Now we are no longer dynamically allocating the GDT, we don't need the "cpu_gdt_table" at all: we can switch straight from "boot_gdt_table" to the per-cpu GDT. This means initializing the cpu_gdt array in C. The boot CPU uses the per-cpu var directly, then in smp_prepare_cpus() it switches to the per-cpu copy just allocated. For secondary CPUs, the early_gdt_descr is set to point directly to their per-cpu copy. For UP the code is very simple: it keeps using the "per-cpu" GDT as per SMP, but we never have to move. 
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/head.S | 55 +------------------------------------------------ 1 file changed, 1 insertion(+), 54 deletions(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cb185f40c282..633fd2f47429 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -599,7 +599,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long cpu_gdt_table + .long per_cpu__cpu_gdt /* Overwritten for secondary CPUs */ /* * The boot_gdt_table must mirror the equivalent in setup.S and is @@ -610,56 +610,3 @@ ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ - -/* - * The Global Descriptor Table contains 32 quadwords, per-CPU. - */ - .align L1_CACHE_BYTES -ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* 0x0b reserved */ - .quad 0x0000000000000000 /* 0x13 reserved */ - .quad 0x0000000000000000 /* 0x1b reserved */ - .quad 0x0000000000000000 /* 0x20 unused */ - .quad 0x0000000000000000 /* 0x28 unused */ - .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ - .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ - .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ - .quad 0x0000000000000000 /* 0x4b reserved */ - .quad 0x0000000000000000 /* 0x53 reserved */ - .quad 0x0000000000000000 /* 0x5b reserved */ - - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ - - .quad 0x0000000000000000 /* 0x80 TSS descriptor */ - .quad 0x0000000000000000 /* 0x88 LDT descriptor */ - - /* - * Segments used for 
calling PnP BIOS have byte granularity. - * The code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - .quad 0x00409a000000ffff /* 0x90 32-bit code */ - .quad 0x00009a000000ffff /* 0x98 16-bit code */ - .quad 0x000092000000ffff /* 0xa0 16-bit data */ - .quad 0x0000920000000000 /* 0xa8 16-bit data */ - .quad 0x0000920000000000 /* 0xb0 16-bit data */ - - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - .quad 0x00409a000000ffff /* 0xb8 APM CS code */ - .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x004092000000ffff /* 0xc8 APM DS data */ - - .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ - .quad 0x00cf92000000ffff /* 0xd8 - PDA */ - .quad 0x0000000000000000 /* 0xe0 - unused */ - .quad 0x0000000000000000 /* 0xe8 - unused */ - .quad 0x0000000000000000 /* 0xf0 - unused */ - .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ - -- cgit v1.2.3 From 52de74dd3994e165ef1b35c33d54655a6400e30c Mon Sep 17 00:00:00 2001 From: Sebastien Dugue Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] i386: Rename boot_gdt_table to boot_gdt Rename boot_gdt_table to boot_gdt to avoid the duplicate T(able). Signed-off-by: Sebastien Dugue Signed-off-by: Andi Kleen Acked-by: Rusty Russell Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/head.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 633fd2f47429..cc46494787e8 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -147,8 +147,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but - * we know the trampoline has already loaded the boot_gdt_table GDT - * for us. 
+ * we know the trampoline has already loaded the boot_gdt for us. * * If cpu hotplug is not supported then this code can go in init section * which will be freed later @@ -588,7 +587,7 @@ fault_msg: .word 0 # 32 bit align gdt_desc.address boot_gdt_descr: .word __BOOT_DS+7 - .long boot_gdt_table - __PAGE_OFFSET + .long boot_gdt - __PAGE_OFFSET .word 0 # 32-bit align idt_desc.address idt_descr: @@ -602,11 +601,11 @@ ENTRY(early_gdt_descr) .long per_cpu__cpu_gdt /* Overwritten for secondary CPUs */ /* - * The boot_gdt_table must mirror the equivalent in setup.S and is + * The boot_gdt must mirror the equivalent in setup.S and is * used only for booting. */ .align L1_CACHE_BYTES -ENTRY(boot_gdt_table) +ENTRY(boot_gdt) .fill GDT_ENTRY_BOOT_CS,8,0 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ -- cgit v1.2.3 From 7a61d35d4b4056e7711031202da7605e052f4137 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: Page-align the GDT Xen wants a dedicated page for the GDT. I believe VMI likes it too. lguest, KVM and native don't care. Simple transformation to page-aligned "struct gdt_page". 
Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Acked-by: Jeremy Fitzhardinge --- arch/i386/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cc46494787e8..bb36c24311b4 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -598,7 +598,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long per_cpu__cpu_gdt /* Overwritten for secondary CPUs */ + .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ /* * The boot_gdt must mirror the equivalent in setup.S and is -- cgit v1.2.3 From 7c3576d261ce046789a7db14f43303f8120910c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert PDA into the percpu section Currently x86 (similar to x86-64) has a special per-cpu structure called "i386_pda" which can be easily and efficiently referenced via the %fs register. An ELF section is more flexible than a structure, allowing any piece of code to use this area. Indeed, such a section already exists: the per-cpu area. So this patch: (1) Removes the PDA and uses per-cpu variables for each current member. (2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU. (3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which can be used to calculate addresses for this CPU's variables. (4) Simplifies startup, because %fs doesn't need to be loaded with a special segment at early boot; it can be deferred until the first percpu area is allocated (or never for UP). The result is less code and one less x86-specific concept. 
Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- arch/i386/kernel/head.S | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index bb36c24311b4..12277d8938df 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -317,12 +317,12 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - call setup_pda lgdt early_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. + movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds @@ -332,16 +332,17 @@ is386: movl $2,%ecx # set MP movl %eax,%gs lldt %ax - movl $(__KERNEL_PDA),%eax - mov %eax,%fs - cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder #ifdef CONFIG_SMP movb ready, %cl movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel - jne initialize_secondary # all other CPUs call initialize_secondary + je 1f + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + jmp initialize_secondary # all other CPUs call initialize_secondary +1: #endif /* CONFIG_SMP */ jmp start_kernel @@ -364,23 +365,6 @@ check_x87: .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ ret -/* - * Point the GDT at this CPU's PDA. On boot this will be - * cpu_gdt_table and boot_pda; for secondary CPUs, these will be - * that CPU's GDT and PDA. 
- */ -ENTRY(setup_pda) - /* get the PDA pointer */ - movl start_pda, %eax - - /* slot the PDA address into the GDT */ - mov early_gdt_descr+2, %ecx - mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ - shr $16, %eax - mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ - mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ - ret - /* * setup_idt * @@ -553,9 +537,6 @@ ENTRY(empty_zero_page) * This starts the data section. */ .data -ENTRY(start_pda) - .long boot_pda - ENTRY(stack_start) .long init_thread_union+THREAD_SIZE .long __BOOT_DS -- cgit v1.2.3 From 9ce8c2ed12550f90fd6e902990652b13df647793 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: map enough initial memory to create lowmem mappings head.S creates the very initial pagetable for the kernel. This just maps enough space for the kernel itself, and an allocation bitmap. The amount of mapped memory is rounded up to 4Mbytes, and so this typically ends up mapping 8Mbytes of memory. When booting, pagetable_init() needs to create mappings for all lowmem, and the pagetables for these mappings are allocated from the free pages around the kernel in low memory. If the number of pagetable pages + kernel size exceeds head.S's initial mapping, it will end up faulting on an unmapped page. This will only happen with specific combinations of kernel size and memory size. This patch makes sure that head.S also maps enough space to fit the kernel pagetables as well as the kernel itself. It ends up using an additional two pages of unreclaimable memory. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Acked-by: "H. Peter Anvin" Cc: Andi Kleen Cc: Zachary Amsden Cc: Chris Wright Cc: "Eric W. 
Biederman" Cc: Linus Torvalds , --- arch/i386/kernel/head.S | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel/head.S') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 12277d8938df..9b10af65faaa 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -34,17 +34,32 @@ /* * This is how much memory *in addition to the memory covered up to - * and including _end* we need mapped initially. We need one bit for - * each possible page, but only in low memory, which means - * 2^32/4096/8 = 128K worst case (4G/4G split.) + * and including _end* we need mapped initially. + * We need: + * - one bit for each possible page, but only in low memory, which means + * 2^32/4096/8 = 128K worst case (4G/4G split.) + * - enough space to map all low memory, which means + * (2^32/4096) / 1024 pages (worst case, non PAE) + * (2^32/4096) / 512 + 4 pages (worst case for PAE) + * - a few pages for allocator use before the kernel pagetable has + * been set up * * Modulo rounding, each megabyte assigned here requires a kilobyte of * memory, which is currently unreclaimed. * * This should be a multiple of a page. */ -#define INIT_MAP_BEYOND_END (128*1024) +LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) +#if PTRS_PER_PMD > 1 +PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD +#else +PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) +#endif +BOOTBITMAP_SIZE = LOW_PAGES / 8 +ALLOCATOR_SLOP = 4 + +INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, -- cgit v1.2.3