summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-26 07:44:09 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-26 07:44:09 -0800
commit7c811e4b6af424c295e3c6438fdc9b647fe6595f (patch)
treefeab985b30e44102eca093e71ccdfad4f0318087 /arch
parent37c00b84d0c1b5c4c65ae837e2235160c03e84c2 (diff)
parentf18edc95a37a901ffcbe91f5e05105f916a04fae (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (24 commits) x86: no robust/pi futex for real i386 CPUs x86: fix boot failure on 486 due to TSC breakage x86: fix build on non-C locales. x86: make c_idle.work have a static address. x86: don't save unreliable stack trace entries x86: don't make swapper_pg_pmd global x86: don't print a warning when MTRR are blank and running in KVM x86: fix execve with -fstack-protect x86: fix vsyscall wreckage x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE x86: fix spontaneous reboot with allyesconfig bzImage x86: remove double-checking empty zero pages debug x86: notsc is ignored on common configurations x86/mtrr: fix kernel-doc missing notation x86: handle BIOSes which terminate e820 with CF=1 and no SMAP x86: add comments for NOPs x86: don't use P6_NOPs if compiling with CONFIG_X86_GENERIC x86: require family >= 6 if we are using P6 NOPs x86: do not promote TM3x00/TM5x00 to i686-class x86: hpet fix docbook comment ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig.cpu14
-rw-r--r--arch/x86/boot/memory.c9
-rw-r--r--arch/x86/kernel/asm-offsets_32.c4
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c9
-rw-r--r--arch/x86/kernel/cpu/transmeta.c7
-rw-r--r--arch/x86/kernel/entry_64.S6
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S22
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/setup_64.c2
-rw-r--r--arch/x86/kernel/smpboot_64.c2
-rw-r--r--arch/x86/kernel/stacktrace.c4
-rw-r--r--arch/x86/kernel/tsc_32.c3
-rw-r--r--arch/x86/kernel/vsyscall_64.c52
-rw-r--r--arch/x86/lguest/boot.c12
-rw-r--r--arch/x86/mm/init_64.c13
-rw-r--r--arch/x86/mm/pageattr.c84
-rw-r--r--arch/x86/vdso/Makefile2
20 files changed, 121 insertions, 138 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b73a1aa..6d50064db182 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,19 @@ config X86_OOSTORE
def_bool y
depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+#
+# P6_NOPs are a relatively minor optimization that require a family >=
+# 6 processor, except that it is broken on certain VIA chips.
+# Furthermore, AMD chips prefer a totally different sequence of NOPs
+# (which work on all CPUs). As a result, disallow these if we're
+# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
+# x86-64 capable chips); the list of processors in the right-hand clause
+# are the cores that benefit from this optimization.
+#
+config X86_P6_NOP
+ def_bool y
+ depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+
config X86_TSC
def_bool y
depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +403,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
+ default "6" if X86_32 && X86_P6_NOP
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 378353956b5d..e77d89f9e8aa 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -37,6 +37,12 @@ static int detect_memory_e820(void)
"=m" (*desc)
: "D" (desc), "d" (SMAP), "a" (0xe820));
+ /* BIOSes which terminate the chain with CF = 1 as opposed
+ to %ebx = 0 don't always report the SMAP signature on
+ the final, failing, probe. */
+ if (err)
+ break;
+
/* Some BIOSes stop returning SMAP in the middle of
the search loop. We don't know exactly how the BIOS
screwed up the map at that point, we might have a
@@ -47,9 +53,6 @@ static int detect_memory_e820(void)
break;
}
- if (err)
- break;
-
count++;
desc++;
} while (next && count < E820MAX);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a33d53017997..8ea040124f7d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -128,13 +128,11 @@ void foo(void)
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
#endif
-#ifdef CONFIG_LGUEST_GUEST
+#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
-#endif
-#ifdef CONFIG_LGUEST
BLANK();
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f86a3c4a2669..a38aafaefc23 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
/* Init Machine Check Exception if available. */
mcheck_init(c);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b6e136f23d3d..be83336fddba 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -43,6 +43,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/kvm_para.h>
#include "mtrr.h"
u32 num_var_ranges = 0;
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void)
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ * @end_pfn: ending page frame number
*
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
* memory configurations. This routine checks that the highest MTRR matches
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
+ if (!kvm_para_available()) {
+ printk(KERN_WARNING
+ "WARNING: strange, CPU MTRRs all blank?\n");
+ WARN_ON(1);
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 200fb3f9ebfb..e8b422c1c512 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
/* All Transmeta CPUs have a constant TSC */
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- /* If we can run i686 user-space code, call us an i686 */
-#define USER686 ((1 << X86_FEATURE_TSC)|\
- (1 << X86_FEATURE_CX8)|\
- (1 << X86_FEATURE_CMOV))
- if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
- c->x86 = 6;
-
#ifdef CONFIG_SYSCTL
/* randomize_va_space slows us down enormously;
it probably triggers retranslation of x86->native bytecode */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2ad9a1bc6a73..c20c9e7e08dd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@ ENTRY(stub_execve)
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
+ movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip)
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
+ movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 25eb98540a41..fd8ca53943a8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -606,7 +606,7 @@ ENTRY(_stext)
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
-ENTRY(swapper_pg_pmd)
+swapper_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb415043a929..a007454133a3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
- /* Module mapping starts here */
- .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+ /*
+ * 128 MB kernel mapping. We spend a full page on this pagetable
+ * anyway.
+ *
+ * The kernel code+data+bss must not be bigger than that.
+ *
+ * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+ * If you want to increase this then increase MODULES_VADDR
+ * too.)
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
- .fill 512,8,0
+ .fill 512, 8, 0
#undef PMDS
#undef NEXT_PAGE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 429d084e014d..235fd6c77504 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void)
return 0;
}
-/*
- * Try to setup the HPET timer
+/**
+ * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0136d8..43f287744f9f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
+ char __user * __user *envp, struct pt_regs *regs)
{
long error;
char * filename;
filename = getname(name);
error = PTR_ERR(filename);
- if (IS_ERR(filename))
+ if (IS_ERR(filename))
return error;
- error = do_execve(filename, argv, envp, &regs);
+ error = do_execve(filename, argv, envp, regs);
putname(filename);
return error;
}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6fd804f07821..7637dc91c79b 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
#ifdef CONFIG_X86_MCE
mcheck_init(c);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index d53bd6fcb428..0880f2c388a9 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
int timeout;
unsigned long start_rip;
struct create_idle c_idle = {
- .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
+ INIT_WORK(&c_idle.work, do_fork_idle);
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 02f0f61f5b11..c28c342c162f 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name)
static void save_stack_address(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = data;
+ if (!reliable)
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -37,6 +39,8 @@ static void
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = (struct stack_trace *)data;
+ if (!reliable)
+ return;
if (in_sched_functions(addr))
return;
if (trace->skip > 0) {
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 43517e324be8..f14cfd9d1f94 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
static int __init tsc_setup(char *str)
{
printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
+ "cannot disable TSC completely.\n");
+ mark_tsc_unstable("user disabled TSC");
return 1;
}
#else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3f8242774580..b6be812fac05 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,11 +44,6 @@
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","cx","memory"
-#define __pa_vsymbol(x) \
- ({unsigned long v; \
- extern char __vsyscall_0; \
- asm("" : "=r" (v) : "0" (x)); \
- ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
/*
* vsyscall_gtod_data contains data that is :
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz)
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
- asm volatile("vsysc2: syscall"
+ asm volatile("syscall"
: "=a" (ret)
: "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
: __syscall_clobber );
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
static __always_inline long time_syscall(long *t)
{
long secs;
- asm volatile("vsysc1: syscall"
+ asm volatile("syscall"
: "=a" (secs)
: "0" (__NR_time),"D" (t) : __syscall_clobber);
return secs;
@@ -227,50 +222,10 @@ long __vsyscall(3) venosys_1(void)
}
#ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2 0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- extern u16 vsysc1, vsysc2;
- u16 __iomem *map1;
- u16 __iomem *map2;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (!write)
- return ret;
- /* gcc has some trouble with __va(__pa()), so just do it this
- way. */
- map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
- if (!map1)
- return -ENOMEM;
- map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
- if (!map2) {
- ret = -ENOMEM;
- goto out;
- }
- if (!vsyscall_gtod_data.sysctl_enabled) {
- writew(SYSCALL, map1);
- writew(SYSCALL, map2);
- } else {
- writew(NOP2, map1);
- writew(NOP2, map2);
- }
- iounmap(map2);
-out:
- iounmap(map1);
- return ret;
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .mode = 0644 },
{}
};
@@ -279,7 +234,6 @@ static ctl_table kernel_root_table2[] = {
.child = kernel_table2 },
{}
};
-
#endif
/* Assume __initcall executes before all user space. Hopefully kmod
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4895dc..cccb38a59653 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
#include <linux/lguest_launcher.h>
#include <linux/virtio_console.h>
#include <linux/pm.h>
+#include <asm/lguest.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -75,15 +76,6 @@
* behaving in simplified but equivalent ways. In particular, the Guest is the
* same kernel as the Host (or at least, built from the same source code). :*/
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
.noirq_start = (u32)lguest_noirq_start,
@@ -489,7 +481,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
*pmdp = pmdval;
lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
+ (__pa(pmdp)&(PAGE_SIZE-1)), 0);
}
/* There are a couple of legacy places where the kernel sets a PTE, but we
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb652f5a93fb..a02a14f0f324 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
}
/*
- * The head.S code sets up the kernel high mapping from:
- * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ * The head.S code sets up the kernel high mapping:
+ *
+ * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
*
* phys_addr holds the negative offset to the kernel, which is added
* to the compile time generated pmds. This results in invalid pmds up
@@ -515,14 +516,6 @@ void __init mem_init(void)
/* clear_bss() already clear the empty_zero_page */
- /* temporary debugging - double check it's true: */
- {
- int i;
-
- for (i = 0; i < 1024; i++)
- WARN_ON_ONCE(empty_zero_page[i]);
- }
-
reservedpages = 0;
/* this will put all low memory onto the freelists */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..14e48b5a94ba 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -355,45 +361,48 @@ out_unlock:
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
{
- struct page *p;
gfp_t gfp = GFP_KERNEL;
+ unsigned long flags;
+ struct page *p;
- /* Do not allocate from interrupt context */
- if (in_irq() || irqs_disabled())
- return;
/*
- * Check unlocked. I does not matter when we have one more
- * page in the pool. The bit lock avoids recursive pool
- * allocations:
+ * Avoid recursion (on debug-pagealloc) and also signal
+ * our priority to get to these pagetables:
*/
- if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ if (current->flags & PF_MEMALLOC)
return;
+ current->flags |= PF_MEMALLOC;
-#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * We could do:
- * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
- * but this fails on !PREEMPT kernels
+ * Allocate atomically from atomic contexts:
*/
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+ if (in_atomic() || irqs_disabled() || debug_pagealloc)
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- while (pool_pages < pool_size) {
+ while (pool_pages < pool_size || (ret && !*ret)) {
p = alloc_pages(gfp, 0);
if (!p) {
pool_failed++;
break;
}
- spin_lock_irq(&pgd_lock);
+ /*
+ * If the call site needs a page right now, provide it:
+ */
+ if (ret && !*ret) {
+ *ret = p;
+ continue;
+ }
+ spin_lock_irqsave(&pgd_lock, flags);
list_add(&p->lru, &page_pool);
pool_pages++;
- spin_unlock_irq(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
- clear_bit_unlock(0, &pool_refill);
+
+ current->flags &= ~PF_MEMALLOC;
}
#define SHIFT_MB (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
* GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB:
*/
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- pool_size = POOL_PAGES_PER_GB * gb;
+ if (debug_pagealloc) {
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ } else {
+ pool_size = 1;
+ }
pool_low = pool_size;
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags);
- return -ENOMEM;
+ base = NULL;
+ cpa_fill_pool(&base);
+ if (!base)
+ return -ENOMEM;
+ spin_lock_irqsave(&pgd_lock, flags);
+ } else {
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
}
- base = list_first_entry(&page_pool, struct page, lru);
- list_del(&base->lru);
- pool_pages--;
-
- if (pool_pages < pool_low)
- pool_low = pool_pages;
-
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
cpa_flush_all(cache);
out:
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
+
return ret;
}
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* Try to refill the page pool here. We can do this only after
* the tlb flush.
*/
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
}
#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index f385a4b4a484..b8bd0c4aa02e 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -48,7 +48,7 @@ obj-$(VDSO64-y) += vdso-syms.lds
# Match symbols in the DSO that look like VDSO*; produce a file of constants.
#
sed-vdsosym := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+ -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p'
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@