summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.cpu31
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/relocs.c2
-rw-r--r--arch/x86/boot/cpucheck.c8
-rw-r--r--arch/x86/kernel/alternative.c36
-rw-r--r--arch/x86/kernel/amd_iommu.c22
-rw-r--r--arch/x86/kernel/apm_32.c3
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/centaur.c11
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/common_64.c74
-rw-r--r--arch/x86/kernel/cpu/cyrix.c32
-rw-r--r--arch/x86/kernel/cpu/feature_names.c3
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/hpet.c19
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/ipi.c3
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c43
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/process.c17
-rw-r--r--arch/x86/kernel/process_32.c54
-rw-r--r--arch/x86/kernel/process_64.c41
-rw-r--r--arch/x86/kernel/ptrace.c446
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/signal_32.c1
-rw-r--r--arch/x86/kernel/signal_64.c1
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/sys_i386_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c1
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/tls.c1
-rw-r--r--arch/x86/kernel/vm86_32.c1
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vsmp_64.c2
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/vmx.c3
-rw-r--r--arch/x86/kvm/vmx.h2
-rw-r--r--arch/x86/mach-default/setup.c4
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/mm/init_32.c5
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/oprofile/nmi_int.c4
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/setup.c2
52 files changed, 1330 insertions, 590 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2c518fbc52ec..60a85768cfcb 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -382,14 +382,17 @@ config X86_OOSTORE
# P6_NOPs are a relatively minor optimization that require a family >=
# 6 processor, except that it is broken on certain VIA chips.
# Furthermore, AMD chips prefer a totally different sequence of NOPs
-# (which work on all CPUs). As a result, disallow these if we're
-# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
-# x86-64 capable chips); the list of processors in the right-hand clause
-# are the cores that benefit from this optimization.
+# (which work on all CPUs). In addition, it looks like Virtual PC
+# does not understand them.
+#
+# As a result, disallow these if we're not compiling for X86_64 (these
+# NOPs do work on all x86-64 capable chips); the list of processors in
+# the right-hand clause are the cores that benefit from this optimization.
#
config X86_P6_NOP
def_bool y
- depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC)
+ depends on X86_64
+ depends on (MCORE2 || MPENTIUM4 || MPSC)
config X86_TSC
def_bool y
@@ -415,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+config X86_DS
+ bool "Debug Store support"
+ default y
+ help
+ Add support for Debug Store.
+ This allows the kernel to provide a memory buffer to the hardware
+ to store various profiling and tracing events.
+
+config X86_PTRACE_BTS
+ bool "ptrace interface to Branch Trace Store"
+ default y
+ depends on (X86_DS && X86_DEBUGCTLMSR)
+ help
+ Add a ptrace interface to allow collecting an execution trace
+ of the traced task.
+ This collects control flow changes in a (cyclic) buffer and allows
+ debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea73706479..aaf5a2131efc 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
*/
#undef CONFIG_PARAVIRT
#ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
#endif
#ifdef CONFIG_X86_64
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c52fc0c..857e492c571e 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
continue;
}
sh_symtab = sec_symtab->symtab;
- sym_strtab = sec->link->strtab;
+ sym_strtab = sec_symtab->link->strtab;
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
Elf32_Rel *rel;
Elf32_Sym *sym;
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4b9ae7c56748..4d3ff037201f 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -38,12 +38,12 @@ static const u32 req_flags[NCAPINTS] =
{
REQUIRED_MASK0,
REQUIRED_MASK1,
- REQUIRED_MASK2,
- REQUIRED_MASK3,
+ 0, /* REQUIRED_MASK2 not implemented in this file */
+ 0, /* REQUIRED_MASK3 not implemented in this file */
REQUIRED_MASK4,
- REQUIRED_MASK5,
+ 0, /* REQUIRED_MASK5 not implemented in this file */
REQUIRED_MASK6,
- REQUIRED_MASK7,
+ 0, /* REQUIRED_MASK7 not implemented in this file */
};
#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb37b553..65a0c1b48696 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
{
- return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return k8_nops;
}
#else /* CONFIG_X86_64 */
-static const struct nop {
- int cpuid;
- const unsigned char *const *noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { X86_FEATURE_P4, p6_nops },
- { X86_FEATURE_P3, p6_nops },
- { -1, NULL }
-};
-
const unsigned char *const *find_nop_table(void)
{
- const unsigned char *const *noptable = intel_nops;
- int i;
-
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- return noptable;
+ if (boot_cpu_has(X86_FEATURE_K8))
+ return k8_nops;
+ else if (boot_cpu_has(X86_FEATURE_K7))
+ return k7_nops;
+ else if (boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return intel_nops;
}
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69b4d060b21c..042fdc27bc92 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret, ready = 0;
+ int ret = 0, ready = 0;
unsigned status = 0;
struct iommu_cmd cmd;
- unsigned long i = 0;
+ unsigned long flags, i = 0;
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
@@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
iommu->need_sync = 0;
- ret = iommu_queue_command(iommu, &cmd);
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ ret = __iommu_queue_command(iommu, &cmd);
if (ret)
- return ret;
+ goto out;
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
@@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+out:
+ spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -140,6 +144,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
{
struct iommu_cmd cmd;
+ int ret;
BUG_ON(iommu == NULL);
@@ -147,9 +152,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
cmd.data[0] = devid;
+ ret = iommu_queue_command(iommu, &cmd);
+
iommu->need_sync = 1;
- return iommu_queue_command(iommu, &cmd);
+ return ret;
}
/*
@@ -159,6 +166,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
u64 address, u16 domid, int pde, int s)
{
struct iommu_cmd cmd;
+ int ret;
memset(&cmd, 0, sizeof(cmd));
address &= PAGE_MASK;
@@ -171,9 +179,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ ret = iommu_queue_command(iommu, &cmd);
+
iommu->need_sync = 1;
- return iommu_queue_command(iommu, &cmd);
+ return ret;
}
/*
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b0..732d1f4e10ee 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -234,6 +234,7 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/i8253.h>
+#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
@@ -2217,7 +2218,7 @@ static int __init apm_init(void)
dmi_check_system(apm_dmi_table);
- if (apm_info.bios.version == 0 || paravirt_enabled()) {
+ if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
printk(KERN_INFO "apm: BIOS not found.\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006fe..505543a75a56 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
#define __NO_STUBS 1
#undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#define __SYSCALL(nr, sym) [nr] = 1,
static char syscalls[] = {
#include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cae9cabc3031..18514ed26104 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1<<8))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
+
+ /* Set MTRR capability flag if appropriate */
+ if (c->x86_model == 13 || c->x86_model == 9 ||
+ (c->x86_model == 8 && c->x86_mask >= 8))
+ set_cpu_cap(c, X86_FEATURE_K6_MTRR);
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
mbytes);
}
- /* Set MTRR capability flag if appropriate */
- if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
- set_cpu_cap(c, X86_FEATURE_K6_MTRR);
break;
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e0f45edd6a55..a0534c04d38a 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -314,6 +314,16 @@ enum {
EAMD3D = 1<<20,
};
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 5:
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+ break;
+ }
+}
+
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
+ .c_early_init = early_init_centaur,
.c_init = init_centaur,
.c_size_cache = centaur_size_cache,
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa39..4e456bd955bb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void)
get_cpu_vendor(c, 1);
+ early_get_cap(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
+}
- early_get_cap(c);
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6; unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. In the latter case it doesn't even *fail*
+ * reliably, so probing for it doesn't even work. Disable it completely
+ * unless we can find a reliable way to detect all the broken cases.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
}
init_scattered_cpuid_features(c);
+ detect_nopl(c);
}
-
}
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017e..a11f5d4477cd 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -18,6 +18,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#include <asm/numa.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
@@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void)
}
}
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. Hence, probe for it based on first
+ * principles.
+ *
+ * Note: no 64-bit chip is known to lack these, but put the code here
+ * for consistency with 32 bits, and to make it utterly trivial to
+ * diagnose the problem should it ever surface.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ const u32 nopl_signature = 0x888c53b1; /* Random number */
+ u32 has_nopl = nopl_signature;
+
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
+ if (c->x86 >= 6) {
+ asm volatile("\n"
+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: xor %0,%0\n"
+ " jmp 2b\n"
+ " .previous\n"
+ _ASM_EXTABLE(1b,3b)
+ : "+a" (has_nopl));
+
+ if (has_nopl == nopl_signature)
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+ }
+}
+
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
void __init early_cpu_init(void)
@@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_phys_bits = eax & 0xff;
}
+ detect_nopl(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
@@ -493,17 +529,20 @@ void pda_init(int cpu)
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
+ pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
+ if (!pda->irqstackptr) {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d",
+ cpu);
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ }
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
-
- pda->irqstackptr += IRQSTACKSIZE-64;
}
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@ -601,19 +640,22 @@ void __cpuinit cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (!orig_ist->ist[0]) {
static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
};
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception stack %ld %d\n",
- v, cpu);
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception "
+ "stack %ld %d\n", v, cpu);
+ }
+ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
+ (unsigned long)estacks;
}
- estacks += PAGE_SIZE << order[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
}
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index e710a21bb6e8..898a5a2002ed 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,13 +15,11 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
- unsigned long flags;
/* we test for DEVID by checking whether CCR3 is writable */
- local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, ccr3 ^ 0x80);
getCx86(0xc0); /* dummy to change bus */
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
*dir0 = getCx86(CX86_DIR0);
*dir1 = getCx86(CX86_DIR1);
}
- local_irq_restore(flags);
}
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __do_cyrix_devid(dir0, dir1);
+ local_irq_restore(flags);
+}
/*
* Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
* order to identify the Cyrix CPU model after we're out of setup.c
@@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void)
local_irq_restore(flags);
}
+static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
+{
+ unsigned char dir0, dir0_msn, dir1 = 0;
+
+ __do_cyrix_devid(&dir0, &dir1);
+ dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+
+ switch (dir0_msn) {
+ case 3: /* 6x86/6x86L */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ case 5: /* 6x86MX/M II */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ }
+}
static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
@@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
+ .c_early_init = early_init_cyrix,
.c_init = init_cyrix,
.c_identify = cyrix_identify,
};
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..c9017799497c 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
NULL, NULL, NULL, NULL,
"constant_tsc", "up", NULL, "arch_perfmon",
"pebs", "bts", NULL, NULL,
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "rep_good", NULL, NULL, NULL,
+ "nopl", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f2569b8f8..f113ef4595f6 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
set_cpu_cap(c, X86_FEATURE_PEBS);
+ ds_init_intel(c);
}
if (cpu_has_bts)
- ds_init_intel(c);
+ ptrace_bts_init_intel(c);
/*
* See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a564..5df16d818371 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
mtrr_type type;
};
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
static int __init
@@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str)
enable_mtrr_cleanup = 1;
return 0;
}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
struct var_mtrr_state {
unsigned long range_startk;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..2b69994fd3a8 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
* Debug Store support
*
* This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
* precise-event based sampling (PEBS).
*
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
*
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
*
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
*
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
*/
+
+#ifdef CONFIG_X86_DS
+
#include <asm/ds.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+
+/*
+ * The configuration for a particular DS hardware implementation.
+ */
+struct ds_configuration {
+ /* the size of the DS structure in bytes */
+ unsigned char sizeof_ds;
+ /* the size of one pointer-typed field in the DS structure in bytes;
+ this covers the first 8 fields related to buffer management. */
+ unsigned char sizeof_field;
+ /* the size of a BTS/PEBS record in bytes */
+ unsigned char sizeof_rec[2];
+};
+static struct ds_configuration ds_cfg;
/*
@@ -44,378 +67,747 @@
* (interrupt occurs when write pointer passes interrupt pointer)
* - value to which counter is reset following counter overflow
*
- * On later architectures, the last branch recording hardware uses
- * 64bit pointers even in 32bit mode.
- *
- *
- * Branch Trace Store (BTS) records store information about control
- * flow changes. They at least provide the following information:
- * - source linear address
- * - destination linear address
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
*
- * Netburst supported a predicated bit that had been dropped in later
- * architectures. We do not suppor it.
*
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ * - an offset giving the start of the respective region
*
- * In order to abstract from the actual DS and BTS layout, we describe
- * the access to the relevant fields.
- * Thanks to Andi Kleen for proposing this design.
+ * This offset is further used to index various arrays holding
+ * information for BTS and PEBS at the respective index.
*
- * The implementation, however, is not as general as it might seem. In
- * order to stay somewhat simple and efficient, we assume an
- * underlying unsigned type (mostly a pointer type) and we expect the
- * field to be at least as big as that type.
+ * On later 32bit processors, we only access the lower 32bit of the
+ * 64bit pointer fields. The upper halves will be zeroed out.
*/
-/*
- * A special from_ip address to indicate that the BTS record is an
- * info record that needs to be interpreted or skipped.
- */
-#define BTS_ESCAPE_ADDRESS (-1)
+enum ds_field {
+ ds_buffer_base = 0,
+ ds_index,
+ ds_absolute_maximum,
+ ds_interrupt_threshold,
+};
-/*
- * A field access descriptor
- */
-struct access_desc {
- unsigned char offset;
- unsigned char size;
+enum ds_qualifier {
+ ds_bts = 0,
+ ds_pebs
};
+static inline unsigned long ds_get(const unsigned char *base,
+ enum ds_qualifier qual, enum ds_field field)
+{
+ base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+ return *(unsigned long *)base;
+}
+
+static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
+ enum ds_field field, unsigned long value)
+{
+ base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+ (*(unsigned long *)base) = value;
+}
+
+
/*
- * The configuration for a particular DS/BTS hardware implementation.
+ * Locking is done only for allocating BTS or PEBS resources and for
+ * guarding context and buffer memory allocation.
+ *
+ * Most functions require the current task to own the ds context part
+ * they are going to access. All the locking is done when validating
+ * access to the context.
*/
-struct ds_configuration {
- /* the DS configuration */
- unsigned char sizeof_ds;
- struct access_desc bts_buffer_base;
- struct access_desc bts_index;
- struct access_desc bts_absolute_maximum;
- struct access_desc bts_interrupt_threshold;
- /* the BTS configuration */
- unsigned char sizeof_bts;
- struct access_desc from_ip;
- struct access_desc to_ip;
- /* BTS variants used to store additional information like
- timestamps */
- struct access_desc info_type;
- struct access_desc info_data;
- unsigned long debugctl_mask;
-};
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
- * The global configuration used by the below accessor functions
+ * Validate that the current task is allowed to access the BTS/PEBS
+ * buffer of the parameter task.
+ *
+ * Returns 0, if access is granted; -Eerrno, otherwise.
*/
-static struct ds_configuration ds_cfg;
+static inline int ds_validate_access(struct ds_context *context,
+ enum ds_qualifier qual)
+{
+ if (!context)
+ return -EPERM;
+
+ if (context->owner[qual] == current)
+ return 0;
+
+ return -EPERM;
+}
+
/*
- * Accessor functions for some DS and BTS fields using the above
- * global ptrace_bts_cfg.
+ * We either support (system-wide) per-cpu or per-thread allocation.
+ * We distinguish the two based on the task_struct pointer, where a
+ * NULL pointer indicates per-cpu allocation for the current cpu.
+ *
+ * Allocations are use-counted. As soon as resources are allocated,
+ * further allocations must be of the same type (per-cpu or
+ * per-thread). We model this by counting allocations (i.e. the number
+ * of tracers of a certain type) for one type negatively:
+ * =0 no tracers
+ * >0 number of per-thread tracers
+ * <0 number of per-cpu tracers
+ *
+ * The below functions to get and put tracers and to check the
+ * allocation type require the ds_lock to be held by the caller.
+ *
+ * Tracers essentially gives the number of ds contexts for a certain
+ * type of allocation.
*/
-static inline unsigned long get_bts_buffer_base(char *base)
+static long tracers;
+
+static inline void get_tracer(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
+ tracers += (task ? 1 : -1);
}
-static inline void set_bts_buffer_base(char *base, unsigned long value)
+
+static inline void put_tracer(struct task_struct *task)
{
- (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
+ tracers -= (task ? 1 : -1);
}
-static inline unsigned long get_bts_index(char *base)
+
+static inline int check_tracer(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_index.offset);
+ return (task ? (tracers >= 0) : (tracers <= 0));
}
-static inline void set_bts_index(char *base, unsigned long value)
+
+
+/*
+ * The DS context is either attached to a thread or to a cpu:
+ * - in the former case, the thread_struct contains a pointer to the
+ * attached context.
+ * - in the latter case, we use a static array of per-cpu context
+ * pointers.
+ *
+ * Contexts are use-counted. They are allocated on first access and
+ * deallocated when the last user puts the context.
+ *
+ * We distinguish between an allocating and a non-allocating get of a
+ * context:
+ * - the allocating get is used for requesting BTS/PEBS resources. It
+ * requires the caller to hold the global ds_lock.
+ * - the non-allocating get is used for all other cases. A
+ * non-existing context indicates an error. It acquires and releases
+ * the ds_lock itself for obtaining the context.
+ *
+ * A context and its DS configuration are allocated and deallocated
+ * together. A context always has a DS configuration of the
+ * appropriate size.
+ */
+static DEFINE_PER_CPU(struct ds_context *, system_context);
+
+#define this_system_context per_cpu(system_context, smp_processor_id())
+
+/*
+ * Returns the pointer to the parameter task's context or to the
+ * system-wide context, if task is NULL.
+ *
+ * Increases the use count of the returned context, if not NULL.
+ */
+static inline struct ds_context *ds_get_context(struct task_struct *task)
{
- (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
+ struct ds_context *context;
+
+ spin_lock(&ds_lock);
+
+ context = (task ? task->thread.ds_ctx : this_system_context);
+ if (context)
+ context->count++;
+
+ spin_unlock(&ds_lock);
+
+ return context;
}
-static inline unsigned long get_bts_absolute_maximum(char *base)
+
+/*
+ * Same as ds_get_context, but allocates the context and it's DS
+ * structure, if necessary; returns NULL; if out of memory.
+ *
+ * pre: requires ds_lock to be held
+ */
+static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
- return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
+ struct ds_context **p_context =
+ (task ? &task->thread.ds_ctx : &this_system_context);
+ struct ds_context *context = *p_context;
+
+ if (!context) {
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+
+ if (!context)
+ return NULL;
+
+ context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+ if (!context->ds) {
+ kfree(context);
+ return NULL;
+ }
+
+ *p_context = context;
+
+ context->this = p_context;
+ context->task = task;
+
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+
+ if (!task || (task == current))
+ wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+
+ get_tracer(task);
+ }
+
+ context->count++;
+
+ return context;
}
-static inline void set_bts_absolute_maximum(char *base, unsigned long value)
+
+/*
+ * Decreases the use count of the parameter context, if not NULL.
+ * Deallocates the context, if the use count reaches zero.
+ */
+static inline void ds_put_context(struct ds_context *context)
{
- (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+ if (!context)
+ return;
+
+ spin_lock(&ds_lock);
+
+ if (--context->count)
+ goto out;
+
+ *(context->this) = NULL;
+
+ if (context->task)
+ clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+
+ if (!context->task || (context->task == current))
+ wrmsrl(MSR_IA32_DS_AREA, 0);
+
+ put_tracer(context->task);
+
+ /* free any leftover buffers from tracers that did not
+ * deallocate them properly. */
+ kfree(context->buffer[ds_bts]);
+ kfree(context->buffer[ds_pebs]);
+ kfree(context->ds);
+ kfree(context);
+ out:
+ spin_unlock(&ds_lock);
}
-static inline unsigned long get_bts_interrupt_threshold(char *base)
+
+
+/*
+ * Handle a buffer overflow
+ *
+ * task: the task whose buffers are overflowing;
+ * NULL for a buffer overflow on the current cpu
+ * context: the ds context
+ * qual: the buffer type
+ */
+static void ds_overflow(struct task_struct *task, struct ds_context *context,
+ enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
+ if (!context)
+ return;
+
+ if (context->callback[qual])
+ (*context->callback[qual])(task);
+
+ /* todo: do some more overflow handling */
}
-static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
+
+
+/*
+ * Allocate a non-pageable buffer of the parameter size.
+ * Checks the memory and the locked memory rlimit.
+ *
+ * Returns the buffer, if successful;
+ * NULL, if out of memory or rlimit exceeded.
+ *
+ * size: the requested buffer size in bytes
+ * pages (out): if not NULL, contains the number of pages reserved
+ */
+static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
{
- (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+ unsigned long rlim, vm, pgsz;
+ void *buffer;
+
+ pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + pgsz;
+ if (rlim < vm)
+ return NULL;
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + pgsz;
+ if (rlim < vm)
+ return NULL;
+
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
+ return NULL;
+
+ current->mm->total_vm += pgsz;
+ current->mm->locked_vm += pgsz;
+
+ if (pages)
+ *pages = pgsz;
+
+ return buffer;
}
-static inline unsigned long get_from_ip(char *base)
+
+static int ds_request(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.from_ip.offset);
+ struct ds_context *context;
+ unsigned long buffer, adj;
+ const unsigned long alignment = (1 << 3);
+ int error = 0;
+
+ if (!ds_cfg.sizeof_ds)
+ return -EOPNOTSUPP;
+
+ /* we require some space to do alignment adjustments below */
+ if (size < (alignment + ds_cfg.sizeof_rec[qual]))
+ return -EINVAL;
+
+ /* buffer overflow notification is not yet implemented */
+ if (ovfl)
+ return -EOPNOTSUPP;
+
+
+ spin_lock(&ds_lock);
+
+ if (!check_tracer(task))
+ return -EPERM;
+
+ error = -ENOMEM;
+ context = ds_alloc_context(task);
+ if (!context)
+ goto out_unlock;
+
+ error = -EALREADY;
+ if (context->owner[qual] == current)
+ goto out_unlock;
+ error = -EPERM;
+ if (context->owner[qual] != NULL)
+ goto out_unlock;
+ context->owner[qual] = current;
+
+ spin_unlock(&ds_lock);
+
+
+ error = -ENOMEM;
+ if (!base) {
+ base = ds_allocate_buffer(size, &context->pages[qual]);
+ if (!base)
+ goto out_release;
+
+ context->buffer[qual] = base;
+ }
+ error = 0;
+
+ context->callback[qual] = ovfl;
+
+ /* adjust the buffer address and size to meet alignment
+ * constraints:
+ * - buffer is double-word aligned
+ * - size is multiple of record size
+ *
+ * We checked the size at the very beginning; we have enough
+ * space to do the adjustment.
+ */
+ buffer = (unsigned long)base;
+
+ adj = ALIGN(buffer, alignment) - buffer;
+ buffer += adj;
+ size -= adj;
+
+ size /= ds_cfg.sizeof_rec[qual];
+ size *= ds_cfg.sizeof_rec[qual];
+
+ ds_set(context->ds, qual, ds_buffer_base, buffer);
+ ds_set(context->ds, qual, ds_index, buffer);
+ ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+ if (ovfl) {
+ /* todo: select a suitable interrupt threshold */
+ } else
+ ds_set(context->ds, qual,
+ ds_interrupt_threshold, buffer + size + 1);
+
+ /* we keep the context until ds_release */
+ return error;
+
+ out_release:
+ context->owner[qual] = NULL;
+ ds_put_context(context);
+ return error;
+
+ out_unlock:
+ spin_unlock(&ds_lock);
+ ds_put_context(context);
+ return error;
}
-static inline void set_from_ip(char *base, unsigned long value)
+
+int ds_request_bts(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl)
{
- (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
+ return ds_request(task, base, size, ovfl, ds_bts);
}
-static inline unsigned long get_to_ip(char *base)
+
+int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+ ds_ovfl_callback_t ovfl)
{
- return *(unsigned long *)(base + ds_cfg.to_ip.offset);
+ return ds_request(task, base, size, ovfl, ds_pebs);
}
-static inline void set_to_ip(char *base, unsigned long value)
+
+static int ds_release(struct task_struct *task, enum ds_qualifier qual)
{
- (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
+ struct ds_context *context;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ kfree(context->buffer[qual]);
+ context->buffer[qual] = NULL;
+
+ current->mm->total_vm -= context->pages[qual];
+ current->mm->locked_vm -= context->pages[qual];
+ context->pages[qual] = 0;
+ context->owner[qual] = NULL;
+
+ /*
+ * we put the context twice:
+ * once for the ds_get_context
+ * once for the corresponding ds_request
+ */
+ ds_put_context(context);
+ out:
+ ds_put_context(context);
+ return error;
}
-static inline unsigned char get_info_type(char *base)
+
+int ds_release_bts(struct task_struct *task)
{
- return *(unsigned char *)(base + ds_cfg.info_type.offset);
+ return ds_release(task, ds_bts);
}
-static inline void set_info_type(char *base, unsigned char value)
+
+int ds_release_pebs(struct task_struct *task)
{
- (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
+ return ds_release(task, ds_pebs);
}
-static inline unsigned long get_info_data(char *base)
+
+static int ds_get_index(struct task_struct *task, size_t *pos,
+ enum ds_qualifier qual)
{
- return *(unsigned long *)(base + ds_cfg.info_data.offset);
+ struct ds_context *context;
+ unsigned long base, index;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ index = ds_get(context->ds, qual, ds_index);
+
+ error = ((index - base) / ds_cfg.sizeof_rec[qual]);
+ if (pos)
+ *pos = error;
+ out:
+ ds_put_context(context);
+ return error;
}
-static inline void set_info_data(char *base, unsigned long value)
+
+int ds_get_bts_index(struct task_struct *task, size_t *pos)
{
- (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
+ return ds_get_index(task, pos, ds_bts);
}
+int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+{
+ return ds_get_index(task, pos, ds_pebs);
+}
-int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+static int ds_get_end(struct task_struct *task, size_t *pos,
+ enum ds_qualifier qual)
{
- size_t bts_size_in_records;
- unsigned long bts;
- void *ds;
+ struct ds_context *context;
+ unsigned long base, end;
+ int error;
+
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
+
+ error = ((end - base) / ds_cfg.sizeof_rec[qual]);
+ if (pos)
+ *pos = error;
+ out:
+ ds_put_context(context);
+ return error;
+}
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+int ds_get_bts_end(struct task_struct *task, size_t *pos)
+{
+ return ds_get_end(task, pos, ds_bts);
+}
- if (bts_size_in_bytes < 0)
- return -EINVAL;
+int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+{
+ return ds_get_end(task, pos, ds_pebs);
+}
- bts_size_in_records =
- bts_size_in_bytes / ds_cfg.sizeof_bts;
- bts_size_in_bytes =
- bts_size_in_records * ds_cfg.sizeof_bts;
+static int ds_access(struct task_struct *task, size_t index,
+ const void **record, enum ds_qualifier qual)
+{
+ struct ds_context *context;
+ unsigned long base, idx;
+ int error;
- if (bts_size_in_bytes <= 0)
+ if (!record)
return -EINVAL;
- bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
-
- if (!bts)
- return -ENOMEM;
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
- ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ idx = base + (index * ds_cfg.sizeof_rec[qual]);
- if (!ds) {
- kfree((void *)bts);
- return -ENOMEM;
- }
-
- set_bts_buffer_base(ds, bts);
- set_bts_index(ds, bts);
- set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
- set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+ error = -EINVAL;
+ if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
+ goto out;
- *dsp = ds;
- return 0;
+ *record = (const void *)idx;
+ error = ds_cfg.sizeof_rec[qual];
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_free(void **dsp)
+int ds_access_bts(struct task_struct *task, size_t index, const void **record)
{
- if (*dsp) {
- kfree((void *)get_bts_buffer_base(*dsp));
- kfree(*dsp);
- *dsp = NULL;
- }
- return 0;
+ return ds_access(task, index, record, ds_bts);
}
-int ds_get_bts_size(void *ds)
+int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
{
- int size_in_bytes;
-
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
-
- if (!ds)
- return 0;
-
- size_in_bytes =
- get_bts_absolute_maximum(ds) -
- get_bts_buffer_base(ds);
- return size_in_bytes;
+ return ds_access(task, index, record, ds_pebs);
}
-int ds_get_bts_end(void *ds)
+static int ds_write(struct task_struct *task, const void *record, size_t size,
+ enum ds_qualifier qual, int force)
{
- int size_in_bytes = ds_get_bts_size(ds);
-
- if (size_in_bytes <= 0)
- return size_in_bytes;
+ struct ds_context *context;
+ int error;
- return size_in_bytes / ds_cfg.sizeof_bts;
-}
+ if (!record)
+ return -EINVAL;
-int ds_get_bts_index(void *ds)
-{
- int index_offset_in_bytes;
+ error = -EPERM;
+ context = ds_get_context(task);
+ if (!context)
+ goto out;
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+ if (!force) {
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
+ }
- index_offset_in_bytes =
- get_bts_index(ds) -
- get_bts_buffer_base(ds);
+ error = 0;
+ while (size) {
+ unsigned long base, index, end, write_end, int_th;
+ unsigned long write_size, adj_write_size;
+
+ /*
+ * write as much as possible without producing an
+ * overflow interrupt.
+ *
+ * interrupt_threshold must either be
+ * - bigger than absolute_maximum or
+ * - point to a record between buffer_base and absolute_maximum
+ *
+ * index points to a valid record.
+ */
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ index = ds_get(context->ds, qual, ds_index);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
+ int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+ write_end = min(end, int_th);
+
+ /* if we are already beyond the interrupt threshold,
+ * we fill the entire buffer */
+ if (write_end <= index)
+ write_end = end;
+
+ if (write_end <= index)
+ goto out;
+
+ write_size = min((unsigned long) size, write_end - index);
+ memcpy((void *)index, record, write_size);
+
+ record = (const char *)record + write_size;
+ size -= write_size;
+ error += write_size;
+
+ adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+ adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+ /* zero out trailing bytes */
+ memset((char *)index + write_size, 0,
+ adj_write_size - write_size);
+ index += adj_write_size;
+
+ if (index >= end)
+ index = base;
+ ds_set(context->ds, qual, ds_index, index);
+
+ if (index >= int_th)
+ ds_overflow(task, context, qual);
+ }
- return index_offset_in_bytes / ds_cfg.sizeof_bts;
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_set_overflow(void *ds, int method)
+int ds_write_bts(struct task_struct *task, const void *record, size_t size)
{
- switch (method) {
- case DS_O_SIGNAL:
- return -EOPNOTSUPP;
- case DS_O_WRAP:
- return 0;
- default:
- return -EINVAL;
- }
+ return ds_write(task, record, size, ds_bts, /* force = */ 0);
}
-int ds_get_overflow(void *ds)
+int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
{
- return DS_O_WRAP;
+ return ds_write(task, record, size, ds_pebs, /* force = */ 0);
}
-int ds_clear(void *ds)
+int ds_unchecked_write_bts(struct task_struct *task,
+ const void *record, size_t size)
{
- int bts_size = ds_get_bts_size(ds);
- unsigned long bts_base;
-
- if (bts_size <= 0)
- return bts_size;
-
- bts_base = get_bts_buffer_base(ds);
- memset((void *)bts_base, 0, bts_size);
-
- set_bts_index(ds, bts_base);
- return 0;
+ return ds_write(task, record, size, ds_bts, /* force = */ 1);
}
-int ds_read_bts(void *ds, int index, struct bts_struct *out)
+int ds_unchecked_write_pebs(struct task_struct *task,
+ const void *record, size_t size)
{
- void *bts;
+ return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+}
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
+static int ds_reset_or_clear(struct task_struct *task,
+ enum ds_qualifier qual, int clear)
+{
+ struct ds_context *context;
+ unsigned long base, end;
+ int error;
- if (index < 0)
- return -EINVAL;
+ context = ds_get_context(task);
+ error = ds_validate_access(context, qual);
+ if (error < 0)
+ goto out;
- if (index >= ds_get_bts_size(ds))
- return -EINVAL;
+ base = ds_get(context->ds, qual, ds_buffer_base);
+ end = ds_get(context->ds, qual, ds_absolute_maximum);
- bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
+ if (clear)
+ memset((void *)base, 0, end - base);
- memset(out, 0, sizeof(*out));
- if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
- out->qualifier = get_info_type(bts);
- out->variant.jiffies = get_info_data(bts);
- } else {
- out->qualifier = BTS_BRANCH;
- out->variant.lbr.from_ip = get_from_ip(bts);
- out->variant.lbr.to_ip = get_to_ip(bts);
- }
+ ds_set(context->ds, qual, ds_index, base);
- return sizeof(*out);;
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
}
-int ds_write_bts(void *ds, const struct bts_struct *in)
+int ds_reset_bts(struct task_struct *task)
{
- unsigned long bts;
-
- if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
- return -EOPNOTSUPP;
-
- if (ds_get_bts_size(ds) <= 0)
- return -ENXIO;
+ return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+}
- bts = get_bts_index(ds);
+int ds_reset_pebs(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+}
- memset((void *)bts, 0, ds_cfg.sizeof_bts);
- switch (in->qualifier) {
- case BTS_INVALID:
- break;
+int ds_clear_bts(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+}
- case BTS_BRANCH:
- set_from_ip((void *)bts, in->variant.lbr.from_ip);
- set_to_ip((void *)bts, in->variant.lbr.to_ip);
- break;
+int ds_clear_pebs(struct task_struct *task)
+{
+ return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+}
- case BTS_TASK_ARRIVES:
- case BTS_TASK_DEPARTS:
- set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
- set_info_type((void *)bts, in->qualifier);
- set_info_data((void *)bts, in->variant.jiffies);
- break;
+int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+{
+ struct ds_context *context;
+ int error;
- default:
+ if (!value)
return -EINVAL;
- }
- bts = bts + ds_cfg.sizeof_bts;
- if (bts >= get_bts_absolute_maximum(ds))
- bts = get_bts_buffer_base(ds);
- set_bts_index(ds, bts);
+ context = ds_get_context(task);
+ error = ds_validate_access(context, ds_pebs);
+ if (error < 0)
+ goto out;
- return ds_cfg.sizeof_bts;
+ *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
}
-unsigned long ds_debugctl_mask(void)
+int ds_set_pebs_reset(struct task_struct *task, u64 value)
{
- return ds_cfg.debugctl_mask;
-}
+ struct ds_context *context;
+ int error;
-#ifdef __i386__
-static const struct ds_configuration ds_cfg_netburst = {
- .sizeof_ds = 9 * 4,
- .bts_buffer_base = { 0, 4 },
- .bts_index = { 4, 4 },
- .bts_absolute_maximum = { 8, 4 },
- .bts_interrupt_threshold = { 12, 4 },
- .sizeof_bts = 3 * 4,
- .from_ip = { 0, 4 },
- .to_ip = { 4, 4 },
- .info_type = { 4, 1 },
- .info_data = { 8, 4 },
- .debugctl_mask = (1<<2)|(1<<3)
-};
+ context = ds_get_context(task);
+ error = ds_validate_access(context, ds_pebs);
+ if (error < 0)
+ goto out;
-static const struct ds_configuration ds_cfg_pentium_m = {
- .sizeof_ds = 9 * 4,
- .bts_buffer_base = { 0, 4 },
- .bts_index = { 4, 4 },
- .bts_absolute_maximum = { 8, 4 },
- .bts_interrupt_threshold = { 12, 4 },
- .sizeof_bts = 3 * 4,
- .from_ip = { 0, 4 },
- .to_ip = { 4, 4 },
- .info_type = { 4, 1 },
- .info_data = { 8, 4 },
- .debugctl_mask = (1<<6)|(1<<7)
+ *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+
+ error = 0;
+ out:
+ ds_put_context(context);
+ return error;
+}
+
+static const struct ds_configuration ds_cfg_var = {
+ .sizeof_ds = sizeof(long) * 12,
+ .sizeof_field = sizeof(long),
+ .sizeof_rec[ds_bts] = sizeof(long) * 3,
+ .sizeof_rec[ds_pebs] = sizeof(long) * 10
};
-#endif /* _i386_ */
-
-static const struct ds_configuration ds_cfg_core2 = {
- .sizeof_ds = 9 * 8,
- .bts_buffer_base = { 0, 8 },
- .bts_index = { 8, 8 },
- .bts_absolute_maximum = { 16, 8 },
- .bts_interrupt_threshold = { 24, 8 },
- .sizeof_bts = 3 * 8,
- .from_ip = { 0, 8 },
- .to_ip = { 8, 8 },
- .info_type = { 8, 1 },
- .info_data = { 16, 8 },
- .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+static const struct ds_configuration ds_cfg_64 = {
+ .sizeof_ds = 8 * 12,
+ .sizeof_field = 8,
+ .sizeof_rec[ds_bts] = 8 * 3,
+ .sizeof_rec[ds_pebs] = 8 * 10
};
static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
-#ifdef __i386__
case 0xD:
case 0xE: /* Pentium M */
- ds_configure(&ds_cfg_pentium_m);
+ ds_configure(&ds_cfg_var);
break;
-#endif /* _i386_ */
case 0xF: /* Core2 */
- ds_configure(&ds_cfg_core2);
+ case 0x1C: /* Atom */
+ ds_configure(&ds_cfg_64);
break;
default:
/* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
case 0xF:
switch (c->x86_model) {
-#ifdef __i386__
case 0x0:
case 0x1:
case 0x2: /* Netburst */
- ds_configure(&ds_cfg_netburst);
+ ds_configure(&ds_cfg_var);
break;
-#endif /* _i386_ */
default:
/* sorry, don't know about them */
break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
break;
}
}
+
+void ds_free(struct ds_context *context)
+{
+ /* This is called when the task owning the parameter context
+ * is dying. There should not be any user of that context left
+ * to disturb us, anymore. */
+ unsigned long leftovers = context->count;
+ while (leftovers--)
+ ds_put_context(context);
+}
+#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9af89078f7bb..66e48aa2dd1b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p)
if (!p)
return -EINVAL;
- if (!strcmp(p, "exactmap")) {
+ if (!strncmp(p, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
/*
* If we are doing a crash dump, we still need to know
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 59fd3b6b1303..73deaffadd03 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void)
/* Calculate the min / max delta */
hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
&hpet_clockevent);
- hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
- &hpet_clockevent);
+ /* 5 usec minimum reprogramming delta. */
+ hpet_clockevent.min_delta_ns = 5000;
/*
* Start hpet with the boot cpu mask and make it
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
}
static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
+ struct clock_event_device *evt)
{
- unsigned long cnt;
+ u32 cnt;
cnt = hpet_readl(HPET_COUNTER);
- cnt += delta;
+ cnt += (u32) delta;
hpet_writel(cnt, HPET_T0_CMP);
- return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0;
+ /*
+ * We need to read back the CMP register to make sure that
+ * what we wrote hit the chip before we compare it to the
+ * counter.
+ */
+ WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+
+ return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
/*
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 1c3a66a67f83..720d2607aacb 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "30BF")
}
},
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "Presario F700",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30D3")
+ }
+ },
{ }
};
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a31c85..191914302744 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <asm/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b669d3..f1c688e46f35 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
#ifdef CONFIG_X86_32
#include <mach_apic.h>
+#include <mach_ipi.h>
+
/*
* the following functions deal with sending IPIs between CPUs.
*
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
}
/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
static int convert_apicid_to_cpu(int apic_id)
{
int i;
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index f2d43bc75514..ff7d3b0124f1 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
if (PageHighMem(pg)) {
data = ioremap_cache(pa_data, sizeof(*data));
if (!data) {
+ kfree(node);
error = -ENXIO;
goto err_dir;
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f47f0eb886b8..8282a2139681 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1;
*/
void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
gdb_regs[GDB_AX] = regs->ax;
gdb_regs[GDB_BX] = regs->bx;
gdb_regs[GDB_CX] = regs->cx;
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SI] = regs->si;
gdb_regs[GDB_DI] = regs->di;
gdb_regs[GDB_BP] = regs->bp;
- gdb_regs[GDB_PS] = regs->flags;
gdb_regs[GDB_PC] = regs->ip;
#ifdef CONFIG_X86_32
+ gdb_regs[GDB_PS] = regs->flags;
gdb_regs[GDB_DS] = regs->ds;
gdb_regs[GDB_ES] = regs->es;
gdb_regs[GDB_CS] = regs->cs;
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_R13] = regs->r13;
gdb_regs[GDB_R14] = regs->r14;
gdb_regs[GDB_R15] = regs->r15;
+ gdb_regs32[GDB_PS] = regs->flags;
+ gdb_regs32[GDB_CS] = regs->cs;
+ gdb_regs32[GDB_SS] = regs->ss;
#endif
gdb_regs[GDB_SP] = regs->sp;
}
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
*/
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
gdb_regs[GDB_AX] = 0;
gdb_regs[GDB_BX] = 0;
gdb_regs[GDB_CX] = 0;
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
#else
- gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
- gdb_regs[GDB_PC] = 0;
+ gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
+ gdb_regs32[GDB_CS] = __KERNEL_CS;
+ gdb_regs32[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_PC] = p->thread.ip;
gdb_regs[GDB_R8] = 0;
gdb_regs[GDB_R9] = 0;
gdb_regs[GDB_R10] = 0;
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
*/
void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
+#ifndef CONFIG_X86_32
+ u32 *gdb_regs32 = (u32 *)gdb_regs;
+#endif
regs->ax = gdb_regs[GDB_AX];
regs->bx = gdb_regs[GDB_BX];
regs->cx = gdb_regs[GDB_CX];
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
regs->si = gdb_regs[GDB_SI];
regs->di = gdb_regs[GDB_DI];
regs->bp = gdb_regs[GDB_BP];
- regs->flags = gdb_regs[GDB_PS];
regs->ip = gdb_regs[GDB_PC];
#ifdef CONFIG_X86_32
+ regs->flags = gdb_regs[GDB_PS];
regs->ds = gdb_regs[GDB_DS];
regs->es = gdb_regs[GDB_ES];
regs->cs = gdb_regs[GDB_CS];
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
regs->r13 = gdb_regs[GDB_R13];
regs->r14 = gdb_regs[GDB_R14];
regs->r15 = gdb_regs[GDB_R15];
+ regs->flags = gdb_regs32[GDB_PS];
+ regs->cs = gdb_regs32[GDB_CS];
+ regs->ss = gdb_regs32[GDB_SS];
#endif
}
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
if (remcomInBuffer[0] == 's') {
linux_regs->flags |= X86_EFLAGS_TF;
kgdb_single_step = 1;
- if (kgdb_contthread) {
- atomic_set(&kgdb_cpu_doing_single_step,
- raw_smp_processor_id());
- }
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
}
get_debugreg(dr6, 6);
@@ -466,9 +481,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) ==
- raw_smp_processor_id() &&
- user_mode(regs))
- return single_step_cont(regs, args);
+ raw_smp_processor_id()) {
+ if (user_mode(regs))
+ return single_step_cont(regs, args);
+ break;
+ } else if (test_thread_flag(TIF_SINGLESTEP))
+ /* This means a user thread is single stepping
+ * a system call which should be ignored
+ */
+ return NOTIFY_DONE;
/* fall through */
default:
if (user_mode(regs))
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4f..0ed5f939b905 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
+#include <asm/syscalls.h>
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a0..876e91890777 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
return 1;
}
+static cpumask_t c1e_mask = CPU_MASK_NONE;
+static int c1e_detected;
+
+void c1e_remove_cpu(int cpu)
+{
+ cpu_clear(cpu, c1e_mask);
+}
+
/*
* C1E aware idle routine. We check for C1E active in the interrupt
* pending message MSR. If we detect C1E, then we handle it the same
@@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
*/
static void c1e_idle(void)
{
- static cpumask_t c1e_mask = CPU_MASK_NONE;
- static int c1e_detected;
-
if (need_resched())
return;
@@ -265,8 +270,10 @@ static void c1e_idle(void)
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
c1e_detected = 1;
- mark_tsc_unstable("TSC halt in C1E");
- printk(KERN_INFO "System has C1E enabled\n");
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ mark_tsc_unstable("TSC halt in AMD C1E");
+ printk(KERN_INFO "System has AMD C1E enabled\n");
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
}
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0bc..c8609dea443f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,9 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>
+#include <asm/idle.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -88,6 +91,7 @@ static void cpu_exit_clear(void)
cpu_clear(cpu, cpu_callin_map);
numa_remove_cpu(cpu);
+ c1e_remove_cpu(cpu);
}
/* We don't actually take CPU down, just spin without interrupts. */
@@ -275,6 +279,14 @@ void exit_thread(void)
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+#ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(current->thread.ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(current->thread.ds_ctx);
+ }
+#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -436,6 +448,35 @@ int set_tsc_mode(unsigned int val)
return 0;
}
+#ifdef CONFIG_X86_DS
+static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+{
+ unsigned long ds_prev = 0;
+ unsigned long ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
+ }
+ return debugctl;
+}
+#else
+static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+{
+ return debugctl;
+}
+#endif /* CONFIG_X86_DS */
+
static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
@@ -446,14 +487,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
prev = &prev_p->thread;
next = &next_p->thread;
- debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
- }
+ debugctl = update_debugctl(prev, next, prev->debugctlmsr);
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -477,13 +511,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
hard_enable_TSC();
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2a..79e3e173ab40 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
+#include <asm/syscalls.h>
asmlinkage extern void ret_from_fork(void);
@@ -93,6 +94,8 @@ DECLARE_PER_CPU(int, cpu_state);
static inline void play_dead(void)
{
idle_task_exit();
+ c1e_remove_cpu(raw_smp_processor_id());
+
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
@@ -238,6 +241,14 @@ void exit_thread(void)
t->io_bitmap_max = 0;
put_cpu();
}
+#ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(t->ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(t->ds_ctx);
+ }
+#endif /* CONFIG_X86_DS */
}
void flush_thread(void)
@@ -471,13 +482,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
next = &next_p->thread;
debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+#ifdef CONFIG_X86_DS
+ {
+ unsigned long ds_prev = 0, ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /*
+ * We clear debugctl to make sure DS
+ * is not in use when we change it:
+ */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsrl(MSR_IA32_DS_AREA, ds_next);
+ }
}
+#endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
@@ -515,13 +540,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
}
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 19a7d2c40560..e375b658efc3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -70,7 +70,7 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
@@ -555,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * The configuration for a particular BTS hardware implementation.
+ */
+struct bts_configuration {
+ /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
+ unsigned char sizeof_bts;
+ /* the size of a field in the BTS record in bytes */
+ unsigned char sizeof_field;
+ /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
+ unsigned long debugctl_mask;
+};
+static struct bts_configuration bts_cfg;
+
+#define BTS_MAX_RECORD_SIZE (8 * 3)
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ */
+
+enum bts_field {
+ bts_from = 0,
+ bts_to,
+ bts_flags,
+
+ bts_escape = (unsigned long)-1,
+ bts_qual = bts_to,
+ bts_jiffies = bts_flags
+};
-static int ptrace_bts_get_size(struct task_struct *child)
+static inline unsigned long bts_get(const char *base, enum bts_field field)
{
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ base += (bts_cfg.sizeof_field * field);
+ return *(unsigned long *)base;
+}
+
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+ base += (bts_cfg.sizeof_field * field);;
+ (*(unsigned long *)base) = val;
+}
- return ds_get_bts_index((void *)child->thread.ds_area_msr);
+/*
+ * Translate a BTS record from the raw format into the bts_struct format
+ *
+ * out (out): bts_struct interpretation
+ * raw: raw BTS record
+ */
+static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
+{
+ memset(out, 0, sizeof(*out));
+ if (bts_get(raw, bts_from) == bts_escape) {
+ out->qualifier = bts_get(raw, bts_qual);
+ out->variant.jiffies = bts_get(raw, bts_jiffies);
+ } else {
+ out->qualifier = BTS_BRANCH;
+ out->variant.lbr.from_ip = bts_get(raw, bts_from);
+ out->variant.lbr.to_ip = bts_get(raw, bts_to);
+ }
}
-static int ptrace_bts_read_record(struct task_struct *child,
- long index,
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out)
{
struct bts_struct ret;
- int retval;
- int bts_end;
- int bts_index;
+ const void *bts_record;
+ size_t bts_index, bts_end;
+ int error;
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ error = ds_get_bts_end(child, &bts_end);
+ if (error < 0)
+ return error;
- if (index < 0)
- return -EINVAL;
-
- bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
if (bts_end <= index)
return -EINVAL;
+ error = ds_get_bts_index(child, &bts_index);
+ if (error < 0)
+ return error;
+
/* translate the ptrace bts index into the ds bts index */
- bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
- bts_index -= (index + 1);
- if (bts_index < 0)
- bts_index += bts_end;
+ bts_index += bts_end - (index + 1);
+ if (bts_end <= bts_index)
+ bts_index -= bts_end;
+
+ error = ds_access_bts(child, bts_index, &bts_record);
+ if (error < 0)
+ return error;
- retval = ds_read_bts((void *)child->thread.ds_area_msr,
- bts_index, &ret);
- if (retval < 0)
- return retval;
+ ptrace_bts_translate_record(&ret, bts_record);
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
@@ -601,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
return sizeof(ret);
}
-static int ptrace_bts_clear(struct task_struct *child)
-{
- if (!child->thread.ds_area_msr)
- return -ENXIO;
-
- return ds_clear((void *)child->thread.ds_area_msr);
-}
-
static int ptrace_bts_drain(struct task_struct *child,
long size,
struct bts_struct __user *out)
{
- int end, i;
- void *ds = (void *)child->thread.ds_area_msr;
-
- if (!ds)
- return -ENXIO;
+ struct bts_struct ret;
+ const unsigned char *raw;
+ size_t end, i;
+ int error;
- end = ds_get_bts_index(ds);
- if (end <= 0)
- return end;
+ error = ds_get_bts_index(child, &end);
+ if (error < 0)
+ return error;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
- for (i = 0; i < end; i++, out++) {
- struct bts_struct ret;
- int retval;
+ error = ds_access_bts(child, 0, (const void **)&raw);
+ if (error < 0)
+ return error;
- retval = ds_read_bts(ds, i, &ret);
- if (retval < 0)
- return retval;
+ for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
+ ptrace_bts_translate_record(&ret, raw);
if (copy_to_user(out, &ret, sizeof(ret)))
return -EFAULT;
}
- ds_clear(ds);
+ error = ds_clear_bts(child);
+ if (error < 0)
+ return error;
return end;
}
+static void ptrace_bts_ovfl(struct task_struct *child)
+{
+ send_sig(child->thread.bts_ovfl_signal, child, 0);
+}
+
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
{
struct ptrace_bts_config cfg;
- int bts_size, ret = 0;
- void *ds;
+ int error = 0;
+
+ error = -EOPNOTSUPP;
+ if (!bts_cfg.sizeof_bts)
+ goto errout;
+ error = -EIO;
if (cfg_size < sizeof(cfg))
- return -EIO;
+ goto errout;
+ error = -EFAULT;
if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
- return -EFAULT;
+ goto errout;
- if ((int)cfg.size < 0)
- return -EINVAL;
+ error = -EINVAL;
+ if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
+ !(cfg.flags & PTRACE_BTS_O_ALLOC))
+ goto errout;
- bts_size = 0;
- ds = (void *)child->thread.ds_area_msr;
- if (ds) {
- bts_size = ds_get_bts_size(ds);
- if (bts_size < 0)
- return bts_size;
- }
- cfg.size = PAGE_ALIGN(cfg.size);
+ if (cfg.flags & PTRACE_BTS_O_ALLOC) {
+ ds_ovfl_callback_t ovfl = NULL;
+ unsigned int sig = 0;
+
+ /* we ignore the error in case we were not tracing child */
+ (void)ds_release_bts(child);
+
+ if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+ if (!cfg.signal)
+ goto errout;
+
+ sig = cfg.signal;
+ ovfl = ptrace_bts_ovfl;
+ }
- if (bts_size != cfg.size) {
- ret = ptrace_bts_realloc(child, cfg.size,
- cfg.flags & PTRACE_BTS_O_CUT_SIZE);
- if (ret < 0)
+ error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
+ if (error < 0)
goto errout;
- ds = (void *)child->thread.ds_area_msr;
+ child->thread.bts_ovfl_signal = sig;
}
- if (cfg.flags & PTRACE_BTS_O_SIGNAL)
- ret = ds_set_overflow(ds, DS_O_SIGNAL);
- else
- ret = ds_set_overflow(ds, DS_O_WRAP);
- if (ret < 0)
+ error = -EINVAL;
+ if (!child->thread.ds_ctx && cfg.flags)
goto errout;
if (cfg.flags & PTRACE_BTS_O_TRACE)
- child->thread.debugctlmsr |= ds_debugctl_mask();
+ child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
else
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
if (cfg.flags & PTRACE_BTS_O_SCHED)
set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
else
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
- ret = sizeof(cfg);
+ error = sizeof(cfg);
out:
if (child->thread.debugctlmsr)
@@ -703,10 +778,10 @@ out:
else
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- return ret;
+ return error;
errout:
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
goto out;
}
@@ -715,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size,
struct ptrace_bts_config __user *ucfg)
{
- void *ds = (void *)child->thread.ds_area_msr;
struct ptrace_bts_config cfg;
+ size_t end;
+ const void *base, *max;
+ int error;
if (cfg_size < sizeof(cfg))
return -EIO;
- memset(&cfg, 0, sizeof(cfg));
+ error = ds_get_bts_end(child, &end);
+ if (error < 0)
+ return error;
- if (ds) {
- cfg.size = ds_get_bts_size(ds);
+ error = ds_access_bts(child, /* index = */ 0, &base);
+ if (error < 0)
+ return error;
- if (ds_get_overflow(ds) == DS_O_SIGNAL)
- cfg.flags |= PTRACE_BTS_O_SIGNAL;
+ error = ds_access_bts(child, /* index = */ end, &max);
+ if (error < 0)
+ return error;
- if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
- child->thread.debugctlmsr & ds_debugctl_mask())
- cfg.flags |= PTRACE_BTS_O_TRACE;
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.size = (max - base);
+ cfg.signal = child->thread.bts_ovfl_signal;
+ cfg.bts_size = sizeof(struct bts_struct);
- if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
- cfg.flags |= PTRACE_BTS_O_SCHED;
- }
+ if (cfg.signal)
+ cfg.flags |= PTRACE_BTS_O_SIGNAL;
- cfg.bts_size = sizeof(struct bts_struct);
+ if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+ child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+ cfg.flags |= PTRACE_BTS_O_TRACE;
+
+ if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+ cfg.flags |= PTRACE_BTS_O_SCHED;
if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
return -EFAULT;
@@ -745,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child,
return sizeof(cfg);
}
-
static int ptrace_bts_write_record(struct task_struct *child,
const struct bts_struct *in)
{
- int retval;
+ unsigned char bts_record[BTS_MAX_RECORD_SIZE];
- if (!child->thread.ds_area_msr)
- return -ENXIO;
+ BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
- retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
- if (retval)
- return retval;
+ memset(bts_record, 0, bts_cfg.sizeof_bts);
+ switch (in->qualifier) {
+ case BTS_INVALID:
+ break;
- return sizeof(*in);
-}
+ case BTS_BRANCH:
+ bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
+ bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
+ break;
-static int ptrace_bts_realloc(struct task_struct *child,
- int size, int reduce_size)
-{
- unsigned long rlim, vm;
- int ret, old_size;
+ case BTS_TASK_ARRIVES:
+ case BTS_TASK_DEPARTS:
+ bts_set(bts_record, bts_from, bts_escape);
+ bts_set(bts_record, bts_qual, in->qualifier);
+ bts_set(bts_record, bts_jiffies, in->variant.jiffies);
+ break;
- if (size < 0)
+ default:
return -EINVAL;
-
- old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
- if (old_size < 0)
- return old_size;
-
- ret = ds_free((void **)&child->thread.ds_area_msr);
- if (ret < 0)
- goto out;
-
- size >>= PAGE_SHIFT;
- old_size >>= PAGE_SHIFT;
-
- current->mm->total_vm -= old_size;
- current->mm->locked_vm -= old_size;
-
- if (size == 0)
- goto out;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->total_vm;
- if (size <= 0)
- goto out;
}
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->locked_vm;
- if (size <= 0)
- goto out;
- }
-
- ret = ds_allocate((void **)&child->thread.ds_area_msr,
- size << PAGE_SHIFT);
- if (ret < 0)
- goto out;
-
- current->mm->total_vm += size;
- current->mm->locked_vm += size;
-
-out:
- if (child->thread.ds_area_msr)
- set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
- else
- clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
- return ret;
+ /* The writing task will be the switched-to task on a context
+ * switch. It needs to write into the switched-from task's BTS
+ * buffer. */
+ return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -840,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
ptrace_bts_write_record(tsk, &rec);
}
-#endif /* X86_BTS */
+
+static const struct bts_configuration bts_cfg_netburst = {
+ .sizeof_bts = sizeof(long) * 3,
+ .sizeof_field = sizeof(long),
+ .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
+};
+
+static const struct bts_configuration bts_cfg_pentium_m = {
+ .sizeof_bts = sizeof(long) * 3,
+ .sizeof_field = sizeof(long),
+ .debugctl_mask = (1<<6)|(1<<7)
+};
+
+static const struct bts_configuration bts_cfg_core2 = {
+ .sizeof_bts = 8 * 3,
+ .sizeof_field = 8,
+ .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void bts_configure(const struct bts_configuration *cfg)
+{
+ bts_cfg = *cfg;
+}
+
+void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 0x6:
+ switch (c->x86_model) {
+ case 0xD:
+ case 0xE: /* Pentium M */
+ bts_configure(&bts_cfg_pentium_m);
+ break;
+ case 0xF: /* Core2 */
+ case 0x1C: /* Atom */
+ bts_configure(&bts_cfg_core2);
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ case 0xF:
+ switch (c->x86_model) {
+ case 0x0:
+ case 0x1:
+ case 0x2: /* Netburst */
+ bts_configure(&bts_cfg_netburst);
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+ break;
+ default:
+ /* sorry, don't know about them */
+ break;
+ }
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
/*
* Called by kernel/ptrace.c when detaching..
@@ -853,15 +947,15 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
- if (child->thread.ds_area_msr) {
-#ifdef X86_BTS
- ptrace_bts_realloc(child, 0, 0);
-#endif
- child->thread.debugctlmsr &= ~ds_debugctl_mask();
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
- }
+#ifdef CONFIG_X86_PTRACE_BTS
+ (void)ds_release_bts(child);
+
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+#endif /* CONFIG_X86_PTRACE_BTS */
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -981,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
/*
* These bits need more cooking - not enabled yet:
*/
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
case PTRACE_BTS_CONFIG:
ret = ptrace_bts_config
(child, data, (struct ptrace_bts_config __user *)addr);
@@ -993,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_SIZE:
- ret = ptrace_bts_get_size(child);
+ ret = ds_get_bts_index(child, /* pos = */ NULL);
break;
case PTRACE_BTS_GET:
@@ -1002,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
- ret = ptrace_bts_clear(child);
+ ret = ds_clear_bts(child);
break;
case PTRACE_BTS_DRAIN:
ret = ptrace_bts_drain
(child, data, (struct bts_struct __user *) addr);
break;
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
default:
ret = ptrace_request(child, request, addr, data);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d38..9838f2539dfc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
+#ifdef CONFIG_X86_64
+ check_efer();
+#endif
+
#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
/*
* Must be before kernel pagetables are setup
@@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p)
#else
num_physpages = max_pfn;
- check_efer();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 4445d26efd47..2a2435d3037d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 1e1933892b4f..4d32487805ef 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -28,6 +28,7 @@
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
#include <asm/syscall.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..a66c93550a0d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
@@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid;
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb855a60..1884a8d12bfa 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
#include <linux/uaccess.h>
#include <linux/unistd.h>
+#include <asm/syscalls.h>
+
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef33817..c9288c883e20 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -16,6 +16,7 @@
#include <asm/uaccess.h>
#include <asm/ia32.h>
+#include <asm/syscalls.h>
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off)
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c17487..3d1be4f0fac5 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
#define __NO_STUBS
#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#include <asm/unistd_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
typedef void (*sys_call_ptr_t)(void);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c664afc0..bbecf8b6bf96 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/time.h>
+#include <asm/timer.h>
#include "do_timer.h"
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf375a307..6bb7b8579e70 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/proto.h>
+#include <asm/syscalls.h>
#include "tls.h"
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566fa27d2..4eeb5cf9720d 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
+#include <asm/syscalls.h>
/*
* Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db54..edfb09f30479 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
u32 *ldt_entry = (u32 *)desc;
- vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
+ vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}
static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 0c029e8959c7..7766d36983fc 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void)
native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
}
-static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
switch (type) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0bfe2bd305eb..3da2508eb22a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -711,6 +711,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
u64 *spte;
int young = 0;
+ /* always return old for EPT */
+ if (!shadow_accessed_mask)
+ return 0;
+
spte = rmap_next(kvm, rmapp, NULL);
while (spte) {
int _young;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e2ee264740c7..8233b86c778c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -62,6 +62,7 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
static void kvm_reput_irq(struct vcpu_svm *svm);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
@@ -878,6 +879,10 @@ set:
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+ unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+
+ if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
+ force_new_asid(vcpu);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -1027,6 +1032,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
(u32)fault_address, (u32)(fault_address >> 32),
handler);
+ /*
+ * FIXME: Tis shouldn't be necessary here, but there is a flush
+ * missing in the MMU code. Until we find this bug, flush the
+ * complete TLB here on an NPF
+ */
+ if (npt_enabled)
+ svm_flush_tlb(&svm->vcpu);
if (event_injection)
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2a69773e3b26..7041cc52b562 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3301,8 +3301,7 @@ static int __init vmx_init(void)
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
- kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
- VMX_EPT_FAKE_DIRTY_MASK, 0ull,
+ kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
} else
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 425a13436b3f..23e8373507ad 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -370,8 +370,6 @@ enum vmcs_field {
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
-#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
-#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d317836be9e..3f2cf11f201a 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
#include <asm/e820.h>
#include <asm/setup.h>
+#include <mach_ipi.h>
+
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
#define DEFAULT_SEND_IPI (0)
#endif
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
/**
* pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..8f92cac4e6db 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>
+#include <asm/traps.h>
/*
* Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
return 0;
}
-void do_invalid_op(struct pt_regs *, unsigned long);
-
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0c..6b9a9358b330 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
+#include <asm/smp.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -458,11 +459,7 @@ static void __init pagetable_init(void)
{
pgd_t *pgd_base = swapper_pg_dir;
- paravirt_pagetable_setup_start(pgd_base);
-
permanent_kmaps_init(pgd_base);
-
- paravirt_pagetable_setup_done(pgd_base);
}
#ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a29ae3..cac6da54203b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
return;
}
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
static int __init early_ioremap_debug_setup(char *str)
{
@@ -547,7 +547,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
}
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
static int __init check_early_ioremap_leak(void)
{
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0227694f7dab..8a5f1614a3d5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -295,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy)
static void nmi_shutdown(void)
{
- struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
+ struct op_msrs *msrs;
+
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
unregister_die_notifier(&profile_exceptions_nb);
+ msrs = &get_cpu_var(cpu_msrs);
model->shutdown(msrs);
free_msrs();
put_cpu_var(cpu_msrs);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..a4e201b47f64 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1324,7 +1324,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
.pte_val = xen_pte_val,
- .pte_flags = native_pte_val,
+ .pte_flags = native_pte_flags,
.pgd_val = xen_pgd_val,
.make_pte = xen_make_pte,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b6acc3a0af46..d67901083888 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -42,7 +42,7 @@ char * __init xen_memory_setup(void)
e820.nr_map = 0;
- e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM);
+ e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
/*
* Even though this is normal, usable memory under Xen, reserve