From 241771ef016b5c0c83cd7a4372a74321c973c1e6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Dec 2008 10:39:53 +0100 Subject: performance counters: x86 support Implement performance counters for x86 Intel CPUs. It's simplified right now: the PERFMON CPU feature is assumed, which is available in Core2 and later Intel CPUs. The design is flexible to be extended to more CPU types as well. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq_32.h | 1 + arch/x86/include/asm/hw_irq.h | 2 ++ arch/x86/include/asm/intel_arch_perfmon.h | 34 +++++++++++++++++--------- arch/x86/include/asm/irq_vectors.h | 5 ++++ arch/x86/include/asm/mach-default/entry_arch.h | 5 ++++ arch/x86/include/asm/pda.h | 1 + arch/x86/include/asm/thread_info.h | 4 ++- arch/x86/include/asm/unistd_32.h | 1 + arch/x86/include/asm/unistd_64.h | 3 ++- 9 files changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e72f2b..b3e475dc9338 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -9,6 +9,7 @@ typedef struct { unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int apic_perf_irqs; /* arch dependent */ unsigned int irq0_irqs; unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8de644b6b959..aa93e53b85ee 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,8 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void error_interrupt(void); +extern void perf_counter_interrupt(void); + extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h index fa0fd068bc2e..71598a9eab61 100644 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ b/arch/x86/include/asm/intel_arch_perfmon.h @@ -1,22 +1,24 @@ #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H #define _ASM_X86_INTEL_ARCH_PERFMON_H -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 union cpuid10_eax { struct { @@ -28,4 +30,12 @@ union cpuid10_eax { unsigned int full; }; +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +extern void perf_counters_lapic_init(int nmi); +#else +static inline void init_hw_perf_counters(void) { } +static inline void perf_counters_lapic_init(int nmi) { } +#endif + #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f941..b8d277f1252f 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -86,6 +86,11 @@ */ #define LOCAL_TIMER_VECTOR 0xef +/* + * Performance monitoring interrupt vector: + */ +#define LOCAL_PERF_VECTOR 0xee + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h index 6b1add8e31dd..ad31e5d90e90 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -25,10 +25,15 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) * a much simpler SMP time architecture: */ #ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#ifdef CONFIG_PERF_COUNTERS +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +#endif + #ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff88df37..90a8d9d4206b 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -30,6 +30,7 @@ struct x8664_pda { short isidle; struct mm_struct *active_mm; unsigned apic_timer_irqs; + unsigned apic_perf_irqs; unsigned irq0_irqs; unsigned irq_resched_count; unsigned irq_call_count; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..810bf266d134 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -80,6 +80,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -103,6 +104,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -135,7 +137,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index f2bba78430a4..7e47658b0a6f 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -338,6 +338,7 @@ #define __NR_dup3 330 #define __NR_pipe2 331 #define __NR_inotify_init1 332 +#define __NR_perf_counter_open 333 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index d2e415e6666f..53025feaf88d 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -653,7 +653,8 @@ __SYSCALL(__NR_dup3, sys_dup3) __SYSCALL(__NR_pipe2, sys_pipe2) #define __NR_inotify_init1 294 __SYSCALL(__NR_inotify_init1, sys_inotify_init1) - +#define __NR_perf_counter_open 295 +__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR -- cgit v1.2.3 From 9b194e831fb2c322ed81a373e49620f34edc2778 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 20:22:35 +0100 Subject: x86: implement atomic64_t on 32-bit Impact: new API Implement the atomic64_t APIs on 32-bit as well. Will be used by the performance counters code. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic_32.h | 218 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6ecddf..9927e01b03c2 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -255,5 +255,223 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +/* An 64bit atomic type */ + +typedef struct { + unsigned long long counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +#define __atomic64_read(ptr) ((ptr)->counter) + +static inline unsigned long long +cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) +{ + asm volatile( + + LOCK_PREFIX "cmpxchg8b (%[ptr])\n" + + : "=A" (old) + + : [ptr] "D" (ptr), + "A" (old), + "b" (ll_low(new)), + "c" (ll_high(new)) + + : "memory"); + + return old; +} + +static inline unsigned long long +atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, + unsigned long long new_val) +{ + return cmpxchg8b(&ptr->counter, old_val, new_val); +} + +/** + * atomic64_set - set atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) +{ + unsigned long long old_val; + + do { + old_val = atomic_read(ptr); + } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); +} + +/** + * atomic64_read - read atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically reads the value of @ptr and returns it. + */ +static inline unsigned long long atomic64_read(atomic64_t *ptr) +{ + unsigned long long curr_val; + + do { + curr_val = __atomic64_read(ptr); + } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); + + return curr_val; +} + +/** + * atomic64_add_return - add and return + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns @delta + *@ptr + */ +static inline unsigned long long +atomic64_add_return(unsigned long long delta, atomic64_t *ptr) +{ + unsigned long long old_val, new_val; + + do { + old_val = atomic_read(ptr); + new_val = old_val + delta; + + } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); + + return new_val; +} + +static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) +{ + return atomic64_add_return(-delta, ptr); +} + +static inline long atomic64_inc_return(atomic64_t *ptr) +{ + return atomic64_add_return(1, ptr); +} + +static inline long atomic64_dec_return(atomic64_t *ptr) +{ + return atomic64_sub_return(1, ptr); +} + +/** + * atomic64_add - add integer to atomic64 variable + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr. + */ +static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) +{ + atomic64_add_return(delta, ptr); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr. + */ +static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) +{ + atomic64_add(-delta, ptr); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int +atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) +{ + unsigned long long old_val = atomic64_sub_return(delta, ptr); + + return old_val == 0; +} + +/** + * atomic64_inc - increment atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1. + */ +static inline void atomic64_inc(atomic64_t *ptr) +{ + atomic64_add(1, ptr); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1. + */ +static inline void atomic64_dec(atomic64_t *ptr) +{ + atomic64_sub(1, ptr); +} + +/** + * atomic64_dec_and_test - decrement and test + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *ptr) +{ + return atomic64_sub_and_test(1, ptr); +} + +/** + * atomic64_inc_and_test - increment and test + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *ptr) +{ + return atomic64_sub_and_test(-1, ptr); +} + +/** + * atomic64_add_negative - add and test if negative + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int +atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) +{ + long long old_val = atomic64_add_return(delta, ptr); + + return old_val < 0; +} + #include #endif /* _ASM_X86_ATOMIC_32_H */ -- cgit v1.2.3 From 5c167b8585c8d91206b395d57011ead7711e322f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:02:19 +0100 Subject: x86, perfcounters: rename intel_arch_perfmon.h => perf_counter.h Impact: rename include file We'll be providing an asm/perf_counter.h to the generic perfcounter code, so use the already existing x86 file for this purpose and rename it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_arch_perfmon.h | 41 ------------------------------- arch/x86/include/asm/perf_counter.h | 41 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 41 deletions(-) delete mode 100644 arch/x86/include/asm/intel_arch_perfmon.h create mode 100644 arch/x86/include/asm/perf_counter.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index 71598a9eab61..000000000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H -#define _ASM_X86_INTEL_ARCH_PERFMON_H - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(int nmi); -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(int nmi) { } -#endif - -#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 000000000000..9dadce1124ee --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_PERF_COUNTER_H +#define _ASM_X86_PERF_COUNTER_H + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +extern void perf_counters_lapic_init(int nmi); +#else +static inline void init_hw_perf_counters(void) { } +static inline void perf_counters_lapic_init(int nmi) { } +#endif + +#endif /* _ASM_X86_PERF_COUNTER_H */ -- cgit v1.2.3 From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:09:13 +0100 Subject: x86, perfcounters: prepare for fixed-mode PMCs Impact: refactor the x86 code for fixed-mode PMCs Extend the data structures and rename the existing facilities to allow for a 'generic' versus 'fixed' counter distinction. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 9dadce1124ee..dd5a4a559e2d 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -1,6 +1,13 @@ #ifndef _ASM_X86_PERF_COUNTER_H #define _ASM_X86_PERF_COUNTER_H +/* + * Performance counter hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -20,6 +27,10 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ union cpuid10_eax { struct { unsigned int version_id:8; -- cgit v1.2.3 From 703e937c83bbad79075a7846e062e447c2fee6a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 10:51:15 +0100 Subject: perfcounters: add fixed-mode PMC enumeration Enumerate fixed-mode PMCs based on CPUID, and feed that into the perfcounter code. Does not use fixed-mode PMCs yet. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index dd5a4a559e2d..945a315e6d62 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -41,6 +41,29 @@ union cpuid10_eax { unsigned int full; }; +union cpuid10_edx { + struct { + unsigned int num_counters_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance counters: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); -- cgit v1.2.3 From 862a1a5f346fe7e9181ea51eaae48cf2cd70f746 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 13:09:20 +0100 Subject: x86, perfcounters: refactor code for fixed-function PMCs Impact: clean up Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 945a315e6d62..13745deb16c8 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -8,6 +8,10 @@ #define X86_PMC_MAX_GENERIC 8 #define X86_PMC_MAX_FIXED 3 +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -54,6 +58,15 @@ union cpuid10_edx { * Fixed-purpose performance counters: */ +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 @@ -63,7 +76,6 @@ union cpuid10_edx { /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); -- cgit v1.2.3 From 2f18d1e8d07ae67dd0afce875287756d4bd31a46 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Dec 2008 11:10:42 +0100 Subject: x86, perfcounters: add support for fixed-function pmcs Impact: extend performance counter support on x86 Intel CPUs Modern Intel CPUs have 3 "fixed-function" performance counters, which count these hardware events: Instr_Retired.Any CPU_CLK_Unhalted.Core CPU_CLK_Unhalted.Ref Add support for them to the performance counters subsystem. Their use is transparent to user-space: the counter scheduler is extended to automatically recognize the cases where a fixed-function PMC can be utilized instead of a generic PMC. In such cases the generic PMC is kept available for more counters. The above fixed-function events map to these generic counter hw events: PERF_COUNT_INSTRUCTIONS PERF_COUNT_CPU_CYCLES PERF_COUNT_BUS_CYCLES (The 'bus' cycles are in reality often CPU-ish cycles, just with a fixed frequency.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 13745deb16c8..2e08ed736647 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -23,6 +23,11 @@ #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 @@ -69,12 +74,15 @@ union cpuid10_edx { /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) /* CPU_CLK_Unhalted.Core: */ #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); -- cgit v1.2.3 From 73ca2f8380311115723c7afe811f3ed1f0ba945e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 16 Feb 2009 01:08:17 +0100 Subject: perfcounters: remove duplicate definition of LOCAL_PERF_VECTOR Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index b66b518ff000..b07278c55e9e 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -111,11 +111,6 @@ */ #define LOCAL_PERF_VECTOR 0xee -/* - * Performance monitoring interrupt vector: - */ -#define LOCAL_PERF_VECTOR 0xee - /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority -- cgit v1.2.3 From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:24:03 -0800 Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch Impact: simplification, prepare for later changes Make lazy cpu mode more specific to context switching, so that it makes sense to do more context-switch specific things in the callbacks. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0617d5cc9712..7b28abac323f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); void paravirt_leave_lazy(enum paravirt_lazy_mode mode); -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -static inline void arch_enter_lazy_cpu_mode(void) +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); } -static inline void arch_leave_lazy_cpu_mode(void) +static inline void arch_end_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -void arch_flush_lazy_cpu_mode(void); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { -- cgit v1.2.3 From b407fc57b815b2016186220baabc76cc8264206e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:46:21 -0800 Subject: x86/paravirt: flush pending mmu updates on context switch Impact: allow preemption during lazy mmu updates If we're in lazy mmu mode when context switching, leave lazy mmu mode, but remember the task's state in TIF_LAZY_MMU_UPDATES. When we resume the task, check this flag and re-enter lazy mmu mode if its set. This sets things up for allowing lazy mmu mode while preemptible, though that won't actually be active until the next change. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 1 - arch/x86/include/asm/thread_info.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7b28abac323f..58d2481b01a6 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1418,7 +1418,6 @@ void paravirt_enter_lazy_cpu(void); void paravirt_leave_lazy_cpu(void); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); -void paravirt_leave_lazy(enum paravirt_lazy_mode mode); #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index df9d5f78385e..2f34d643b567 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -94,6 +94,7 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ -- cgit v1.2.3 From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 11:18:57 -0800 Subject: x86/paravirt: finish change from lazy cpu to context switch start/end Impact: fix lazy context switch API Pass the previous and next tasks into the context switch start end calls, so that the called functions can properly access the task state (esp in end_context_switch, in which the next task is not yet completely current). Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra --- arch/x86/include/asm/paravirt.h | 17 ++++++++++------- arch/x86/include/asm/pgtable.h | 2 ++ 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 58d2481b01a6..dfdee0ca57d3 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -56,6 +56,7 @@ struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; +struct task_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -203,7 +204,8 @@ struct pv_cpu_ops { void (*swapgs)(void); - struct pv_lazy_ops lazy_mode; + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); }; struct pv_irq_ops { @@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode { }; enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_enter_lazy_cpu(void); -void paravirt_leave_lazy_cpu(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); #define __HAVE_ARCH_START_CONTEXT_SWITCH -static inline void arch_start_context_switch(void) +static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); } -static inline void arch_end_context_switch(void) +static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d0812e155f1d..24e42836e921 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) +#define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT */ /* -- cgit v1.2.3 From a2bcd4731f77cb77ae4b5e4a3d7f5471cf346c33 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 29 Mar 2009 23:47:48 +0200 Subject: x86/mm: further cleanups of fault.c's include file section Impact: cleanup Eliminate more than 20 unnecessary #include lines in fault.c Also fix include file dependency bug in asm/traps.h. (this was masked before, by implicit inclusion) Signed-off-by: Ingo Molnar LKML-Reference: Acked-by: H. Peter Anvin --- arch/x86/include/asm/traps.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..37fb07a9cda0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -2,6 +2,7 @@ #define _ASM_X86_TRAPS_H #include +#include /* TRAP_TRACE, ... */ #ifdef CONFIG_X86_32 #define dotraplinkage -- cgit v1.2.3 From 68509cdcde6583ee1a9542899d1270449c7d5903 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Mar 2009 03:59:04 -0700 Subject: x86-64: remove PGE from must-have feature list PGE may not be available when running paravirtualized, so test the cpuid bit before using it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/required-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c586881..a4737dddfd58 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_PGE 0 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v1.2.3 From 1e7449730853e7c9ae9a2458b2ced7ba12559a0e Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: Xen: Add virt_to_pfn helper function Signed-off-by: Alex Nixon --- arch/x86/include/asm/xen/page.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dde46b5..018a0a400799 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) -- cgit v1.2.3 From 5f241e65f2be4661a33e1937e1c829252a80b2b8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Mar 2009 17:08:48 -0700 Subject: x86-64: non-paravirt systems always has PSE and PGE A paravirtualized system may not have PSE or PGE available to guests, so they are not required features. However, without paravirt we can assume that any x86-64 implementation will have them available. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/required-features.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dddfd58..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -48,9 +48,15 @@ #endif #ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT +/* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v1.2.3 From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 16 Mar 2009 21:00:00 +1100 Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc build Impact: build fix for powerpc Commit bd753921015e7905 ("perf_counter: software counter event infrastructure") introduced a use of TIF_PERF_COUNTERS into the core perfcounter code. This breaks the build on powerpc because we use a flag in a per-cpu area to signal wakeups on powerpc rather than a thread_info flag, because the thread_info flags have to be manipulated with atomic operations and are thus slower than per-cpu flags. This fixes the by changing the core to use an abstracted set_perf_counter_pending() function, which is defined on x86 to set the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag (paca->perf_counter_pending). It changes the previous powerpc definition of set_perf_counter_pending to not take an argument and adds a clear_perf_counter_pending, so as to simplify the definition on x86. On x86, set_perf_counter_pending() is defined as a macro. Defining it as a static inline in arch/x86/include/asm/perf_counters.h causes compile failures because gets included early in , and the definitions of set_tsk_thread_flag etc. are therefore not available in . (On powerpc this problem is avoided by defining set_perf_counter_pending etc. in .) Signed-off-by: Paul Mackerras --- arch/x86/include/asm/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 2e08ed736647..1662043b340f 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +#define set_perf_counter_pending() \ + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); -- cgit v1.2.3 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 5 +++-- arch/x86/include/asm/thread_info.h | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 1662043b340f..e2b0e66b2353 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,8 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() \ - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); +#define set_perf_counter_pending() do { } while (0) +#define clear_perf_counter_pending() do { } while (0) +#define test_perf_counter_pending() (0) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ffd5d2a3676..8820a73ae090 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,7 +106,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -141,7 +139,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ -- cgit v1.2.3 From b6276f353bf490add62dcf7db0ebd75baa3e1a37 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:03 +0200 Subject: perf_counter: x86: self-IPI for pending work Implement set_perf_counter_pending() with a self-IPI so that it will run ASAP in a usable context. For now use a second IRQ vector, because the primary vector pokes the apic in funny ways that seem to confuse things. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.724626696@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 1 + arch/x86/include/asm/hardirq.h | 1 + arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 5 +++++ arch/x86/include/asm/perf_counter.h | 3 ++- 5 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..fe24d2802490 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #ifdef CONFIG_PERF_COUNTERS BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif #ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 25454427ceea..f5ebe2aaca4b 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif unsigned int generic_irqs; /* arch dependent */ unsigned int apic_perf_irqs; + unsigned int apic_pending_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ae80f64973e0..7309c0ad6902 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,7 @@ extern void apic_timer_interrupt(void); extern void generic_interrupt(void); extern void error_interrupt(void); extern void perf_counter_interrupt(void); +extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..545bb811ccb5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -116,6 +116,11 @@ */ #define GENERIC_INTERRUPT_VECTOR 0xed +/* + * Performance monitoring pending work vector: + */ +#define LOCAL_PENDING_VECTOR 0xec + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index e2b0e66b2353..d08dd52cb8ff 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,7 +84,8 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() do { } while (0) +extern void set_perf_counter_pending(void); + #define clear_perf_counter_pending() do { } while (0) #define test_perf_counter_pending() (0) -- cgit v1.2.3 From 98c2aaf8be5baf7193be37fb28bce8e7327158bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Apr 2009 11:30:17 +0200 Subject: x86, perfcounters: add atomic64_xchg() Complete atomic64_t support on the 32-bit side by adding atomic64_xch(). Cc: Peter Zijlstra LKML-Reference: <20090406094518.445450972@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic_32.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 977250ed8b89..aff9f1fcdcd7 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -291,19 +291,37 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, } /** - * atomic64_set - set atomic64 variable + * atomic64_xchg - xchg atomic64 variable * @ptr: pointer to type atomic64_t * @new_val: value to assign + * @old_val: old value that was there * - * Atomically sets the value of @ptr to @new_val. + * Atomically xchgs the value of @ptr to @new_val and returns + * the old value. */ -static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) + +static inline unsigned long long +atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) { unsigned long long old_val; do { old_val = atomic_read(ptr); } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); + + return old_val; +} + +/** + * atomic64_set - set atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) +{ + atomic64_xchg(ptr, new_val); } /** -- cgit v1.2.3 From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:35 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping When a ptraced task is unlinked, we need to stop branch tracing for that task. Since the unlink is called with interrupts disabled, and we need interrupts enabled to stop branch tracing, we defer the work. Collect all branch tracing related stuff in a branch tracing context. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: Andrew Morton Cc: Peter Zijlstra Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144550.712401000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c52370f2fe..2483807e06e7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -458,10 +458,6 @@ struct thread_struct { /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ struct ds_context *ds_ctx; #endif /* CONFIG_X86_DS */ -#ifdef CONFIG_X86_PTRACE_BTS -/* the signal to send on a bts buffer overflow */ - unsigned int bts_ovfl_signal; -#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) -- cgit v1.2.3 From 15879d042164650b93d83281ad5f87ad323bfbfe Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:38 +0200 Subject: x86, bts: use trace_clock_global() for timestamps Rename the bts_struct timestamp field to event. Use trace_clock_global() for time measurement. Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144553.773216000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672ba100c..772f141afb9a 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -170,9 +170,9 @@ struct bts_struct { } lbr; /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ struct { - __u64 jiffies; + __u64 clock; pid_t pid; - } timestamp; + } event; } variant; }; -- cgit v1.2.3 From 35bb7600c17762bb129588c1877d2717fe325289 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:39 +0200 Subject: x86, debugctlmsr: add _on_cpu variants to debugctlmsr functions Add functions to get and set the debugctlmsr on different cpus. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144554.738772000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2483807e06e7..1efeb497f1f9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -785,6 +785,21 @@ static inline unsigned long get_debugctlmsr(void) return debugctlmsr; } +static inline unsigned long get_debugctlmsr_on_cpu(int cpu) +{ + u64 debugctlmsr = 0; + u32 val1, val2; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); + debugctlmsr = val1 | ((u64)val2 << 32); + + return debugctlmsr; +} + static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR @@ -794,6 +809,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +static inline void update_debugctlmsr_on_cpu(int cpu, + unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, + (u32)((u64)debugctlmsr), + (u32)((u64)debugctlmsr >> 32)); +} + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: -- cgit v1.2.3 From de79f54f5347ad7ec6ff55ccbb6d4ab2a21f6a93 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:40 +0200 Subject: x86, bts, hw-branch-tracer: add _noirq variants to the debug store interface The hw-branch-tracer uses debug store functions from an on_each_cpu() context, which is simply wrong since the functions may sleep. Add _noirq variants for most functions, which may be called with interrupts disabled. Separate per-cpu and per-task tracing and allow per-cpu tracing to be controlled from any cpu. Make the hw-branch-tracer use the new debug store interface, synchronize with hotplug cpu event using get/put_online_cpus(), and remove the unnecessary spinlock. Make the ptrace bts and the ds selftest code use the new interface. Defer the ds selftest. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144555.658136000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 57 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 772f141afb9a..413e127e567d 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ #ifndef _ASM_X86_DS_H @@ -83,8 +83,10 @@ enum ds_feature { * The interrupt threshold is independent from the overflow callback * to allow users to use their own overflow interrupt handling mechanism. * - * task: the task to request recording for; - * NULL for per-cpu recording on the current cpu + * The function might sleep. + * + * task: the task to request recording for + * cpu: the cpu to request recording for * base: the base pointer for the (non-pageable) buffer; * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; @@ -93,19 +95,28 @@ enum ds_feature { * -1 if no interrupt threshold is requested. * flags: a bit-mask of the above flags */ -extern struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); /* * Release BTS or PEBS resources * Suspend and resume BTS or PEBS tracing * + * Must be called with irq's enabled. + * * tracer: the tracer handle returned from ds_request_~() */ extern void ds_release_bts(struct bts_tracer *tracer); @@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer); +/* + * Release BTS or PEBS resources + * Suspend and resume BTS or PEBS tracing + * + * Cpu tracers must call this on the traced cpu. + * Task tracers must call ds_release_~_noirq() for themselves. + * + * May be called with irq's disabled. + * + * Returns 0 if successful; + * -EPERM if the cpu tracer does not trace the current cpu. + * -EPERM if the task tracer does not trace itself. + * + * tracer: the tracer handle returned from ds_request_~() + */ +extern int ds_release_bts_noirq(struct bts_tracer *tracer); +extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); +extern int ds_resume_bts_noirq(struct bts_tracer *tracer); +extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); + /* * The raw DS buffer state as it is used for BTS and PEBS recording. -- cgit v1.2.3 From 2311f0de21c17b2a8b960677a9cccfbfa52beb35 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:46 +0200 Subject: x86, ds: add leakage warning Add a warning in case a debug store context is not removed before the task it is attached to is freed. Remove the old warning at thread exit. It is too early. Declare the debug store context field in thread_struct unconditionally. Remove ds_copy_thread() and ds_exit_thread() and do the work directly in process*.c. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144601.254472000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 9 --------- arch/x86/include/asm/processor.h | 4 +--- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 413e127e567d..149e5208e967 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -285,21 +285,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); */ extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); -/* - * Task clone/init and cleanup work - */ -extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); -extern void ds_exit_thread(struct task_struct *tsk); - #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void ds_switch_to(struct task_struct *prev, struct task_struct *next) {} -static inline void ds_copy_thread(struct task_struct *tsk, - struct task_struct *father) {} -static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1efeb497f1f9..7c39de7e709a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -454,10 +454,8 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -#ifdef CONFIG_X86_DS -/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + /* Debug Store context; see asm/ds.h */ struct ds_context *ds_ctx; -#endif /* CONFIG_X86_DS */ }; static inline unsigned long native_get_debugreg(int regno) -- cgit v1.2.3 From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:48 +0200 Subject: x86, ptrace: add bts context unconditionally Add the ptrace bts context field to task_struct unconditionally. Initialize the field directly in copy_process(). Remove all the unneeded functionality used to initialize that field. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144603.292754000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index e304b66abeea..5cdd19f20b5b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -extern void x86_ptrace_untrace(struct task_struct *); -extern void x86_ptrace_fork(struct task_struct *child, - unsigned long clone_flags); +#ifdef CONFIG_X86_PTRACE_BTS +extern void ptrace_bts_untrace(struct task_struct *tsk); -#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) -#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) +#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) +#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 017bc617657c928cb9a0c45a7a7e9f4e66695347 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:52 +0200 Subject: x86, ds: support Core i7 Add debug store support for Core i7. Core i7 adds a reset value for each performance counter and a new PEBS record format. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144607.088997000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 149e5208e967..70dac199b093 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -234,8 +234,12 @@ struct bts_trace { struct pebs_trace { struct ds_trace ds; - /* the PEBS reset value */ - unsigned long long reset_value; + /* the number of valid counters in the below array */ + unsigned int counters; + +#define MAX_PEBS_COUNTERS 4 + /* the counter reset value */ + unsigned long long counter_reset[MAX_PEBS_COUNTERS]; }; @@ -270,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); * Returns 0 on success; -Eerrno on error * * tracer: the tracer handle returned from ds_request_pebs() + * counter: the index of the counter * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, + unsigned int counter, u64 value); /* * Initialization -- cgit v1.2.3 From fcb2ac5bdfa3a7a04fb9749b916f64400f4c35a8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:31:58 +0200 Subject: x86_32: introduce restore_fpu_checking() Impact: cleanup, prepare FPU code unificaton Like on x86_64, return an error from restore_fpu and kill the task if it fails. Also rename restore_fpu to restore_fpu_checking which allows ifdefs to be removed in math_state_restore(). Signed-off-by: Jiri Slaby LKML-Reference: <1239190320-23952-1-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 71c9e5183982..09a2d6dfd85b 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -185,12 +185,10 @@ static inline void tolerant_fwait(void) asm volatile("fnclex ; fwait"); } -static inline void restore_fpu(struct task_struct *tsk) +static inline int restore_fpu_checking(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) { - xrstor_checking(&tsk->thread.xstate->xsave); - return; - } + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); /* * The "nop" is needed to make the instructions the same * length. @@ -200,6 +198,7 @@ static inline void restore_fpu(struct task_struct *tsk) "fxrstor %1", X86_FEATURE_FXSR, "m" (tsk->thread.xstate->fxsave)); + return 0; } /* We need a safe address that is cheap to find and that is already -- cgit v1.2.3 From 34ba476a01e128aad51e02f9be854584e9ec73cf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:31:59 +0200 Subject: x86: unify restore_fpu_checking Impact: cleanup On x86_32, separate f*rstor to an inline function which makes restore_fpu_checking the same on both platforms -> move it outside the ifdefs. Signed-off-by: Jiri Slaby LKML-Reference: <1239190320-23952-2-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 09a2d6dfd85b..7a6f21d95cf4 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); - else - return fxrstor_checking(&tsk->thread.xstate->fxsave); -} - /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes @@ -185,10 +177,9 @@ static inline void tolerant_fwait(void) asm volatile("fnclex ; fwait"); } -static inline int restore_fpu_checking(struct task_struct *tsk) +/* perform fxrstor iff the processor has extended states, otherwise frstor */ +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); /* * The "nop" is needed to make the instructions the same * length. @@ -197,7 +188,8 @@ static inline int restore_fpu_checking(struct task_struct *tsk) "nop ; frstor %1", "fxrstor %1", X86_FEATURE_FXSR, - "m" (tsk->thread.xstate->fxsave)); + "m" (*fx)); + return 0; } @@ -261,6 +253,14 @@ end: #endif /* CONFIG_X86_64 */ +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} + /* * Signal frame handlers... */ -- cgit v1.2.3 From 4ecf458492c2d97b3f9d850a5f92d79792e0a7e7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:32:00 +0200 Subject: x86_64: fix incorrect comments Impact: cleanup The comments which fxrstor_checking and fxsave_uset refer to is now in fxsave. Change the comments appropriately. Signed-off-by: Jiri Slaby Cc: Jiri Slaby LKML-Reference: <1239190320-23952-3-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 7a6f21d95cf4..63d185087d91 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) -#if 0 /* See comment in __save_init_fpu() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "m" (*fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); @@ -112,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in __fxsave_clear() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "0" (0)); -- cgit v1.2.3 From afd9fceec55225d33be878927056a548c2eef26c Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 9 Apr 2009 15:16:17 +0200 Subject: x86: cacheinfo: use cached K8 NB_MISC devices instead of scanning for it Impact: avoid code duplication Signed-off-by: Andreas Herrmann Cc: Andrew Morton Cc: Mark Langsdorf LKML-Reference: <20090409131617.GI31527@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/k8.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index 54c8cc53b24d..c23b3d171be5 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -12,4 +12,12 @@ extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); +#ifdef CONFIG_K8_NB +#define node_to_k8_nb_misc(node) \ + (node < num_k8_northbridges) ? k8_northbridges[node] : NULL +#else +#define node_to_k8_nb_misc(node) NULL +#endif + + #endif /* _ASM_X86_K8_H */ -- cgit v1.2.3 From 2c1b284e4fa260fd922b9a65c99169e2630c6862 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 11 Apr 2009 00:03:10 +0530 Subject: x86: clean up declarations and variables Impact: cleanup, no code changed - syscalls.h update declarations due to unifications - irq.c declare smp_generic_interrupt() before it gets used - process.c declare sys_fork() and sys_vfork() before they get used - tsc.c rename tsc_khz shadowed variable - apic/probe_32.c declare apic_default before it gets used - apic/nmi.c prev_nmi_count should be unsigned - apic/io_apic.c declare smp_irq_move_cleanup_interrupt() before it gets used - mm/init.c declare direct_gbpages and free_initrd_mem before they get used Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/include/asm/hw_irq.h | 4 ++++ arch/x86/include/asm/pgtable.h | 2 ++ arch/x86/include/asm/pgtable_64.h | 6 ------ arch/x86/include/asm/syscalls.h | 45 +++++++++++++++++++++------------------ 5 files changed, 33 insertions(+), 27 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f8377422..5773660c8cd5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -478,6 +478,9 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); #ifdef CONFIG_X86_32 + +extern struct apic apic_default; + /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea49bd70..be9ae4111c94 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -78,7 +78,11 @@ extern void eisa_set_level_irq(unsigned int irq); /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_generic_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_IO_APIC +extern asmlinkage void smp_irq_move_cleanup_interrupt(void); +#endif #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 29d96d168bc0..3f8d09d94eb3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -503,6 +503,8 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ +extern int direct_gbpages; + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6b87bc6d5018..abde308fdb0f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[]; extern void paging_init(void); -#endif /* !__ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ - #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", \ __FILE__, __LINE__, &(e), pte_val(e)) @@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define update_mmu_cache(vma, address, pte) do { } while (0) -extern int direct_gbpages; - /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 7043408f6904..372b76edd63f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -1,7 +1,7 @@ /* * syscalls.h - Linux syscall interfaces (arch-specific) * - * Copyright (c) 2008 Jaswinder Singh + * Copyright (c) 2008 Jaswinder Singh Rajput * * This file is released under the GPLv2. * See the file COPYING for more details. @@ -12,50 +12,55 @@ #include #include -#include #include +#include /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +/* kernel/process.c */ +int sys_fork(struct pt_regs *); +int sys_vfork(struct pt_regs *); + /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +/* kernel/signal.c */ +long sys_rt_sigreturn(struct pt_regs *); + /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 +/* kernel/ioport.c */ +long sys_iopl(struct pt_regs *); + /* kernel/process_32.c */ -int sys_fork(struct pt_regs *); int sys_clone(struct pt_regs *); -int sys_vfork(struct pt_regs *); int sys_execve(struct pt_regs *); -/* kernel/signal_32.c */ +/* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); - -/* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ +struct mmap_arg_struct; +struct sel_arg_struct; +struct oldold_utsname; +struct old_utsname; + asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct mmap_arg_struct; asmlinkage int old_mmap(struct mmap_arg_struct __user *); -struct sel_arg_struct; asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); -struct old_utsname; asmlinkage int sys_uname(struct old_utsname __user *); -struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ @@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + /* kernel/process_64.c */ -asmlinkage long sys_fork(struct pt_regs *); asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -asmlinkage long sys_vfork(struct pt_regs *); asmlinkage long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); long sys_arch_prctl(int, unsigned long); -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - -/* kernel/signal_64.c */ +/* kernel/signal.c */ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ +struct new_utsname; + asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct new_utsname; asmlinkage long sys_uname(struct new_utsname __user *); #endif /* CONFIG_X86_32 */ -- cgit v1.2.3 From 66aa230e437d89ca56224135f617e2d8e391a3ef Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 12:54:29 +0530 Subject: x86: page_types.h unification of declarations Impact: unification of declarations, cleanup Unification of declarations: moved init_memory_mapping, initmem_init and free_initmem from page_XX_types.h to page_types.h Signed-off-by: Jaswinder Singh Rajput Acked-by: Pekka Enberg Cc: Andrew Morton LKML-Reference: <1239693869.3033.31.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_32_types.h | 4 ---- arch/x86/include/asm/page_64_types.h | 6 ------ arch/x86/include/asm/page_types.h | 6 ++++++ 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0f915ae649a7..6f1b7331313f 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); -extern void initmem_init(unsigned long, unsigned long); -extern void free_initmem(void); extern void setup_bootmem_allocator(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d38c91b70248..3f587188ae68 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -71,12 +71,6 @@ extern unsigned long __phys_addr(unsigned long); #define vmemmap ((struct page *)VMEMMAP_START) -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - -extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); -extern void free_initmem(void); - extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 826ad37006ab..6473f5ccff85 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); + +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ -- cgit v1.2.3 From e7d43a74cb07cbc4b8e9b5e4a914816b33fb0719 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 13:18:28 +0530 Subject: x86: avoid multiple declaration of kstack_depth_to_print Impact: cleanup asm/stacktrace.h is more appropriate so removing other 2 declarations. Signed-off-by: Jaswinder Singh Rajput Cc: Neil Horman LKML-Reference: <1239695308.3033.34.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..9aa3ab262055 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -74,7 +74,6 @@ static inline int get_si_code(unsigned long condition) } extern int panic_on_unrecovered_nmi; -extern int kstack_depth_to_print; void math_error(void __user *); void math_emulate(struct math_emu_info *); -- cgit v1.2.3 From b206525ad1f653b7da35f5827be93770d28eae11 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 23:04:37 +0530 Subject: x86: k8 convert node_to_k8_nb_misc() from a macro to an inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Converting node_to_k8_nb_misc() from a macro to an inline function makes compiler see the 'node' parameter in the !CONFIG_K8_NB too, which eliminates these compiler warnings: arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘show_cache_disable’: arch/x86/kernel/cpu/intel_cacheinfo.c:712: warning: unused variable ‘node’ arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘store_cache_disable’: arch/x86/kernel/cpu/intel_cacheinfo.c:739: warning: unused variable ‘node’ Signed-off-by: Jaswinder Singh Rajput Cc: Andreas Herrmann Cc: Mark Langsdorf LKML-Reference: <1239730477.2966.26.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/k8.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c23b3d171be5..c2d1f3b58e5f 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -13,10 +13,15 @@ extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); #ifdef CONFIG_K8_NB -#define node_to_k8_nb_misc(node) \ - (node < num_k8_northbridges) ? k8_northbridges[node] : NULL +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; +} #else -#define node_to_k8_nb_misc(node) NULL +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return NULL; +} #endif -- cgit v1.2.3 From 89388913f2c88a2cd15d24abab571b17a2596127 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 21 Apr 2009 11:39:27 +0300 Subject: x86: unify noexec handling This patch unifies noexec handling on 32-bit and 64-bit. [ Impact: cleanup ] Signed-off-by: Pekka Enberg [ mingo@elte.hu: build fix ] LKML-Reference: <1240303167.771.69.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b8238dc8786d..4d258ad76a0f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,7 +273,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern int nx_enabled; -extern void set_nx(void); #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); -- cgit v1.2.3 From d7285c6b5c54397fdf112c2fb98ee43193173aa9 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 23 Apr 2009 10:21:38 -0700 Subject: x86: use native register access for native tlb flushing currently these are paravirtulaized, doesn't appear any callers rely on this (no pv_ops backends are using native_tlb and overriding cr3/4 access). [ Impact: fix lockdep warning with paravirt and function tracer ] Signed-off-by: Chris Wright LKML-Reference: <20090423172138.GR3036@sequoia.sous-sol.org> Signed-off-by: Steven Rostedt --- arch/x86/include/asm/tlbflush.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f998f88..e2927c5f45b1 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -17,7 +17,7 @@ static inline void __native_flush_tlb(void) { - write_cr3(read_cr3()); + native_write_cr3(native_read_cr3()); } static inline void __native_flush_tlb_global(void) @@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - cr4 = read_cr4(); + cr4 = native_read_cr4(); /* clear PGE */ - write_cr4(cr4 & ~X86_CR4_PGE); + native_write_cr4(cr4 & ~X86_CR4_PGE); /* write old PGE again and flush TLBs */ - write_cr4(cr4); + native_write_cr4(cr4); raw_local_irq_restore(flags); } -- cgit v1.2.3 From edc953fa4ebc0265ef3b1754fe116a9fd4264e15 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 28 Apr 2009 11:13:46 -0400 Subject: x86: clean up alternative.h Alternative header duplicates assembly that could be merged in one single macro. Merging this into this macro also allows to directly declare ALTERNATIVE() statements within assembly code. Uses a __stringify() of the feature bits rather than passing a "i" operand. Leave the old %0 operand as-is (set to 0), unused to stay compatible with API. (v2: tab alignment fixes) [ Impact: cleanup ] Signed-off-by: Mathieu Desnoyers LKML-Reference: <20090428151346.GA31212@Krystal> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 59 +++++++++++++++----------------------- 1 file changed, 23 insertions(+), 36 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index f6aa18eadf71..1a37bcdc8606 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -3,6 +3,7 @@ #include #include +#include #include /* @@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {} const unsigned char *const *find_nop_table(void); +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + \ + "661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" + /* * Alternative instructions for different CPU types or capabilities. * @@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void); * without volatile and memory clobber. */ #define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature) : "memory") + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") /* * Alternative inline assembly with input. @@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void); * Best is to use constraints that are fixed size (like (%1) ... "r") * If you use variable sized constraints like "m" or "g" in the * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) /* * use this macro(s) if you need more than one output parameter -- cgit v1.2.3 From 12d161147f828192b5bcc33166f468a827832767 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:10 +0000 Subject: x86: hookup sys_rt_tgsigqueueinfo Make the new sys_rt_tgsigqueueinfo available for x86. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/unistd_32.h | 1 + arch/x86/include/asm/unistd_64.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..708dae61262d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -340,6 +340,7 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..4e2b05404400 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) __SYSCALL(__NR_preadv, sys_preadv) #define __NR_pwritev 296 __SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 297 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #ifndef __NO_STUBS -- cgit v1.2.3 From bf293c17b26b8854241df08b9b63f7270cbde012 Mon Sep 17 00:00:00 2001 From: Remis Lima Baima Date: Thu, 30 Apr 2009 18:36:23 +0200 Subject: x86: added 'ifndef _ASM_X86_IOMAP_H' to iomap.h iomap.h misses the include guards. [ Impact: cleanup ] Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann LKML-Reference: <200904301836.23885.arnd@arndb.de> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 86af26091d6c..0e9fe1d9d971 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_IOMAP_H +#define _ASM_X86_IOMAP_H + /* * Copyright © 2008 Ingo Molnar * @@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); + +#endif /* _ASM_X86_IOMAP_H */ -- cgit v1.2.3 From c898faf91b3ec6b0f6efa35831b3984fa3331db0 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 5 May 2009 17:28:56 -0400 Subject: x86: 46 bit physical address support on 64 bits Extend the maximum addressable memory on x86-64 from 2^44 to 2^46 bytes. This requires some shuffling around of the vmalloc and virtual memmap memory areas, to keep them away from the direct mapping of up to 64TB of physical memory. This patch also introduces a guard hole between the vmalloc area and the virtual memory map space. There's really no good reason why we wouldn't have a guard hole there. [ Impact: future hardware enablement ] Signed-off-by: Rik van Riel LKML-Reference: <20090505172856.6820db22@cuia.bos.redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/page_64_types.h | 2 +- arch/x86/include/asm/pgtable_64_types.h | 8 ++++---- arch/x86/include/asm/sparsemem.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 3f587188ae68..6fadb020bd2b 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -47,7 +47,7 @@ #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 48 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index fbf42b8e0383..766ea16fbbbd 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) - +/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc20000000000, UL) -#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffe20000000000, UL) +#define VMALLOC_START _AC(0xffffc90000000000, UL) +#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffea0000000000, UL) #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index e3cc3c063ec5..4517d6b93188 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,7 +27,7 @@ #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ # define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ +# define MAX_PHYSMEM_BITS 46 #endif #endif /* CONFIG_SPARSEMEM */ -- cgit v1.2.3 From bf8b9a63c18a1a7777571650de0c9f4fd4368ca0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 8 May 2009 20:53:58 +0530 Subject: x86: msr-index.h remove duplicate MSR C001_0015 declaration MSRC001_0015 Hardware Configuration Register (HWCR) is already defined as MSR_K7_HWCR. And HWCR is available for >= K7. So MSR_K8_HWCR is not required and no-one is using it. [ Impact: cleanup, no object code change ] Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/msr-index.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec41fc16c167..4d58d04fca83 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,7 +121,6 @@ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_HWCR 0xc0010015 #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 -- cgit v1.2.3 From 6cac5a924668a56c7ccefc345805f1fe0536a90e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 29 Mar 2009 19:56:29 -0700 Subject: xen/x86-64: fix breakpoints and hardware watchpoints Native x86-64 uses the IST mechanism to run int3 and debug traps on an alternative stack. Xen does not do this, and so the frames were being misinterpreted by the ptrace code. This change special-cases these two exceptions by using Xen variants which run on the normal kernel stack properly. Impact: avoid crash or bad data when IST trap is invoked under Xen Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/traps.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..c44e5002f2ff 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,6 +13,9 @@ asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); +asmlinkage void xen_debug(void); +asmlinkage void xen_int3(void); +asmlinkage void xen_stack_segment(void); asmlinkage void overflow(void); asmlinkage void bounds(void); asmlinkage void invalid_op(void); -- cgit v1.2.3 From d756f4adb9d8a86e347a2d5435bb5cc95744733e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 4 May 2009 03:29:52 +0400 Subject: x86, 32-bit: ifdef out struct thread_struct::fs After commit 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 aka "[PATCH] i386: Convert i386 PDA code to use %fs" %fs saved during context switch moved from thread_struct to pt_regs and value on thread_struct became unused. [ Impact: reduce thread_struct size on 32-bit ] Signed-off-by: Alexey Dobriyan Cc: containers@lists.linux-foundation.org LKML-Reference: <20090503232952.GI16631@x200.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2cceae709c8..a6732ff7b016 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -428,7 +428,9 @@ struct thread_struct { unsigned short gsindex; #endif unsigned long ip; +#ifdef CONFIG_X86_64 unsigned long fs; +#endif unsigned long gs; /* Hardware debugging registers: */ unsigned long debugreg0; @@ -874,7 +876,6 @@ static inline void spin_lock_prefetch(const void *x) .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .fs = __KERNEL_PERCPU, \ } /* -- cgit v1.2.3 From 0c23590f00f85467b318ad0c20c36796a5bd4c60 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 4 May 2009 03:30:15 +0400 Subject: x86, 64-bit: ifdef out struct thread_struct::ip struct thread_struct::ip isn't used on x86_64, struct pt_regs::ip is used instead. kgdb should be reading 0 always, but I can't check it. [ Impact: (potentially) reduce thread_struct size on 64-bit ] Signed-off-by: Alexey Dobriyan Cc: containers@lists.linux-foundation.org LKML-Reference: <20090503233015.GJ16631@x200.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a6732ff7b016..a9ba7436821e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -427,7 +427,9 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif +#ifdef CONFIG_X86_32 unsigned long ip; +#endif #ifdef CONFIG_X86_64 unsigned long fs; #endif -- cgit v1.2.3 From 2ff799d3cff1ecb274049378b28120ee5c1c5e5f Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 11 May 2009 20:09:14 +0530 Subject: sched: Don't export sched_mc_power_savings on multi-socket single core system Fix to prevent sched_mc_power_saving from being exported through sysfs for multi-scoket single core system. Max cores should be always greater than one (1). My earlier patch that introduced fix for not exporting 'sched_mc_power_saving' on laptops broke it on multi-socket single core system. This fix addresses issue on both laptop and multi-socket single core system. Below are the Test results: 1. Single socket - multi-core Before Patch: Does not export 'sched_mc_power_saving' After Patch: Does not export 'sched_mc_power_saving' Result: Pass 2. Multi Socket - single core Before Patch: exports 'sched_mc_power_saving' After Patch: Does not export 'sched_mc_power_saving' Result: Pass 3. Multi Socket - Multi core Before Patch: exports 'sched_mc_power_saving' After Patch: exports 'sched_mc_power_saving' [ Impact: make the sched_mc_power_saving control available more consistently ] Signed-off-by: Mahesh Salgaonkar Cc: Suresh B Siddha Cc: Venkatesh Pallipadi Cc: Peter Zijlstra LKML-Reference: <20090511143914.GB4853@dirshya.in.ibm.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f44b49abca49..066ef590d7e0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -203,7 +203,8 @@ struct pci_bus; void x86_pci_root_bus_res_quirks(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids) +#define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ + (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) #define smt_capable() (smp_num_siblings > 1) #endif -- cgit v1.2.3 From 37ba7ab5e33cebc25c68fffe33e9f21e7c2014e8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 May 2009 15:56:08 -0700 Subject: x86, boot: make kernel_alignment adjustable; new bzImage fields Make the kernel_alignment field adjustable; this allows us to set it to a large value (intended to be 16 MB to avoid ZONE_DMA contention, memory holes and other weirdness) while a smart bootloader can still force a loading at a lesser alignment if absolutely necessary. Also export pref_address (preferred loading address, corresponding to the link-time address) and init_size, the total amount of linear memory the kernel will require during initialization. [ Impact: allows better kernel placement, gives bootloader more info ] Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/boot.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6ba23dd9fc92..418e632d4a80 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -8,11 +8,26 @@ #ifdef __KERNEL__ +#include + /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_x86_64 +#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) +#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + #ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -- cgit v1.2.3 From 5031296c57024a78ddad4edfc993367dbf4abb98 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 7 May 2009 16:54:11 -0700 Subject: x86: add extension fields for bootloader type and version A long ago, in days of yore, it all began with a god named Thor. There were vikings and boats and some plans for a Linux kernel header. Unfortunately, a single 8-bit field was used for bootloader type and version. This has generally worked without *too* much pain, but we're getting close to flat running out of ID fields. Add extension fields for both type and version. The type will be extended if it the old field is 0xE; the version is a simple MSB extension. Keep /proc/sys/kernel/bootloader_type containing (type << 4) + (ver & 0xf) for backwards compatiblity, but also add /proc/sys/kernel/bootloader_version which contains the full version number. [ Impact: new feature to support more bootloaders ] Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam.h | 3 ++- arch/x86/include/asm/processor.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 433adaebf9b6..1724e8de317c 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -50,7 +50,8 @@ struct setup_header { __u32 ramdisk_size; __u32 bootsect_kludge; __u16 heap_end_ptr; - __u16 _pad1; + __u8 ext_loader_ver; + __u8 ext_loader_type; __u32 cmd_line_ptr; __u32 initrd_addr_max; __u32 kernel_alignment; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fcf4d92e7e04..6384d25121ca 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -814,6 +814,7 @@ extern unsigned int BIOS_revision; /* Boot loader type from the setup header: */ extern int bootloader_type; +extern int bootloader_version; extern char ignore_fpu_irq; -- cgit v1.2.3 From 871b72dd1e12afc3f024479531d25a9339d2e3f9 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Mon, 11 May 2009 23:48:27 +0200 Subject: x86: microcode: use smp_call_function_single instead of set_cpus_allowed, cleanup of synchronization logic * Solve issues described in 6f66cbc63081fd70e3191b4dbb796746780e5ae1 in a way that doesn't resort to set_cpus_allowed(); * in fact, only collect_cpu_info and apply_microcode callbacks must run on a target cpu, others will do just fine on any other. smp_call_function_single() (as suggested by Ingo) is used to run these callbacks on a target cpu. * cleanup of synchronization logic of the 'microcode_core' part The generic 'microcode_core' part guarantees that only a single cpu (be it a full-fledged cpu, one of the cores or HT) is being updated at any particular moment of time. In general, there is no need for any additional sync. mechanism in arch-specific parts (the patch removes existing spinlocks). See also the "Synchronization" section in microcode_core.c. * return -EINVAL instead of -1 (which is translated into -EPERM) in microcode_write(), reload_cpu() and mc_sysdev_add(). Other suggestions for an error code? * use 'enum ucode_state' as return value of request_microcode_{fw, user} to gain more flexibility by distinguishing between real error cases and situations when an appropriate ucode was not found (which is not an error per-se). * some minor cleanups Thanks a lot to Hugh Dickins for review/suggestions/testing! Reference: http://marc.info/?l=linux-kernel&m=124025889012541&w=2 [ Impact: refactor and clean up microcode driver locking code ] Signed-off-by: Dmitry Adamushko Acked-by: Hugh Dickins Cc: Andrew Morton Cc: Rusty Russell Cc: Andreas Herrmann Cc: Peter Oruba Cc: Arjan van de Ven LKML-Reference: <1242078507.5560.9.camel@earth> [ did some more cleanups ] Signed-off-by: Ingo Molnar arch/x86/include/asm/microcode.h | 25 ++ arch/x86/kernel/microcode_amd.c | 58 ++---- arch/x86/kernel/microcode_core.c | 326 +++++++++++++++++++++----------------- arch/x86/kernel/microcode_intel.c | 92 +++------- 4 files changed, 261 insertions(+), 240 deletions(-) (~20 new comment lines) --- arch/x86/include/asm/microcode.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c882664716c1..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -9,20 +9,31 @@ struct cpu_signature { struct device; +enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; + struct microcode_ops { - int (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - int (*request_microcode_fw) (int cpu, struct device *device); + enum ucode_state (*request_microcode_user) (int cpu, + const void __user *buf, size_t size); - void (*apply_microcode) (int cpu); + enum ucode_state (*request_microcode_fw) (int cpu, + struct device *device); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); void (*microcode_fini_cpu) (int cpu); + + /* + * The generic 'microcode_core' part guarantees that + * the callbacks below run on a target cpu when they + * are being called. + * See also the "Synchronization" section in microcode_core.c. + */ + int (*apply_microcode) (int cpu); + int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; struct ucode_cpu_info { - struct cpu_signature cpu_sig; - int valid; - void *mc; + struct cpu_signature cpu_sig; + int valid; + void *mc; }; extern struct ucode_cpu_info ucode_cpu_info[]; -- cgit v1.2.3 From c4f68236e41641494f9c8a418ccc0678c335bbb5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 12 May 2009 11:37:34 -0700 Subject: x86-64: align __PHYSICAL_START, remove __KERNEL_ALIGN Handle the misconfiguration where CONFIG_PHYSICAL_START is incompatible with CONFIG_PHYSICAL_ALIGN. This is a configuration error, but one which arises easily since Kconfig doesn't have the smarts to express the true relationship between these two variables. Hence, align __PHYSICAL_START the same way we align LOAD_PHYSICAL_ADDR in . For non-relocatable kernels, this would cause the boot to fail. [ Impact: fix boot failures for non-relocatable kernels ] Reported-by: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/page_64_types.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d38c91b70248..e11900f2500e 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -32,17 +32,9 @@ */ #define __PAGE_OFFSET _AC(0xffff880000000000, UL) -#define __PHYSICAL_START CONFIG_PHYSICAL_START -#define __KERNEL_ALIGN 0x200000 - -/* - * Make sure kernel is aligned to 2MB address. Catching it at compile - * time is better. Change your config file and compile the kernel - * for a 2MB aligned address (CONFIG_PHYSICAL_START) - */ -#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 -#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" -#endif +#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \ + (CONFIG_PHYSICAL_ALIGN - 1)) & \ + ~(CONFIG_PHYSICAL_ALIGN - 1)) #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -- cgit v1.2.3 From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code after: | commit b263295dbffd33b0fbff670720fa178c30e3392a | Author: Christoph Lameter | Date: Wed Jan 30 13:30:47 2008 +0100 | | x86: 64-bit, make sparsemem vmemmap the only memory model we don't have MEMORY_HOTPLUG_RESERVE anymore. Historically, x86-64 had an architecture-specific method for memory hotplug whereby it scanned the SRAT for physical memory ranges that could be potentially used for memory hot-add later. By reserving those ranges without physical memory, the memmap would be allocated and left dormant until needed. This depended on the DISCONTIG memory model which has been removed so the code implementing HOTPLUG_RESERVE is now dead. This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE. (Changelog authored by Mel.) v2: updated changelog, and remove hotadd= in doc [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Reviewed-by: Christoph Lameter Reviewed-by: Mel Gorman Workflow-found-OK-by: Andrew Morton LKML-Reference: <4A0C4910.7090508@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numa_64.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cbe..7feff0648d74 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, extern void numa_init_array(void); extern int numa_off; -extern void srat_reserve_add_area(int nodeid); -extern int hotadd_percent; - extern s16 apicid_to_node[MAX_LOCAL_APIC]; extern unsigned long numa_free_all_bootmem(void); -- cgit v1.2.3 From 7c43769a9776141ec23ca81a1bdd5a9c0512f165 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 May 2009 13:59:37 -0700 Subject: x86, mm: Fix node_possible_map logic Recently there were some changes to the meaning of node_possible_map, and it is quite strange: - the node without memory would be set in node_possible_map - but some node with less NODE_MIN_SIZE will be kicked out of node_possible_map. fix it by adding strict_setup_node_bootmem(). Also, remove unparse_node(). so result will be: 1. cpu_to_node() will return online node only (nearest one) 2. apicid_to_node() still returns the node that could be not online but is set in node_possible_map. 3. node_possible_map will include nodes that mem on it are less NODE_MIN_SIZE v2: after move_cpus_to_node change. [ Impact: get node_possible_map right ] Signed-off-by: Yinghai Lu Tested-by: Jack Steiner LKML-Reference: <4A0C49BE.6080800@kernel.org> [ v3: various small cleanups and comment clarifications ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numa_64.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 7feff0648d74..c4ae822e415f 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -24,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); #ifdef CONFIG_NUMA +/* + * Too small node sizes may confuse the VM badly. Usually they + * result from BIOS bugs. So dont recognize nodes as standalone + * NUMA entities that have less than this amount of RAM listed: + */ +#define NODE_MIN_SIZE (4*1024*1024) + extern void __init init_cpu_to_node(void); extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); -- cgit v1.2.3 From 4aee2ad461889132bfb5a1518a9580d00e17008c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 19 May 2009 17:07:01 +0530 Subject: x86: asm/processor.h: remove double declaration Remove double declaration of: extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; they are already defined in the same file. [ Impact: cleanup ] Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1242733021.3377.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c52370f2fe..85628ea9d9b5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,9 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern unsigned short num_cache_leaves; struct thread_struct { /* Cached TLS descriptors: */ -- cgit v1.2.3 From fefda117ddb324b872312f1f061230e627c9f5ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 May 2009 12:21:42 +0200 Subject: amd-iommu: add amd_iommu_dump parameter This kernel parameter will be useful to get some AMD IOMMU related information in dmesg that is not necessary for the default user but may be helpful in debug situations. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b5..89dfb3793edd 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -194,6 +194,12 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +extern bool amd_iommu_dump; +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ + } while(0); /* * This structure contains generic data for IOMMU protection domains -- cgit v1.2.3 From 3bd221724adb9d642270df0e78b0105fb61e4a1c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 15:06:20 +0200 Subject: amd-iommu: introduce for_each_iommu* macros This patch introduces the for_each_iommu and for_each_iommu_safe macros to simplify the developers life when having to iterate over all AMD IOMMUs in the system. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b5..cf5ef172cfca 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,6 +195,14 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +/* + * Make iterating over all IOMMUs easier + */ +#define for_each_iommu(iommu) \ + list_for_each_entry((iommu), &amd_iommu_list, list) +#define for_each_iommu_safe(iommu, next) \ + list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. -- cgit v1.2.3 From bfd1be1857e5a3385bf146e02e6dc3dd4241bec1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 May 2009 15:33:57 +0200 Subject: amd-iommu: add function to flush tlb for all domains This function is required for suspend/resume support with AMD IOMMU enabled. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index f712344329bc..1750e1f85d3c 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -27,6 +27,7 @@ extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -- cgit v1.2.3 From 7d7a110c6127b7fc683dc6d764555f2dbd22b054 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 May 2009 15:48:10 +0200 Subject: amd-iommu: add function to flush tlb for all devices This function is required for suspend/resume support with AMD IOMMU enabled. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 1750e1f85d3c..262e02820049 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -28,6 +28,7 @@ extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -- cgit v1.2.3 From c3239567a20e90e3026ac5453d5267506ef7b030 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 10:56:44 +0200 Subject: amd-iommu: introduce aperture_range structure This is a preperation for extended address allocator. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b5..4c64c9bc6839 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,6 +195,8 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +#define APERTURE_RANGE_SIZE (128 * 1024 * 1024) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -209,6 +211,24 @@ struct protection_domain { void *priv; /* private data */ }; +/* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; +}; + /* * Data container for a dma_ops specific protection domain */ @@ -224,16 +244,8 @@ struct dma_ops_domain { /* address we start to search for free addresses */ unsigned long next_bit; - /* address allocation bitmap */ - unsigned long *bitmap; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 **pte_pages; + /* address space relevant data */ + struct aperture_range aperture; /* This will be set to true when TLB needs to be flushed */ bool need_flush; -- cgit v1.2.3 From 384de72910a7bf96a02a6d8023fe9e16d872beb2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 May 2009 12:30:05 +0200 Subject: amd-iommu: make address allocator aware of multiple aperture ranges This patch changes the AMD IOMMU address allocator to allow up to 32 aperture ranges per dma_ops domain. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4c64c9bc6839..eca912931a85 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,7 +195,12 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ -#define APERTURE_RANGE_SIZE (128 * 1024 * 1024) +#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ +#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) +#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) +#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ +#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) +#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) /* * This structure contains generic data for IOMMU protection domains @@ -227,6 +232,8 @@ struct aperture_range { * just calculate its address in constant time. */ u64 *pte_pages[64]; + + unsigned long offset; }; /* @@ -245,7 +252,7 @@ struct dma_ops_domain { unsigned long next_bit; /* address space relevant data */ - struct aperture_range aperture; + struct aperture_range *aperture[APERTURE_MAX_RANGES]; /* This will be set to true when TLB needs to be flushed */ bool need_flush; -- cgit v1.2.3 From 803b8cb4d9a93b90c67aba2aab7f2c54d595b5b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 May 2009 15:32:48 +0200 Subject: amd-iommu: change dma_dom->next_bit to dma_dom->next_address Simplify the code a little bit by using the same unit for all address space related state in the dma_ops domain structure. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index eca912931a85..4ff4cf1f0809 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -249,7 +249,7 @@ struct dma_ops_domain { unsigned long aperture_size; /* address we start to search for free addresses */ - unsigned long next_bit; + unsigned long next_address; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; -- cgit v1.2.3 From c323d95fa4dbe0b6bf6d59e24a0b7db067dd08a7 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 29 May 2009 13:28:35 +0800 Subject: perf_counter/x86: Always use NMI for performance-monitoring interrupt Always use NMI for performance-monitoring interrupt as there could be racy situations if we switch between irq and nmi mode frequently. Signed-off-by: Yong Wang LKML-Reference: <20090529052835.GA13657@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index d08dd52cb8ff..876ed97147b3 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -91,10 +91,10 @@ extern void set_perf_counter_pending(void); #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(int nmi); +extern void perf_counters_lapic_init(void); #else static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(int nmi) { } +static inline void perf_counters_lapic_init(void) { } #endif #endif /* _ASM_X86_PERF_COUNTER_H */ -- cgit v1.2.3 From a32881066e58346f2901afe0ebdfbf0c562877e5 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 3 Jun 2009 13:12:55 +0800 Subject: perf_counter/x86: Remove the IRQ (non-NMI) handling bits Remove the IRQ (non-NMI) handling bits as NMI will be used always. Signed-off-by: Yong Wang Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090603051255.GA2791@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 1 - arch/x86/include/asm/hw_irq.h | 1 - arch/x86/include/asm/irq_vectors.h | 5 ----- 3 files changed, 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index fe24d2802490..d750a10ccad6 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,7 +49,6 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #ifdef CONFIG_PERF_COUNTERS -BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 7309c0ad6902..4b4921d7a28e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,7 +29,6 @@ extern void apic_timer_interrupt(void); extern void generic_interrupt(void); extern void error_interrupt(void); -extern void perf_counter_interrupt(void); extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 545bb811ccb5..4492e19f8391 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,11 +106,6 @@ */ #define LOCAL_TIMER_VECTOR 0xef -/* - * Performance monitoring interrupt vector: - */ -#define LOCAL_PERF_VECTOR 0xee - /* * Generic system vector for platform specific use */ -- cgit v1.2.3 From 0e2595cdfd7df9f1128f7185152601ae5417483b Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 20 May 2009 08:10:57 -0500 Subject: x86: Fix UV BAU activation descriptor init The UV tlb shootdown code has a serious initialization error. An array of structures [32*8] is initialized as if it were [32]. The array is indexed by (cpu number on the blade)*8, so the short initialization works for up to 4 cpus on a blade. But above that, we provide an invalid opcode to the hub's broadcast assist unit. This patch changes the allocation of the array to use its symbolic dimensions for better clarity. And initializes all 32*8 entries. Shortened 'UV_ACTIVATION_DESCRIPTOR_SIZE' to 'UV_ADP_SIZE' per Ingo's recommendation. Tested on the UV simulator. Signed-off-by: Cliff Wickman Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 9b0e61bf7a88..bddd44f2f0ab 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -37,7 +37,7 @@ #define UV_CPUS_PER_ACT_STATUS 32 #define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_SIZE 2 -#define UV_ACTIVATION_DESCRIPTOR_SIZE 32 +#define UV_ADP_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 #define UV_NET_ENDPOINT_INTD 0x38 -- cgit v1.2.3 From 5095f59bda6793a7b8f0856096d6893fe98e0e51 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 5 Jun 2009 23:27:17 +0530 Subject: x86: cpu_debug: Remove model information to reduce encoding-decoding Remove model information, encoding/decoding and reduce bookkeeping. This, besides removing a lot of code and cleaning up the code, also enables these features on many more CPUs that were enumerated before. Reported-by: Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Alan Cox LKML-Reference: <1244224637.8212.6.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_debug.h | 101 +-------------------------------------- 1 file changed, 1 insertion(+), 100 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 222802029fa6..d96c1ee3a95c 100644 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h @@ -86,105 +86,7 @@ enum cpu_file_bit { CPU_VALUE_BIT, /* value */ }; -#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) - -/* - * DisplayFamily_DisplayModel Processor Families/Processor Number Series - * -------------------------- ------------------------------------------ - * 05_01, 05_02, 05_04 Pentium, Pentium with MMX - * - * 06_01 Pentium Pro - * 06_03, 06_05 Pentium II Xeon, Pentium II - * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III - * - * 06_09, 060D Pentium M - * - * 06_0E Core Duo, Core Solo - * - * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, - * Core 2 Quad, Core 2 Extreme, Core 2 Duo, - * Pentium dual-core - * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 - * - * 06_1C Atom - * - * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 - * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D - * - * 0F_06 Xeon 7100, 5000 Series, Xeon MP, - * Pentium 4, Pentium D - */ - -/* Register processors bits */ -enum cpu_processor_bit { - CPU_NONE, -/* Intel */ - CPU_INTEL_PENTIUM_BIT, - CPU_INTEL_P6_BIT, - CPU_INTEL_PENTIUM_M_BIT, - CPU_INTEL_CORE_BIT, - CPU_INTEL_CORE2_BIT, - CPU_INTEL_ATOM_BIT, - CPU_INTEL_XEON_P4_BIT, - CPU_INTEL_XEON_MP_BIT, -/* AMD */ - CPU_AMD_K6_BIT, - CPU_AMD_K7_BIT, - CPU_AMD_K8_BIT, - CPU_AMD_0F_BIT, - CPU_AMD_10_BIT, - CPU_AMD_11_BIT, -}; - -#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) -#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) -#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) -#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) -#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) -#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) -#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) -#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) - -#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) -#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) -#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) -#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) -#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) -#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) -#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) -#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) -#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) -#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) -#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) -#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) -#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) -#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) -#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) -#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) -#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) -#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) -#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) - -/* Select all supported Intel CPUs */ -#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) - -#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT) -#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT) -#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT) -#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT) -#define CPU_AMD_10 (1 << CPU_AMD_10_BIT) -#define CPU_AMD_11 (1 << CPU_AMD_11_BIT) - -#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11) -#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS) -#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS) -#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS) - -/* Select all supported AMD CPUs */ -#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) - -/* Select all supported CPUs */ -#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) +#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) #define MAX_CPU_FILES 512 @@ -220,7 +122,6 @@ struct cpu_debug_range { unsigned min; /* Register range min */ unsigned max; /* Register range max */ unsigned flag; /* Supported flags */ - unsigned model; /* Supported models */ }; #endif /* _ASM_X86_CPU_DEBUG_H */ -- cgit v1.2.3 From a4046f8d299e00e9855ae292527c2d66a42670eb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 7 Jun 2009 12:19:37 +0400 Subject: x86, nmi: Use predefined numbers instead of hardcoded one [ Impact: cleanup ] Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090607081937.GC4547@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c45a0a568dff..c97264409934 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void) * but since they are power of two we could use a * cheaper way --cvg */ - return nmi_watchdog & 0x3; + return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); } #endif -- cgit v1.2.3 From c4ed3f04ba9defe22aa729d1646f970f791c03d7 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 8 Jun 2009 10:44:05 -0500 Subject: x86, UV: Fix macros for multiple coherency domains Fix bug in the SGI UV macros that support systems with multiple coherency domains. The macros used for referencing global MMR (chipset registers) are failing to correctly "or" the NASID (node identifier) bits that reside above M+N. These high bits are supplied automatically by the chipset for memory accesses coming from the processor socket. However, the bits must be present for references to the special global MMR space used to map chipset registers. (See uv_hub.h for more details ...) The bug results in references to invalid/incorrect nodes. Signed-off-by: Jack Steiner Cc: LKML-Reference: <20090608154405.GA16395@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3a98ea1062e..341070f7ad5c 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -133,6 +133,7 @@ struct uv_scir_s { struct uv_hub_info_s { unsigned long global_mmr_base; unsigned long gpa_mask; + unsigned int gnode_extra; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; @@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); * p - PNODE (local part of nsids, right shifted 1) */ #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) -#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) +#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) +#define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1) #define UV_LOCAL_MMR_BASE 0xf4000000UL #define UV_GLOBAL_MMR32_BASE 0xf8000000UL @@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ - ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) #define UV_APIC_PNODE_SHIFT 6 -- cgit v1.2.3 From 42937e81a82b6bbc51a309c83da140b3a7ca5945 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 8 Jun 2009 15:55:09 +0200 Subject: x86: Detect use of extended APIC ID for AMD CPUs Booting a 32-bit kernel on Magny-Cours results in the following panic: ... Using APIC driver default ... Overriding APIC driver with bigsmp ... Getting VERSION: 80050010 Getting VERSION: 80050010 Getting ID: 10000000 Getting ID: ef000000 Getting LVT0: 700 Getting LVT1: 10000 Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (16 vs 0) Pid: 1, comm: swapper Not tainted 2.6.30-rcX #2 Call Trace: [] ? panic+0x38/0xd3 [] ? native_smp_prepare_cpus+0x259/0x31f [] ? kernel_init+0x3e/0x141 [] ? kernel_init+0x0/0x141 [] ? kernel_thread_helper+0x7/0x10 The reason is that default_get_apic_id handled extension of local APIC ID field just in case of XAPIC. Thus for this AMD CPU, default_get_apic_id() returns 0 and bigsmp_get_apic_id() returns 16 which leads to the respective kernel panic. This patch introduces a Linux specific feature flag to indicate support for extended APIC id (8 bits instead of 4 bits width) and sets the flag on AMD CPUs if applicable. Signed-off-by: Andreas Herrmann Cc: LKML-Reference: <20090608135509.GA12431@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/cpufeature.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f8377422..9b2c04910e0d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -410,7 +410,7 @@ static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_XAPIC(ver)) + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) return (x >> 24) & 0xFF; else return (x >> 24) & 0x0F; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb83b1c397aa..78dee4f0f7aa 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -94,6 +94,7 @@ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ -- cgit v1.2.3 From 0b8c3d5ab000c22889af7f9409799a6cdc31a2b2 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 9 Jun 2009 10:40:50 -0400 Subject: x86: Clear TS in irq_ts_save() when in an atomic section The dynamic FPU context allocation changes caused the padlock driver to generate the below warning. Fix it by masking TS when doing padlock encryption operations in an atomic section. This solves: BUG: sleeping function called from invalid context at mm/slub.c:1602 in_atomic(): 1, irqs_disabled(): 0, pid: 82, name: cryptomgr_test Pid: 82, comm: cryptomgr_test Not tainted 2.6.29.4-168.test7.fc11.x86_64 #1 Call Trace: [] __might_sleep+0x10b/0x110 [] kmem_cache_alloc+0x37/0xf1 [] init_fpu+0x49/0x8a [] math_state_restore+0x3e/0xbc [] do_device_not_available+0x9/0xb [] device_not_available+0x1b/0x20 [] ? aes_crypt+0x66/0x74 [padlock_aes] [] ? blkcipher_walk_next+0x257/0x2e0 [] ? blkcipher_walk_first+0x18e/0x19d [] aes_encrypt+0x9d/0xe5 [padlock_aes] [] crypt+0x6b/0x114 [xts] [] ? aes_encrypt+0x0/0xe5 [padlock_aes] [] ? aes_encrypt+0x0/0xe5 [padlock_aes] [] encrypt+0x49/0x4b [xts] [] async_encrypt+0x3c/0x3e [] test_skcipher+0x1da/0x658 [] ? crypto_spawn_tfm+0x8e/0xb1 [] ? __crypto_alloc_tfm+0x11b/0x15f [] ? crypto_spawn_tfm+0x8e/0xb1 [] ? skcipher_geniv_init+0x2b/0x47 [] ? async_chainiv_init+0x5c/0x61 [] alg_test_skcipher+0x63/0x9b [] alg_test+0x12d/0x175 [] cryptomgr_test+0x38/0x54 [] ? cryptomgr_test+0x0/0x54 [] kthread+0x4d/0x78 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x78 [] ? child_rip+0x0/0x20 Signed-off-by: Chuck Ebbert Cc: Suresh Siddha LKML-Reference: <20090609104050.50158cfe@dhcp-100-2-144.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 71c9e5183982..4aab52f8e41a 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -305,18 +305,18 @@ static inline void kernel_fpu_end(void) /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context aswell. To prevent these kernel instructions - * in interrupt context interact wrongly with other user/kernel fpu usage, we + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we * should use them only in the context of irq_ts_save/restore() */ static inline int irq_ts_save(void) { /* - * If we are in process context, we are ok to take a spurious DNA fault. - * Otherwise, doing clts() in process context require pre-emption to - * be disabled or some heavy lifting like kernel_fpu_begin() + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() */ - if (!in_interrupt()) + if (!in_atomic()) return 0; if (read_cr0() & X86_CR0_TS) { -- cgit v1.2.3 From d510d6cc653bc4b3094ea73afe12600d0ab445b3 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 25 Feb 2009 17:22:28 +0800 Subject: KVM: Enable MSI-X for KVM assigned device This patch finally enable MSI-X. What we need for MSI-X: 1. Intercept one page in MMIO region of device. So that we can get guest desired MSI-X table and set up the real one. Now this have been done by guest, and transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY. 2. Information for incoming interrupt. Now one device can have more than one interrupt, and they are all handled by one workqueue structure. So we need to identify them. The previous patch enable gsi_msg_pending_bitmap get this done. 3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X message address/data. We used same entry number for the host and guest here, so that it's easy to find the correlated guest gsi. What we lack for now: 1. The PCI spec said nothing can existed with MSI-X table in the same page of MMIO region, except pending bits. The patch ignore pending bits as the first step (so they are always 0 - no pending). 2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch didn't support this, and Linux also don't work in this way. 3. The patch didn't implement MSI-X mask all and mask single entry. I would implement the former in driver/pci/msi.c later. And for single entry, userspace should have reposibility to handle it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index dc3f6cf11704..125be8b19568 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -16,6 +16,7 @@ #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_MSIX /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -- cgit v1.2.3 From e1035715ef8d3171e29f9c6aee6f40d57b3fead5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 5 Mar 2009 16:34:59 +0200 Subject: KVM: change the way how lowest priority vcpu is calculated The new way does not require additional loop over vcpus to calculate the one with lowest priority as one is chosen during delivery bitmap construction. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f0faf58044ff..46276273a1a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -286,6 +286,7 @@ struct kvm_vcpu_arch { u64 shadow_efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + int32_t apic_arb_prio; int mp_state; int sipi_vector; u64 ia32_misc_enable_msr; @@ -400,7 +401,6 @@ struct kvm_arch{ struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; - int round_robin_prev_vcpu; unsigned int tss_addr; struct page *apic_access_page; -- cgit v1.2.3 From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 23 Mar 2009 12:12:11 +0200 Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking if interrupt window is actually opened. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 46276273a1a1..8351c4d00ac0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -521,7 +521,7 @@ struct kvm_x86_ops { void (*inject_pending_irq)(struct kvm_vcpu *vcpu); void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, struct kvm_run *run); - + int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); int (*get_mt_mask_shift)(void); -- cgit v1.2.3 From 82725b20e22fb85377f61a16f6d0d5cfc28b45d3 Mon Sep 17 00:00:00 2001 From: "Dong, Eddie" Date: Mon, 30 Mar 2009 16:21:08 +0800 Subject: KVM: MMU: Emulate #PF error code of reserved bits violation Detect, indicate, and propagate page faults where reserved bits are set. Take care to handle the different paging modes, each of which has different sets of reserved bits. [avi: fix pte reserved bits for efer.nxe=0] Signed-off-by: Eddie Dong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8351c4d00ac0..548b97d284d3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -261,6 +261,7 @@ struct kvm_mmu { union kvm_mmu_page_role base_role; u64 *pae_root; + u64 rsvd_bits_mask[2][4]; }; struct kvm_vcpu_arch { @@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.2.3 From 9645bb56b31a1b70ab9e470387b5264cafc04aa9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 31 Mar 2009 11:31:54 +0300 Subject: KVM: MMU: Use different shadows when EFER.NXE changes A pte that is shadowed when the guest EFER.NXE=1 is not valid when EFER.NXE=0; if bit 63 is set, the pte should cause a fault, and since the shadow EFER always has NX enabled, this won't happen. Fix by using a different shadow page table for different EFER.NXE bits. This allows vcpus to run correctly with different values of EFER.NXE, and for transitions on this bit to be handled correctly without requiring a full flush. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 548b97d284d3..3fc46238476c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -185,6 +185,7 @@ union kvm_mmu_page_role { unsigned access:3; unsigned invalid:1; unsigned cr4_pge:1; + unsigned nxe:1; }; }; -- cgit v1.2.3 From 64a7ec066813443440bfc9f60a9e76a47cfa6b2b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 30 Mar 2009 16:03:29 +0300 Subject: KVM: Fix unneeded instruction skipping during task switching. There is no need to skip instruction if the reason for a task switch is a task gate in IDT and access to it is caused by an external even. The problem is currently solved only for VMX since there is no reliable way to skip an instruction in SVM. We should emulate it instead. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/svm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75f3ebf..85574b7c1bc1 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EVTINJ_VALID_ERR (1 << 11) #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI -- cgit v1.2.3 From c2d0ee46e6e633a3c23ecbcb9b03ad731906cd79 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sun, 5 Apr 2009 14:54:47 -0300 Subject: KVM: MMU: remove global page optimization logic Complexity to fix it not worthwhile the gains, as discussed in http://article.gmane.org/gmane.comp.emulators.kvm.devel/28649. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3fc46238476c..0e3a7c6e522c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -213,7 +213,6 @@ struct kvm_mmu_page { int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool global; unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ @@ -395,7 +394,6 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct list_head oos_global_pages; struct iommu_domain *iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; @@ -425,7 +423,6 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; - u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -640,7 +637,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From ba8afb6b0a2c7e06da760ffe5d078245058619b5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 12 Apr 2009 13:36:57 +0300 Subject: KVM: x86 emulator: Add new mode of instruction emulation: skip In the new mode instruction is decoded, but not executed. The EIP is moved to point after the instruction. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0e3a7c6e522c..cb306cff2b49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -562,6 +562,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) +#define EMULTYPE_SKIP (1 << 2) int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); -- cgit v1.2.3 From 863e8e658ee9ac6e5931b295eb7428456e450a0f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:44:57 +0300 Subject: KVM: VMX: Consolidate userspace and kernel interrupt injection for VMX Use the same callback to inject irq/nmi events no matter what irqchip is in use. Only from VMX for now. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cb306cff2b49..5edae351b5dc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -517,7 +517,7 @@ struct kvm_x86_ops { void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); bool (*exception_injected)(struct kvm_vcpu *vcpu); - void (*inject_pending_irq)(struct kvm_vcpu *vcpu); + void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run); void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 1cb948ae86f3d95cce58fac51d00766825f5f783 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:45:02 +0300 Subject: KVM: Remove exception_injected() callback. It always return false for VMX/SVM now. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5edae351b5dc..ea3741edbec3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -516,7 +516,6 @@ struct kvm_x86_ops { void (*set_irq)(struct kvm_vcpu *vcpu, int vec); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); - bool (*exception_injected)(struct kvm_vcpu *vcpu); void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run); void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, struct kvm_run *run); -- cgit v1.2.3 From 1d6ed0cb95a2f0839e1a31f1971dc37cd60c258a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:45:03 +0300 Subject: KVM: Remove inject_pending_vectors() callback It is the same as inject_pending_irq() for VMX/SVM now. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ea3741edbec3..aa5a54eb4da4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -517,8 +517,6 @@ struct kvm_x86_ops { void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run); - void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, - struct kvm_run *run); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); -- cgit v1.2.3 From c4282df98ae0993983924c00ed76428a6609d68b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:45:07 +0300 Subject: KVM: Get rid of arch.interrupt_window_open & arch.nmi_window_open They are recalculated before each use anyway. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aa5a54eb4da4..53533ea17555 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -266,7 +266,6 @@ struct kvm_mmu { struct kvm_vcpu_arch { u64 host_tsc; - int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); /* @@ -360,7 +359,6 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; - bool nmi_window_open; struct mtrr_state_type mtrr_state; u32 pat; -- cgit v1.2.3 From 95ba82731374eb1c2af4dd442526c4b314f0e8b6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:45:08 +0300 Subject: KVM: SVM: Add NMI injection support Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53533ea17555..dd9ecd3de90d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -512,10 +512,15 @@ struct kvm_x86_ops { unsigned char *hypercall_addr); int (*get_irq)(struct kvm_vcpu *vcpu); void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); - void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); + int (*nmi_allowed)(struct kvm_vcpu *vcpu); + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); int (*get_mt_mask_shift)(void); @@ -763,6 +768,7 @@ enum { #define HF_GIF_MASK (1 << 0) #define HF_HIF_MASK (1 << 1) #define HF_VINTR_MASK (1 << 2) +#define HF_NMI_MASK (1 << 3) /* * Hardware virtualization extension instructions may fault if a -- cgit v1.2.3 From 14d0bc1f7c8226d5088e7182c3b53e0c7e91d1af Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 21 Apr 2009 17:45:11 +0300 Subject: KVM: Get rid of get_irq() callback It just returns pending IRQ vector from the queue for VMX/SVM. Get IRQ directly from the queue before migration and put it back after. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd9ecd3de90d..3e94d0513208 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -510,7 +510,6 @@ struct kvm_x86_ops { void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - int (*get_irq)(struct kvm_vcpu *vcpu); void (*set_irq)(struct kvm_vcpu *vcpu, int vec); void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, -- cgit v1.2.3 From 4b12f0de33a64dfc624b2480f55b674f7fa23ef2 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 27 Apr 2009 20:35:42 +0800 Subject: KVM: Replace get_mt_mask_shift with get_mt_mask Shadow_mt_mask is out of date, now it have only been used as a flag to indicate if TDP enabled. Get rid of it and use tdp_enabled instead. Also put memory type logical in kvm_x86_ops->get_mt_mask(). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3e94d0513208..8a6f6b643dfe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -522,7 +522,7 @@ struct kvm_x86_ops { void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); - int (*get_mt_mask_shift)(void); + u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -536,7 +536,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -550,6 +550,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; -- cgit v1.2.3 From 522c68c4416de3cd3e11a9ff10d58e776a69ae1e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 27 Apr 2009 20:35:43 +0800 Subject: KVM: Enable snooping control for supported hardware Memory aliases with different memory type is a problem for guest. For the guest without assigned device, the memory type of guest memory would always been the same as host(WB); but for the assigned device, some part of memory may be used as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of host memory type then be a potential issue. Snooping control can guarantee the cache correctness of memory go through the DMA engine of VT-d. [avi: fix build on ia64] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8a6f6b643dfe..253d8f669cf6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_arch{ struct list_head active_mmu_pages; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; + int iommu_flags; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; -- cgit v1.2.3 From d6a8c875f35a6e1b3fb3f21e93eabb183b1f39ee Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 20 Apr 2009 18:10:07 +0200 Subject: KVM: Drop request_nmi from stats The stats entry request_nmi is no longer used as the related user space interface was dropped. So clean it up. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 253d8f669cf6..ab7de4a11955 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -441,7 +441,6 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; - u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; -- cgit v1.2.3 From 069ebaa4644521e8e80b6849ace4dee53f93f55e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 May 2009 14:37:56 +0300 Subject: x86: Add cpu features MOVBE and POPCNT Add cpu feature bit support for the MOVBE and POPCNT instructions. Signed-off-by: Avi Kivity --- arch/x86/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb83b1c397aa..9c63bf37ad53 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -115,6 +115,8 @@ #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ #define X86_FEATURE_AES (4*32+25) /* AES instructions */ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ -- cgit v1.2.3 From 2809f5d2c4cfad171167b131bb2a21ab65eba40f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 12 May 2009 16:21:05 -0400 Subject: KVM: Replace ->drop_interrupt_shadow() by ->set_interrupt_shadow() This patch replaces drop_interrupt_shadow with the more general set_interrupt_shadow, that can either drop or raise it, depending on its parameter. It also adds ->get_interrupt_shadow() for future use. Signed-off-by: Glauber Costa CC: H. Peter Anvin CC: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/asm/kvm_x86_emulate.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ab7de4a11955..16d1481aa231 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -508,6 +508,8 @@ struct kvm_x86_ops { void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); void (*set_irq)(struct kvm_vcpu *vcpu, int vec); @@ -519,7 +521,6 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881a..be40d6e2b6bb 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -143,6 +143,9 @@ struct decode_cache { struct fetch_cache fetch; }; +#define X86_SHADOW_INT_MOV_SS 1 +#define X86_SHADOW_INT_STI 2 + struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; -- cgit v1.2.3 From 310b5d306c1aee7ebe32f702c0e33e7988d50646 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 12 May 2009 16:21:06 -0400 Subject: KVM: Deal with interrupt shadow state for emulated instructions We currently unblock shadow interrupt state when we skip an instruction, but failing to do so when we actually emulate one. This blocks interrupts in key instruction blocks, in particular sti; hlt; sequences If the instruction emulated is an sti, we have to block shadow interrupts. The same goes for mov ss. pop ss also needs it, but we don't currently emulate it. Without this patch, I cannot boot gpxe option roms at vmx machines. This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469 Signed-off-by: Glauber Costa CC: H. Peter Anvin CC: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_x86_emulate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index be40d6e2b6bb..b7ed2c423116 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -155,6 +155,9 @@ struct x86_emulate_ctxt { int mode; u32 cs_base; + /* interruptibility state, as a result of execution of STI or MOV SS */ + int interruptibility; + /* decode cache */ struct decode_cache decode; }; -- cgit v1.2.3 From 923c61bbc6413e87e5f6b0bae663d202a8de0537 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 11 May 2009 13:35:48 +0300 Subject: KVM: Remove irq_pending bitmap Only one interrupt vector can be injected from userspace irqchip at any given time so no need to store it in a bitmap. Put it into interrupt queue directly. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16d1481aa231..977a785a9d75 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -266,8 +266,6 @@ struct kvm_mmu { struct kvm_vcpu_arch { u64 host_tsc; - unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ - DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); /* * rip and regs accesses must go through * kvm_{register,rip}_{read,write} functions. -- cgit v1.2.3 From 66fd3f7f901f29a557a473af595bf11b270b9ac2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 11 May 2009 13:35:50 +0300 Subject: KVM: Do not re-execute INTn instruction. Re-inject event instead. This is what Intel suggest. Also use correct instruction length when re-injecting soft fault/interrupt. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 977a785a9d75..1d6c3f757cb6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -319,6 +319,8 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; + u8 event_exit_inst_len; + struct kvm_queued_exception { bool pending; bool has_error_code; @@ -328,6 +330,7 @@ struct kvm_vcpu_arch { struct kvm_queued_interrupt { bool pending; + bool soft; u8 nr; } interrupt; @@ -510,7 +513,7 @@ struct kvm_x86_ops { u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*set_irq)(struct kvm_vcpu *vcpu); void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); -- cgit v1.2.3 From 44c11430b52cbad0a467bc023a802d122dfd285c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 11 May 2009 13:35:52 +0300 Subject: KVM: inject NMI after IRET from a previous NMI, not before. If NMI is received during handling of another NMI it should be injected immediately after IRET from previous NMI handler, but SVM intercept IRET before instruction execution so we can't inject pending NMI at this point and there is not way to request exit when NMI window opens. This patch fix SVM code to open NMI window after IRET by single stepping over IRET instruction. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1d6c3f757cb6..82129437e873 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -358,6 +358,7 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; + bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; @@ -771,6 +772,7 @@ enum { #define HF_HIF_MASK (1 << 1) #define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) +#define HF_IRET_MASK (1 << 4) /* * Hardware virtualization extension instructions may fault if a -- cgit v1.2.3 From 56b237e31abf4d6dbc6e2a0214049b9a23be4883 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Thu, 4 Jun 2009 15:04:08 -0700 Subject: KVM: VMX: Rename rmode.active to rmode.vm86_active That way the interpretation of rmode.active becomes more clear with unrestricted guest code. Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 82129437e873..eabdc1cfab5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -335,7 +335,7 @@ struct kvm_vcpu_arch { } interrupt; struct { - int active; + int vm86_active; u8 save_iopl; struct kvm_save_segment { u16 selector; -- cgit v1.2.3 From a0861c02a981c943573478ea13b29b1fb958ee5b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 Jun 2009 17:37:09 +0800 Subject: KVM: Add VT-x machine check support VT-x needs an explicit MC vector intercept to handle machine checks in the hyper visor. It also has a special option to catch machine checks that happen during VT entry. Do these interceptions and forward them to the Linux machine check handler. Make it always look like user space is interrupted because the machine check handler treats kernel/user space differently. Thanks to Jiang Yunhong for help and testing. Cc: stable@kernel.org Signed-off-by: Andi Kleen Signed-off-by: Huang Ying Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498f944010b9..11be5ad2e0e9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -247,6 +247,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 #define EXIT_REASON_EPT_VIOLATION 48 -- cgit v1.2.3 From 6bc1096d7ab3621b3ffcf06616d1f4e0325d903d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 May 2009 12:12:01 +0200 Subject: x86: MSR: add a struct representation of an MSR Add a struct representing a 64bit MSR pair consisting of a low and high register part and convert msr_info to use it. Also, rename msr-on-cpu.c to msr.c. Side note: Put the cpumask.h include in __KERNEL__ space thus fixing an allmodconfig build failure in the headers_check target. CC: H. Peter Anvin Signed-off-by: Borislav Petkov --- arch/x86/include/asm/msr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 638bf6241807..5e1213216e2b 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -12,6 +12,17 @@ #include #include +#include + +struct msr { + union { + struct { + u32 l; + u32 h; + }; + u64 q; + }; +}; static inline unsigned long long native_read_tscp(unsigned int *aux) { -- cgit v1.2.3 From b034c19f9f61c8b6f2435aa2e77f52348ebde767 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 May 2009 13:52:19 +0200 Subject: x86: MSR: add methods for writing of an MSR on several CPUs Provide for concurrent MSR writes on all the CPUs in the cpumask. Also, add a temporary workaround for smp_call_function_many which skips the CPU we're executing on. Bart: zero out rv struct which is allocated on stack. CC: H. Peter Anvin Signed-off-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- arch/x86/include/asm/msr.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 5e1213216e2b..22603764e7db 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -227,6 +227,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); #else /* CONFIG_SMP */ @@ -240,6 +242,16 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } +static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, + struct msr *msrs) +{ + rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); +} +static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, + struct msr *msrs) +{ + wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); +} static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { -- cgit v1.2.3 From 5b0ed5263cb089500052f8c1ab6e0706bebf0d83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Jun 2009 12:42:49 +0100 Subject: x86: fix ktermios-termio conversion The legacy TCSETA{,W,F} ioctls failed to set the termio->c_line field on x86. This adds a missing get_user. The same ioctls also fail to report faulting user pointers, which we keep ignoring. Signed-off-by: Arnd Bergmann Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- arch/x86/include/asm/termios.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index f72956331c49..c4ee8056baca 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios, SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); + get_user(termios->c_line, &termio->c_line); return copy_from_user(termios->c_cc, termio->c_cc, NCC); } -- cgit v1.2.3