From b3ac891b67bd4b1fc728d1c784cad1212dea433d Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:22 +0100 Subject: x86: Add support for lock prefix in alternatives The current lock prefix UP/SMP alternative code doesn't allow LOCK_PREFIX to be used in alternatives code. This patch solves the problem by adding a new LOCK_PREFIX_ALTERNATIVE_PATCH macro that only records the lock prefix location but does not emit the prefix. The user of this macro can then start any alternative sequence with "lock" and have it UP/SMP patched. To make this work, the UP/SMP alternative code is changed to do the lock/DS prefix switching only if the byte actually contains a lock or DS prefix. Thus, if an alternative without the "lock" is selected, it will now do nothing instead of clobbering the code. Changes in v2: - Naming change - Change label to not conflict with alternatives Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 3b5b828767b6..55fee12cea6d 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -28,12 +28,14 @@ */ #ifdef CONFIG_SMP -#define LOCK_PREFIX \ +#define LOCK_PREFIX_HERE \ ".section .smp_locks,\"a\"\n" \ _ASM_ALIGN "\n" \ - _ASM_PTR "661f\n" /* address */ \ + _ASM_PTR "671f\n" /* address */ \ ".previous\n" \ - "661:\n\tlock; " + "671:" + +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " #else /* ! CONFIG_SMP */ #define LOCK_PREFIX "" -- cgit v1.2.3 From 9c76b38476b18c45f97098a10b0176b321eba3ea Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:23 +0100 Subject: x86-32: Allow UP/SMP lock replacement in cmpxchg64 Use the functionality just introduced in the previous patch: mark the lock prefixes in cmpxchg64 alternatives for UP removal. Changes in v2: - Naming change Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ffb9bb6b6c37..8859e12dd3cf 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ __typeof__(*(ptr)) __new = (n); \ - alternative_io("call cmpxchg8b_emu", \ + alternative_io(LOCK_PREFIX_HERE \ + "call cmpxchg8b_emu", \ "lock; cmpxchg8b (%%esi)" , \ X86_FEATURE_CX8, \ "=A" (__ret), \ -- cgit v1.2.3 From a7e926abc3adfbd2e5e20d2b46177adb4e313915 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:25 +0100 Subject: x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellanous changes and improvements Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/atomic64_32.h | 278 ++++++++++++++++++++++++++++--------- 1 file changed, 211 insertions(+), 67 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 03027bf28de5..2a934aa19a43 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -14,109 +14,193 @@ typedef struct { #define ATOMIC64_INIT(val) { (val) } -extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); +#ifdef CONFIG_X86_CMPXCHG64 +#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" +#else +#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) +#endif + +#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) + +/** + * atomic64_cmpxchg - cmpxchg atomic64 variable + * @p: pointer to type atomic64_t + * @o: expected value + * @n: new value + * + * Atomically sets @v to @n if it was equal to @o and returns + * the old value. + */ + +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +{ + return cmpxchg64(&v->counter, o, n); +} /** * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign + * @v: pointer to type atomic64_t + * @n: value to assign * - * Atomically xchgs the value of @ptr to @new_val and returns + * Atomically xchgs the value of @v to @n and returns * the old value. */ -extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); +static inline long long atomic64_xchg(atomic64_t *v, long long n) +{ + long long o; + unsigned high = (unsigned)(n >> 32); + unsigned low = (unsigned)n; + asm volatile(ATOMIC64_ALTERNATIVE(xchg) + : "=A" (o), "+b" (low), "+c" (high) + : "S" (v) + : "memory" + ); + return o; +} /** * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign + * @v: pointer to type atomic64_t + * @n: value to assign * - * Atomically sets the value of @ptr to @new_val. + * Atomically sets the value of @v to @n. */ -extern void atomic64_set(atomic64_t *ptr, u64 new_val); +static inline void atomic64_set(atomic64_t *v, long long i) +{ + unsigned high = (unsigned)(i >> 32); + unsigned low = (unsigned)i; + asm volatile(ATOMIC64_ALTERNATIVE(set) + : "+b" (low), "+c" (high) + : "S" (v) + : "eax", "edx", "memory" + ); +} /** * atomic64_read - read atomic64 variable - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically reads the value of @ptr and returns it. + * Atomically reads the value of @v and returns it. */ -static inline u64 atomic64_read(atomic64_t *ptr) +static inline long long atomic64_read(atomic64_t *v) { - u64 res; - - /* - * Note, we inline this atomic64_t primitive because - * it only clobbers EAX/EDX and leaves the others - * untouched. We also (somewhat subtly) rely on the - * fact that cmpxchg8b returns the current 64-bit value - * of the memory location we are touching: - */ - asm volatile( - "mov %%ebx, %%eax\n\t" - "mov %%ecx, %%edx\n\t" - LOCK_PREFIX "cmpxchg8b %1\n" - : "=&A" (res) - : "m" (*ptr) - ); - - return res; -} - -extern u64 atomic64_read(atomic64_t *ptr); + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(read) + : "=A" (r), "+c" (v) + : : "memory" + ); + return r; + } /** * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr and returns @delta + *@ptr + * Atomically adds @i to @v and returns @i + *@v */ -extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /* * Other variants with different arithmetic operators: */ -extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); -extern u64 atomic64_inc_return(atomic64_t *ptr); -extern u64 atomic64_dec_return(atomic64_t *ptr); +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +static inline long long atomic64_inc_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(inc_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} + +static inline long long atomic64_dec_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(dec_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} /** * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr. + * Atomically adds @i to @v. */ -extern void atomic64_add(u64 delta, atomic64_t *ptr); +static inline long long atomic64_add(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /** * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t + * @i: integer value to subtract + * @v: pointer to type atomic64_t * - * Atomically subtracts @delta from @ptr. + * Atomically subtracts @i from @v. */ -extern void atomic64_sub(u64 delta, atomic64_t *ptr); +static inline long long atomic64_sub(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /** * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ -extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); +static inline int atomic64_sub_and_test(long long i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} /** * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically increments @ptr by 1. + * Atomically increments @v by 1. */ -extern void atomic64_inc(atomic64_t *ptr); +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} /** * atomic64_dec - decrement atomic64 variable @@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); * * Atomically decrements @ptr by 1. */ -extern void atomic64_dec(atomic64_t *ptr); +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} /** * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically decrements @ptr by 1 and + * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -extern int atomic64_dec_and_test(atomic64_t *ptr); +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} /** * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically increments @ptr by 1 + * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ -extern int atomic64_inc_and_test(atomic64_t *ptr); +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} /** * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr and returns true + * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ -extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); +static inline int atomic64_add_negative(long long i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + unsigned low = (unsigned)u; + unsigned high = (unsigned)(u >> 32); + asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" + : "+A" (a), "+c" (v), "+S" (low), "+D" (high) + : : "memory"); + return (int)a; +} + + +static inline int atomic64_inc_not_zero(atomic64_t *v) +{ + int r; + asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) + : "=a" (r) + : "S" (v) + : "ecx", "edx", "memory" + ); + return r; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) + : "=A" (r) + : "S" (v) + : "ecx", "memory" + ); + return r; +} + +#undef ATOMIC64_ALTERNATIVE +#undef ATOMIC64_ALTERNATIVE_ #endif /* _ASM_X86_ATOMIC64_32_H */ -- cgit v1.2.3 From d7f6de1e9c4a12e11ba7186c70f0f40caa76f590 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 26 Feb 2010 12:22:41 +0100 Subject: x86: Implement atomic[64]_dec_if_positive() Add support for atomic_dec_if_positive(), and atomic64_dec_if_positive() for x86-64. atomic64_dec_if_positive() for x86-32 was already implemented in a previous patch. Signed-off-by: Luca Barbieri LKML-Reference: <1267183361-20775-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/atomic.h | 23 +++++++++++++++++++++++ arch/x86/include/asm/atomic64_64.h | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 8f8217b9bdac..706c69492c14 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline int atomic_dec_if_positive(atomic_t *v) +{ + int c, old, dec; + c = atomic_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + /** * atomic_inc_short - increment of a short integer * @v: pointer to type int diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 51c5b4056929..4d6e2cd6c88c 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long c, old, dec; + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + #endif /* _ASM_X86_ATOMIC64_64_H */ -- cgit v1.2.3 From ced918eb748ce30b3aace549fd17540e40ffdca0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Feb 2010 16:47:10 +0000 Subject: i8253: Convert i8253_lock to raw_spinlock i8253_lock needs to be a real spinlock in preempt-rt, i.e. it can not be converted to a sleeping lock. Convert it to raw_spinlock and fix up all users. Signed-off-by: Thomas Gleixner Acked-by: Ralf Baechle Acked-by: Dmitry Torokhov Acked-by: Takashi Iwai Cc: Jens Axboe LKML-Reference: <20100217163751.030764372@linutronix.de> --- arch/x86/include/asm/i8253.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89680fd..fc1f579fb965 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h @@ -6,7 +6,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern raw_spinlock_t i8253_lock; extern struct clock_event_device *global_clock_event; -- cgit v1.2.3 From cbb9d729f3433c9c2660b01dc52e6deb89488886 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Jan 2010 14:41:15 +0100 Subject: x86/amd-iommu: Make iommu_map_page and alloc_pte aware of page sizes This patch changes the old map_size parameter of alloc_pte to a page_size parameter which can be used more easily to alloc a pte for intermediate page sizes. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ba19ad4c47d0..5e8da56755dd 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -172,6 +172,34 @@ (~((1ULL << (12 + ((lvl) * 9))) - 1))) #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) +/* + * Returns the page table level to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_LEVEL(pagesize) \ + ((__ffs(pagesize) - 12) / 9) +/* + * Returns the number of ptes to use for a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_PTE_COUNT(pagesize) \ + (1ULL << ((__ffs(pagesize) - 12) % 9)) + +/* + * Aligns a given io-virtual address to a given page size + * Pagesize is expected to be a power-of-two + */ +#define PAGE_SIZE_ALIGN(address, pagesize) \ + ((address) & ~((pagesize) - 1)) +/* + * Creates an IOMMU PTE for an address an a given pagesize + * The PTE has no permission bits set + * Pagesize is expected to be a power-of-two larger than 4096 + */ +#define PAGE_SIZE_PTE(address, pagesize) \ + (((address) | ((pagesize) - 1)) & \ + (~(pagesize >> 1)) & PM_ADDR_MASK) + #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) #define IOMMU_PTE_U (1ULL << 59) -- cgit v1.2.3 From 24cd772315c19e4d9409d0d21367ec1ebab3149f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Jan 2010 17:27:39 +0100 Subject: x86/amd-iommu: Make iommu_unmap_page and fetch_pte aware of page sizes This patch extends the functionality of iommu_unmap_page and fetch_pte to support arbitrary page sizes. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 5e8da56755dd..b150c74e0d48 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -200,6 +200,12 @@ (((address) | ((pagesize) - 1)) & \ (~(pagesize >> 1)) & PM_ADDR_MASK) +/* + * Takes a PTE value with mode=0x07 and returns the page size it maps + */ +#define PTE_PAGE_SIZE(pte) \ + (1ULL << (1 + ffz(((pte) | 0xfffULL)))) + #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) #define IOMMU_PTE_U (1ULL << 59) -- cgit v1.2.3 From ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 13:12:23 +0100 Subject: perf, x86: use LBR for PEBS IP+1 fixup Use the LBR to fix up the PEBS IP+1 issue. As said, PEBS reports the next instruction, here we use the LBR to find the last branch and from that construct the actual IP. If the IP matches the LBR-TO, we use LBR-FROM, otherwise we use the LBR-TO address as the beginning of the last basic block and decode forward. Once we find a match to the current IP, we use the previous location. This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction that caused the event (barring CPU errata). The fixup can fail due to various reasons: 1) LBR contains invalid data (quite possible) 2) part of the basic block got paged out 3) the reported IP isn't part of the basic block (see 1) Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: "Zhang, Yanmin" Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.619375431@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a7..a9038c951619 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 +/* + * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. + * This flag is otherwise unused and ABI specified to be 0, so nobody should + * care what we do with it. + */ +#define PERF_EFLAGS_EXACT (1UL << 3) + +#define perf_misc_flags(regs) \ +({ int misc = 0; \ + if (user_mode(regs)) \ + misc |= PERF_RECORD_MISC_USER; \ + else \ + misc |= PERF_RECORD_MISC_KERNEL; \ + if (regs->flags & PERF_EFLAGS_EXACT) \ + misc |= PERF_RECORD_MISC_EXACT; \ + misc; }) + +#define perf_instruction_pointer(regs) ((regs)->ip) + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } -- cgit v1.2.3 From 30a813ae035d3e220a89609adce878e045c49547 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Mar 2010 13:49:21 +0100 Subject: x86: Move MAX_INSN_SIZE into asm/insn.h Since there's now two users for this, place it in a common header. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.923774125@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/insn.h | 2 ++ arch/x86/include/asm/kprobes.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04ca..88c765e16410 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -68,6 +68,8 @@ struct insn { const insn_byte_t *next_byte; }; +#define MAX_INSN_SIZE 16 + #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_RM(modrm) ((modrm) & 0x07) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8ccb..547882539157 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -24,6 +24,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; #define RELATIVEJUMP_SIZE 5 #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 -#define MAX_INSN_SIZE 16 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) \ (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ -- cgit v1.2.3 From 6f4edd69e40aba4f45bf9558c1e9a950d79ab4e4 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 10 Mar 2010 14:44:58 -0600 Subject: x86, UV: Clean up UV headers for MMR definitions Update UV mmr definitions header file. Eliminate definitions no longer needed. Move 2 definitions from tlb_uv.c into the header file where they belong. Signed-off-by: Jack Steiner LKML-Reference: <20100310204458.GA28835@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_mmrs.h | 528 ++++++-------------------------------- 1 file changed, 79 insertions(+), 449 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 2cae46c7c8a2..b2f2d2e05cec 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -1,4 +1,3 @@ - /* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -14,14 +13,26 @@ #define UV_MMR_ENABLE (1UL << 63) +/* ========================================================================= */ +/* UVH_BAU_DATA_BROADCAST */ +/* ========================================================================= */ +#define UVH_BAU_DATA_BROADCAST 0x61688UL +#define UVH_BAU_DATA_BROADCAST_32 0x0440 + +#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 +#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + +union uvh_bau_data_broadcast_u { + unsigned long v; + struct uvh_bau_data_broadcast_s { + unsigned long enable : 1; /* RW */ + unsigned long rsvd_1_63: 63; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ -#define UVH_LB_BAU_MISC_CONTROL 0x320170UL -#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL -/* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */ #define UVH_BAU_DATA_CONFIG 0x61680UL #define UVH_BAU_DATA_CONFIG_32 0x0438 @@ -603,6 +614,68 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 +/* ========================================================================= */ +/* UVH_LB_BAU_MISC_CONTROL */ +/* ========================================================================= */ +#define UVH_LB_BAU_MISC_CONTROL 0x320170UL +#define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 + +#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +union uvh_lb_bau_misc_control_u { + unsigned long v; + struct uvh_lb_bau_misc_control_s { + unsigned long rejection_delay : 8; /* RW */ + unsigned long apic_mode : 1; /* RW */ + unsigned long force_broadcast : 1; /* RW */ + unsigned long force_lock_nop : 1; /* RW */ + unsigned long csi_agent_presence_vector : 3; /* RW */ + unsigned long descriptor_fetch_mode : 1; /* RW */ + unsigned long enable_intd_soft_ack_mode : 1; /* RW */ + unsigned long intd_soft_ack_timeout_period : 4; /* RW */ + unsigned long enable_dual_mapping_mode : 1; /* RW */ + unsigned long vga_io_port_decode_enable : 1; /* RW */ + unsigned long vga_io_port_16_bit_decode : 1; /* RW */ + unsigned long suppress_dest_registration : 1; /* RW */ + unsigned long programmed_initial_priority : 3; /* RW */ + unsigned long use_incoming_priority : 1; /* RW */ + unsigned long enable_programmed_initial_priority : 1; /* RW */ + unsigned long rsvd_29_47 : 19; /* */ + unsigned long fun : 16; /* RW */ + } s; +}; + /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ @@ -680,334 +753,6 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; -/* ========================================================================= */ -/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ -/* ========================================================================= */ -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL - -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 -#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL - -union uvh_lb_mcast_aoerr0_rpt_enable_u { - unsigned long v; - struct uvh_lb_mcast_aoerr0_rpt_enable_s { - unsigned long mcast_obese_msg : 1; /* RW */ - unsigned long mcast_data_sb_err : 1; /* RW */ - unsigned long mcast_nack_buff_parity : 1; /* RW */ - unsigned long mcast_timeout : 1; /* RW */ - unsigned long mcast_inactive_reply : 1; /* RW */ - unsigned long mcast_upgrade_error : 1; /* RW */ - unsigned long mcast_reg_count_underflow : 1; /* RW */ - unsigned long mcast_rep_obese_msg : 1; /* RW */ - unsigned long ucache_req_runt_msg : 1; /* RW */ - unsigned long ucache_req_obese_msg : 1; /* RW */ - unsigned long ucache_req_data_sb_err : 1; /* RW */ - unsigned long ucache_rep_runt_msg : 1; /* RW */ - unsigned long ucache_rep_obese_msg : 1; /* RW */ - unsigned long ucache_rep_data_sb_err : 1; /* RW */ - unsigned long ucache_rep_command_err : 1; /* RW */ - unsigned long ucache_pend_timeout : 1; /* RW */ - unsigned long macc_req_runt_msg : 1; /* RW */ - unsigned long macc_req_obese_msg : 1; /* RW */ - unsigned long macc_req_data_sb_err : 1; /* RW */ - unsigned long macc_rep_runt_msg : 1; /* RW */ - unsigned long macc_rep_obese_msg : 1; /* RW */ - unsigned long macc_rep_data_sb_err : 1; /* RW */ - unsigned long macc_amo_timeout : 1; /* RW */ - unsigned long macc_put_timeout : 1; /* RW */ - unsigned long macc_spurious_event : 1; /* RW */ - unsigned long ioh_destination_table_parity : 1; /* RW */ - unsigned long get_had_error_reply : 1; /* RW */ - unsigned long get_timeout : 1; /* RW */ - unsigned long lock_manager_had_error_reply : 1; /* RW */ - unsigned long put_had_error_reply : 1; /* RW */ - unsigned long put_timeout : 1; /* RW */ - unsigned long sb_activation_overrun : 1; /* RW */ - unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ - unsigned long completed_gb_activation_timeout : 1; /* RW */ - unsigned long descriptor_buffer_0_parity : 1; /* RW */ - unsigned long descriptor_buffer_1_parity : 1; /* RW */ - unsigned long socket_destination_table_parity : 1; /* RW */ - unsigned long bau_reply_payload_corruption : 1; /* RW */ - unsigned long io_port_destination_table_parity : 1; /* RW */ - unsigned long intd_soft_ack_timeout : 1; /* RW */ - unsigned long int_rep_obese_msg : 1; /* RW */ - unsigned long int_rep_command_err : 1; /* RW */ - unsigned long int_timeout : 1; /* RW */ - unsigned long rsvd_43_63 : 21; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_LOCAL_INT0_CONFIG */ -/* ========================================================================= */ -#define UVH_LOCAL_INT0_CONFIG 0x61000UL - -#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 -#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 -#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 -#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 -#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 -#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 -#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_local_int0_config_u { - unsigned long v; - struct uvh_local_int0_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_LOCAL_INT0_ENABLE */ -/* ========================================================================= */ -#define UVH_LOCAL_INT0_ENABLE 0x65000UL - -#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 -#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 -#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL -#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 -#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL -#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 -#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL -#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 -#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL -#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 -#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL -#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 -#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL -#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 -#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL -#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 -#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL -#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 -#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL -#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 -#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL -#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 -#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL -#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 -#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL -#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 -#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL -#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 -#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL -#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 -#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL -#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 -#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL -#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 -#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL -#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 -#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL -#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 -#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL -#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 -#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL -#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 -#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL -#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 -#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 -#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL -#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 -#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL -#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 -#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 -#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 -#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL -#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 -#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL - -union uvh_local_int0_enable_u { - unsigned long v; - struct uvh_local_int0_enable_s { - unsigned long lb_hcerr : 1; /* RW */ - unsigned long gr0_hcerr : 1; /* RW */ - unsigned long gr1_hcerr : 1; /* RW */ - unsigned long lh_hcerr : 1; /* RW */ - unsigned long rh_hcerr : 1; /* RW */ - unsigned long xn_hcerr : 1; /* RW */ - unsigned long si_hcerr : 1; /* RW */ - unsigned long lb_aoerr0 : 1; /* RW */ - unsigned long gr0_aoerr0 : 1; /* RW */ - unsigned long gr1_aoerr0 : 1; /* RW */ - unsigned long lh_aoerr0 : 1; /* RW */ - unsigned long rh_aoerr0 : 1; /* RW */ - unsigned long xn_aoerr0 : 1; /* RW */ - unsigned long si_aoerr0 : 1; /* RW */ - unsigned long lb_aoerr1 : 1; /* RW */ - unsigned long gr0_aoerr1 : 1; /* RW */ - unsigned long gr1_aoerr1 : 1; /* RW */ - unsigned long lh_aoerr1 : 1; /* RW */ - unsigned long rh_aoerr1 : 1; /* RW */ - unsigned long xn_aoerr1 : 1; /* RW */ - unsigned long si_aoerr1 : 1; /* RW */ - unsigned long rh_vpi_int : 1; /* RW */ - unsigned long system_shutdown_int : 1; /* RW */ - unsigned long lb_irq_int_0 : 1; /* RW */ - unsigned long lb_irq_int_1 : 1; /* RW */ - unsigned long lb_irq_int_2 : 1; /* RW */ - unsigned long lb_irq_int_3 : 1; /* RW */ - unsigned long lb_irq_int_4 : 1; /* RW */ - unsigned long lb_irq_int_5 : 1; /* RW */ - unsigned long lb_irq_int_6 : 1; /* RW */ - unsigned long lb_irq_int_7 : 1; /* RW */ - unsigned long lb_irq_int_8 : 1; /* RW */ - unsigned long lb_irq_int_9 : 1; /* RW */ - unsigned long lb_irq_int_10 : 1; /* RW */ - unsigned long lb_irq_int_11 : 1; /* RW */ - unsigned long lb_irq_int_12 : 1; /* RW */ - unsigned long lb_irq_int_13 : 1; /* RW */ - unsigned long lb_irq_int_14 : 1; /* RW */ - unsigned long lb_irq_int_15 : 1; /* RW */ - unsigned long l1_nmi_int : 1; /* RW */ - unsigned long stop_clock : 1; /* RW */ - unsigned long asic_to_l1 : 1; /* RW */ - unsigned long l1_to_asic : 1; /* RW */ - unsigned long ltc_int : 1; /* RW */ - unsigned long la_seq_trigger : 1; /* RW */ - unsigned long rsvd_45_63 : 19; /* */ - } s; -}; - /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ @@ -1111,26 +856,6 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { } s; }; -/* ========================================================================= */ -/* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL - -#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -union uvh_rh_gam_cfg_overlay_config_mmr_u { - unsigned long v; - struct uvh_rh_gam_cfg_overlay_config_mmr_s { - unsigned long rsvd_0_25: 26; /* */ - unsigned long base : 20; /* RW */ - unsigned long rsvd_46_62: 17; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - /* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ @@ -1262,101 +987,6 @@ union uvh_rtc1_int_config_u { } s; }; -/* ========================================================================= */ -/* UVH_RTC2_INT_CONFIG */ -/* ========================================================================= */ -#define UVH_RTC2_INT_CONFIG 0x61600UL - -#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_RTC2_INT_CONFIG_P_SHFT 13 -#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_RTC2_INT_CONFIG_T_SHFT 15 -#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_RTC2_INT_CONFIG_M_SHFT 16 -#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 -#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_rtc2_int_config_u { - unsigned long v; - struct uvh_rtc2_int_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC3_INT_CONFIG */ -/* ========================================================================= */ -#define UVH_RTC3_INT_CONFIG 0x61640UL - -#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_RTC3_INT_CONFIG_P_SHFT 13 -#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_RTC3_INT_CONFIG_T_SHFT 15 -#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_RTC3_INT_CONFIG_M_SHFT 16 -#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 -#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_rtc3_int_config_u { - unsigned long v; - struct uvh_rtc3_int_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC_INC_RATIO */ -/* ========================================================================= */ -#define UVH_RTC_INC_RATIO 0x350000UL - -#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 -#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL -#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 -#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL - -union uvh_rtc_inc_ratio_u { - unsigned long v; - struct uvh_rtc_inc_ratio_s { - unsigned long fraction : 20; /* RW */ - unsigned long ratio : 3; /* RW */ - unsigned long rsvd_23_63: 41; /* */ - } s; -}; - /* ========================================================================= */ /* UVH_SI_ADDR_MAP_CONFIG */ /* ========================================================================= */ -- cgit v1.2.3 From a072738e04f0eb26370e39ec679e9a0d65e49aea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 11 Mar 2010 19:54:39 +0300 Subject: perf, x86: Implement initial P4 PMU driver The netburst PMU is way different from the "architectural perfomance monitoring" specification that current CPUs use. P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle perfomance monitoring events. A few implementational details: 1) We need a separate x86_pmu::hw_config helper in struct x86_pmu since register bit-fields are quite different from P6, Core and later cpu series. 2) For the same reason is a x86_pmu::schedule_events helper introduced. 3) hw_perf_event::config consists of packed ESCR+CCCR values. It's allowed since in reality both registers only use a half of their size. Of course before making a real write into a particular MSR we need to unpack the value and extend it to a proper size. 4) The tuple of packed ESCR+CCCR in hw_perf_event::config doesn't describe the memory address of ESCR MSR register so that we need to keep a mapping between these tuples used and available ESCR (various P4 events may use same ESCRs but not simultaneously), for this sake every active event has a per-cpu map of hw_perf_event::idx <--> ESCR addresses. 5) Since hw_perf_event::idx is an offset to counter/control register we need to lift X86_PMC_MAX_GENERIC up, otherwise kernel strips it down to 8 registers and event armed may never be turned off (ie the bit in active_mask is set but the loop never reaches this index to check), thanks to Peter Zijlstra Restrictions: - No cascaded counters support (do we ever need them?) - No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS doesn't work for now) - There are events with same counters which can't work simultaneously (need to use intersected ones due to broken counter 1) - No PERF_COUNT_HW_CACHE_ events yet Todo: - Implement dependent events - Need proper hashing for event opcodes (no linear search, good for debugging stage but not in real loads) - Some events counted during a clock cycle -- need to set threshold for them and count every clock cycle just to get summary statistics (ie to behave the same way as other PMUs do) - Need to swicth to use event_constraints - To support RAW events we need to encode a global list of P4 events into p4_templates - Cache events need to be added Event support status matrix: Event status ----------------------------- cycles works cache-references works cache-misses works branch-misses works bus-cycles partially (does not work on 64bit cpu with HT enabled) instruction doesnt work (needs dependent event [mop tagging]) branches doesnt work Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker LKML-Reference: <20100311165439.GB5129@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/include/asm/perf_event_p4.h | 707 +++++++++++++++++++++++++++++++++++ 2 files changed, 708 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/perf_event_p4.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index a9038c951619..124dddd598f3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -5,7 +5,7 @@ * Performance event hw details: */ -#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_GENERIC 32 #define X86_PMC_MAX_FIXED 3 #define X86_PMC_IDX_GENERIC 0 diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000000..829f4711645f --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h @@ -0,0 +1,707 @@ +/* + * Netburst Perfomance Events (P4, old Xeon) + */ + +#ifndef PERF_EVENT_P4_H +#define PERF_EVENT_P4_H + +#include +#include + +/* + * NetBurst has perfomance MSRs shared between + * threads if HT is turned on, ie for both logical + * processors (mem: in turn in Atom with HT support + * perf-MSRs are not shared and every thread has its + * own perf-MSRs set) + */ +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) + +#define P4_EVNTSEL_EVENT_MASK 0x7e000000U +#define P4_EVNTSEL_EVENT_SHIFT 25 +#define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U +#define P4_EVNTSEL_EVENTMASK_SHIFT 9 +#define P4_EVNTSEL_TAG_MASK 0x000001e0U +#define P4_EVNTSEL_TAG_SHIFT 5 +#define P4_EVNTSEL_TAG_ENABLE 0x00000010U +#define P4_EVNTSEL_T0_OS 0x00000008U +#define P4_EVNTSEL_T0_USR 0x00000004U +#define P4_EVNTSEL_T1_OS 0x00000002U +#define P4_EVNTSEL_T1_USR 0x00000001U + +/* Non HT mask */ +#define P4_EVNTSEL_MASK \ + (P4_EVNTSEL_EVENT_MASK | \ + P4_EVNTSEL_EVENTMASK_MASK | \ + P4_EVNTSEL_TAG_MASK | \ + P4_EVNTSEL_TAG_ENABLE | \ + P4_EVNTSEL_T0_OS | \ + P4_EVNTSEL_T0_USR) + +/* HT mask */ +#define P4_EVNTSEL_MASK_HT \ + (P4_EVNTSEL_MASK | \ + P4_EVNTSEL_T1_OS | \ + P4_EVNTSEL_T1_USR) + +#define P4_CCCR_OVF 0x80000000U +#define P4_CCCR_CASCADE 0x40000000U +#define P4_CCCR_OVF_PMI_T0 0x04000000U +#define P4_CCCR_OVF_PMI_T1 0x08000000U +#define P4_CCCR_FORCE_OVF 0x02000000U +#define P4_CCCR_EDGE 0x01000000U +#define P4_CCCR_THRESHOLD_MASK 0x00f00000U +#define P4_CCCR_THRESHOLD_SHIFT 20 +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_COMPLEMENT 0x00080000U +#define P4_CCCR_COMPARE 0x00040000U +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U +#define P4_CCCR_ESCR_SELECT_SHIFT 13 +#define P4_CCCR_ENABLE 0x00001000U +#define P4_CCCR_THREAD_SINGLE 0x00010000U +#define P4_CCCR_THREAD_BOTH 0x00020000U +#define P4_CCCR_THREAD_ANY 0x00030000U + +/* Non HT mask */ +#define P4_CCCR_MASK \ + (P4_CCCR_OVF | \ + P4_CCCR_CASCADE | \ + P4_CCCR_OVF_PMI_T0 | \ + P4_CCCR_FORCE_OVF | \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_ESCR_SELECT_MASK | \ + P4_CCCR_ENABLE) + +/* HT mask */ +#define P4_CCCR_MASK_HT \ + (P4_CCCR_MASK | \ + P4_CCCR_THREAD_ANY) + +/* + * format is 32 bit: ee ss aa aa + * where + * ee - 8 bit event + * ss - 8 bit selector + * aa aa - 16 bits reserved for tags/attributes + */ +#define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) +#define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) +#define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) +#define P4_EVENT_PACK_ATTR(attr) ((attr)) +#define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) +#define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) +#define P4_EVENT_ATTR(class, name) class##_##name +#define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) + +/* + * config field is 64bit width and consists of + * HT << 63 | ESCR << 32 | CCCR + * where HT is HyperThreading bit (since ESCR + * has it reserved we may use it for own purpose) + * + * note that this is NOT the addresses of respective + * ESCR and CCCR but rather an only packed value should + * be unpacked and written to a proper addresses + * + * the base idea is to pack as much info as + * possible + */ +#define p4_config_pack_escr(v) (((u64)(v)) << 32) +#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) + +#define p4_config_unpack_emask(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t &= P4_EVNTSEL_EVENTMASK_MASK; \ + t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ + t; \ + }) + +#define P4_CONFIG_HT_SHIFT 63 +#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) + +static inline u32 p4_config_unpack_opcode(u64 config) +{ + u32 e, s; + + /* + * we don't care about HT presence here since + * event opcode doesn't depend on it + */ + e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; + s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; + + return P4_EVENT_PACK(e, s); +} + +static inline bool p4_is_event_cascaded(u64 config) +{ + u32 cccr = p4_config_unpack_cccr(config); + return !!(cccr & P4_CCCR_CASCADE); +} + +static inline int p4_ht_config_thread(u64 config) +{ + return !!(config & P4_CONFIG_HT); +} + +static inline u64 p4_set_ht_bit(u64 config) +{ + return config | P4_CONFIG_HT; +} + +static inline u64 p4_clear_ht_bit(u64 config) +{ + return config & ~P4_CONFIG_HT; +} + +static inline int p4_ht_active(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings > 1; +#endif + return 0; +} + +static inline int p4_ht_thread(int cpu) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); +#endif + return 0; +} + +static inline int p4_should_swap_ts(u64 config, int cpu) +{ + return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); +} + +static inline u32 p4_default_cccr_conf(int cpu) +{ + /* + * Note that P4_CCCR_THREAD_ANY is "required" on + * non-HT machines (on HT machines we count TS events + * regardless the state of second logical processor + */ + u32 cccr = P4_CCCR_THREAD_ANY; + + if (!p4_ht_thread(cpu)) + cccr |= P4_CCCR_OVF_PMI_T0; + else + cccr |= P4_CCCR_OVF_PMI_T1; + + return cccr; +} + +static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) +{ + u32 escr = 0; + + if (!p4_ht_thread(cpu)) { + if (!exclude_os) + escr |= P4_EVNTSEL_T0_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T0_USR; + } else { + if (!exclude_os) + escr |= P4_EVNTSEL_T1_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T1_USR; + } + + return escr; +} + +/* + * Comments below the event represent ESCR restriction + * for this event and counter index per ESCR + * + * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early + * processor builds (family 0FH, models 01H-02H). These MSRs + * are not available on later versions, so that we don't use + * them completely + * + * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly + * working so that we should not use this CCCR and respective + * counter as result + */ +#define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) + /* + * MSR_P4_BPU_ESCR0: 0, 1 + * MSR_P4_BPU_ESCR1: 2, 3 + */ + +#define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) + /* + * MSR_P4_ITLB_ESCR0: 0, 1 + * MSR_P4_ITLB_ESCR1: 2, 3 + */ + +#define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) + /* + * MSR_P4_MOB_ESCR0: 0, 1 + * MSR_P4_MOB_ESCR1: 2, 3 + */ + +#define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_PMH_ESCR0: 0, 1 + * MSR_P4_PMH_ESCR1: 2, 3 + */ + +#define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) + /* + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + */ + +#define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) + /* + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) + /* + * MSR_P4_FIRM_ESCR: 8, 9 + * MSR_P4_FIRM_ESCR: 10, 11 + */ + +#define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR0: 6, 7 + */ + +#define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR0: 6, 7 + */ + +#define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_ALF_ESCR0: 12, 13, 16 + * MSR_P4_ALF_ESCR1: 14, 15, 17 + */ + +#define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BNR P4_EVENT_PACK(0x08, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) + /* + * MSR_P4_RAT_ESCR0: 12, 13, 16 + * MSR_P4_RAT_ESCR1: 14, 15, 17 + */ + +#define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +#define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +/* + * a caller should use P4_EVENT_ATTR helper to + * pick the attribute needed, for example + * + * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) + */ +enum P4_EVENTS_ATTR { + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), + + P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), + + P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), + + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), + + P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), + + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), + + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), + + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), + + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), + + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), + + P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), + + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), + + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), +}; + +#endif /* PERF_EVENT_P4_H */ -- cgit v1.2.3 From e4495262826d1eabca3529fa6ac22394eb348132 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 15 Mar 2010 12:58:22 +0800 Subject: perf, x86: Enable not tagged retired instruction counting on P4s This should turn on instruction counting on P4s, which was missing in the first version of the new PMU driver. It's inaccurate for now, we still need dependant event to tag mops before we can count them precisely. The result is that the number of instruction may be lifted up. Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra LKML-Reference: <1268629102.3355.11.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 829f4711645f..b47b9e9ac13f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -324,8 +324,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) /* - * MSR_P4_FIRM_ESCR: 8, 9 - * MSR_P4_FIRM_ESCR: 10, 11 + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 */ #define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) @@ -462,8 +462,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) /* - * MSR_P4_CRU_ESCR2: 12, 13, 16 - * MSR_P4_CRU_ESCR3: 14, 15, 17 + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 */ #define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) -- cgit v1.2.3 From 8ea7f544100844307072cae2f5fc108afdef999a Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 16 Mar 2010 10:12:36 +0800 Subject: x86, perf: Fix comments in Pentium-4 PMU definitions Reported-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1268705556.3379.8.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b47b9e9ac13f..b842b3238e46 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -319,6 +319,7 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) /* + * NOTE: no ESCR name in docs, it's guessed * MSR_P4_BSU_ESCR1: 2, 3 */ @@ -468,8 +469,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) /* - * MSR_P4_CRU_ESCR2: 12, 13, 16 - * MSR_P4_CRU_ESCR3: 14, 15, 17 + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 */ #define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) -- cgit v1.2.3 From d674cd1963129b70bc5f631c51fb30fb73213fb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 17 Mar 2010 13:37:00 +0300 Subject: x86, apic: Allow to use certain functions without APIC built-in support In case even if the kernel is configured so that no APIC support is built-in we still may allow to use certain apic functions as dummy calls. In particular we start using it in perf-events code. Note that this is not that same as NOOP apic driver (which is used if APIC support is present but no physical APIC is available), this is for the case when we don't have apic code compiled in at all. Signed-off-by: Cyrill Gorcunov Cc: H. Peter Anvin Cc: Yinghai Lu Cc: Yinghai Lu LKML-Reference: <20100317104356.011052632@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b4ac2cdcb64f..1fa03e04ae44 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -373,6 +373,7 @@ extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); #endif +#ifdef CONFIG_X86_LOCAL_APIC static inline u32 apic_read(u32 reg) { return apic->read(reg); @@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) return apic->safe_wait_icr_idle(); } +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ static inline void ack_APIC_irq(void) { -#ifdef CONFIG_X86_LOCAL_APIC /* * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. @@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) /* Docs say use 0 for future compatibility */ apic_write(APIC_EOI, 0); -#endif } static inline unsigned default_get_apic_id(unsigned long x) -- cgit v1.2.3 From f34edbc1cdb0f8f83d94e1d668dd6e41abf0defb Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 18 Mar 2010 18:33:07 +0800 Subject: perf, x86: Add a key to simplify template lookup in Pentium-4 PMU Currently, we use opcode(Event and Event-Selector) + emask to look up template in p4_templates. But cache events (L1-dcache-load-misses, LLC-load-misses, etc) use the same event(P4_REPLAY_EVENT) to do the counting, ie, they have the same opcode and emask. So we can not use current lookup mechanism to find the template for cache events. This patch introduces a "key", which is the index into p4_templates. The low 12 bits of CCCR are reserved, so we can hide the "key" in the low 12 bits of hwc->config. We extract the key from hwc->config and then quickly find the template. Signed-off-by: Lin Ming Reviewed-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b842b3238e46..7d3406a2773c 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -65,6 +65,7 @@ #define P4_CCCR_THREAD_SINGLE 0x00010000U #define P4_CCCR_THREAD_BOTH 0x00020000U #define P4_CCCR_THREAD_ANY 0x00030000U +#define P4_CCCR_RESERVED 0x00000fffU /* Non HT mask */ #define P4_CCCR_MASK \ @@ -116,7 +117,7 @@ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) -#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) #define p4_config_unpack_emask(v) \ ({ \ @@ -126,6 +127,8 @@ t; \ }) +#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) + #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) -- cgit v1.2.3 From cb7d6b5053e86598735d9af19930f5929f007b7f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 18 Mar 2010 18:33:12 +0800 Subject: perf, x86: Add cache events for the Pentium-4 PMU Move the HT bit setting code from p4_pmu_event_map to p4_hw_config. So the cache events can get HT bit set correctly. Tested on my P4 desktop, below 6 cache events work: L1-dcache-load-misses LLC-load-misses dTLB-load-misses dTLB-store-misses iTLB-loads iTLB-load-misses Signed-off-by: Lin Ming Reviewed-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268908392.13901.128.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/include/asm/perf_event_p4.h | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1cd58cdbc03f..aef562c0a647 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -357,6 +357,8 @@ #define MSR_P4_U2L_ESCR0 0x000003b0 #define MSR_P4_U2L_ESCR1 0x000003b1 +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + /* Intel Core-based CPU performance counters */ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 7d3406a2773c..871249cf4d2b 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR { P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), }; +enum { + KEY_P4_L1D_OP_READ_RESULT_MISS, + KEY_P4_LL_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + KEY_P4_ITLB_OP_READ_RESULT_MISS, + KEY_P4_UOP_TYPE, +}; + #endif /* PERF_EVENT_P4_H */ -- cgit v1.2.3 From 9c8c6bad3137112d2c7bf3d215b736ee4215fa74 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 19 Mar 2010 00:12:56 +0300 Subject: x86, perf: Fix few cosmetic dabs for P4 pmu (comments and constantify) - A few ESCR have escaped fixing at previous attempt. - p4_escr_map is read only, make it const. Nothing serious. Signed-off-by: Cyrill Gorcunov Cc: Lin Ming LKML-Reference: <20100318211256.GH5062@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 871249cf4d2b..2a1a57f71539 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -401,13 +401,13 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) /* * MSR_P4_TBPU_ESCR0: 4, 5 - * MSR_P4_TBPU_ESCR0: 6, 7 + * MSR_P4_TBPU_ESCR1: 6, 7 */ #define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) /* * MSR_P4_TBPU_ESCR0: 4, 5 - * MSR_P4_TBPU_ESCR0: 6, 7 + * MSR_P4_TBPU_ESCR1: 6, 7 */ #define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) -- cgit v1.2.3 From 40b7e05e17eef31ff30fe08dfc2424ef653a792c Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 19 Mar 2010 15:28:58 +0800 Subject: perf, x86: Fix key indexing in Pentium-4 PMU Index 0-6 in p4_templates are reserved for common hardware events. So p4_templates is arranged as below: 0 - 6: common hardware events 7 - N: cache events N+1 - ...: other raw events Reported-by: Cyrill Gorcunov Signed-off-by: Lin Ming Acked-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268983738.13901.142.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 2a1a57f71539..facf96186b26 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -709,7 +709,7 @@ enum P4_EVENTS_ATTR { }; enum { - KEY_P4_L1D_OP_READ_RESULT_MISS, + KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX, KEY_P4_LL_OP_READ_RESULT_MISS, KEY_P4_DTLB_OP_READ_RESULT_MISS, KEY_P4_DTLB_OP_WRITE_RESULT_MISS, -- cgit v1.2.3 From 4bd96a7a8185755b091233b16034c7436cbf57af Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Wed, 10 Mar 2010 14:36:10 +0800 Subject: x86, tboot: Add support for S3 memory integrity protection This patch adds support for S3 memory integrity protection within an Intel(R) TXT launched kernel, for all kernel and userspace memory. All RAM used by the kernel and userspace, as indicated by memory ranges of type E820_RAM and E820_RESERVED_KERN in the e820 table, will be integrity protected. The MAINTAINERS file is also updated to reflect the maintainers of the TXT-related code. All MACing is done in tboot, based on a complexity analysis and tradeoff. v3: Compared with v2, this patch adds a check of array size in tboot.c, and a note to specify which c/s of tboot supports this kind of MACing in intel_txt.txt. Signed-off-by: Shane Wang LKML-Reference: <4B973DDA.6050902@intel.com> Signed-off-by: Joseph Cihula Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/e820.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 0e22296790d3..ec8a52d14ab1 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -45,7 +45,12 @@ #define E820_NVS 4 #define E820_UNUSABLE 5 -/* reserved RAM used by kernel itself */ +/* + * reserved RAM used by kernel itself + * if CONFIG_INTEL_TXT is enabled, memory of this type will be + * included in the S3 integrity calculation and so should not include + * any memory that BIOS might alter over the S3 transition + */ #define E820_RESERVED_KERN 128 #ifndef __ASSEMBLY__ -- cgit v1.2.3 From d814f30105798b6677ecb73ed61d691ff96dada9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 24 Mar 2010 12:09:26 +0800 Subject: x86, perf: Add raw events support for the P4 PMU The adding of raw event support lead to complete code refactoring. I hope is became more readable then it was. The list of changes: 1) The 64bit config field is enough to hold all information we need to track event details. To achieve it we used *own* enum for events selection in ESCR register and map this key into proper value at moment of event enabling. For the same reason we use 12LSB bits in CCCR register -- to track which exactly cache trace event was requested. And we cear this bits at real 'write' moment. 2) There is no per-cpu area reserved for P4 PMU anymore. We don't need it. All is held by config. 3) Now we may use any available counter, ie we try to grab any possible counter. v2: - Lin Ming reported the lack of ESCR selector in CCCR for cache events v3: - Don't loose cache event codes at config unpacking procedure, we may need it one day so no obscure hack behind our back, better to clear reserved bits explicitly when needed (thanks Ming for pointing out) - Lin Ming fixed misplaced opcodes in cache events Signed-off-by: Cyrill Gorcunov Tested-by: Lin Ming Signed-off-by: Lin Ming Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1269403766.3409.6.camel@minggr.sh.intel.com> [ v4: did a few whitespace fixlets ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 691 +++++++++++++++++++---------------- 1 file changed, 382 insertions(+), 309 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index facf96186b26..b05400a542ff 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -15,38 +15,40 @@ * perf-MSRs are not shared and every thread has its * own perf-MSRs set) */ -#define ARCH_P4_TOTAL_ESCR (46) -#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ -#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) -#define ARCH_P4_MAX_CCCR (18) -#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) - -#define P4_EVNTSEL_EVENT_MASK 0x7e000000U -#define P4_EVNTSEL_EVENT_SHIFT 25 -#define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U -#define P4_EVNTSEL_EVENTMASK_SHIFT 9 -#define P4_EVNTSEL_TAG_MASK 0x000001e0U -#define P4_EVNTSEL_TAG_SHIFT 5 -#define P4_EVNTSEL_TAG_ENABLE 0x00000010U -#define P4_EVNTSEL_T0_OS 0x00000008U -#define P4_EVNTSEL_T0_USR 0x00000004U -#define P4_EVNTSEL_T1_OS 0x00000002U -#define P4_EVNTSEL_T1_USR 0x00000001U +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) + +#define P4_ESCR_EVENT_MASK 0x7e000000U +#define P4_ESCR_EVENT_SHIFT 25 +#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U +#define P4_ESCR_EVENTMASK_SHIFT 9 +#define P4_ESCR_TAG_MASK 0x000001e0U +#define P4_ESCR_TAG_SHIFT 5 +#define P4_ESCR_TAG_ENABLE 0x00000010U +#define P4_ESCR_T0_OS 0x00000008U +#define P4_ESCR_T0_USR 0x00000004U +#define P4_ESCR_T1_OS 0x00000002U +#define P4_ESCR_T1_USR 0x00000001U + +#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) +#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) /* Non HT mask */ -#define P4_EVNTSEL_MASK \ - (P4_EVNTSEL_EVENT_MASK | \ - P4_EVNTSEL_EVENTMASK_MASK | \ - P4_EVNTSEL_TAG_MASK | \ - P4_EVNTSEL_TAG_ENABLE | \ - P4_EVNTSEL_T0_OS | \ - P4_EVNTSEL_T0_USR) +#define P4_ESCR_MASK \ + (P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE | \ + P4_ESCR_T0_OS | \ + P4_ESCR_T0_USR) /* HT mask */ -#define P4_EVNTSEL_MASK_HT \ - (P4_EVNTSEL_MASK | \ - P4_EVNTSEL_T1_OS | \ - P4_EVNTSEL_T1_USR) +#define P4_ESCR_MASK_HT \ + (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U @@ -56,7 +58,6 @@ #define P4_CCCR_EDGE 0x01000000U #define P4_CCCR_THRESHOLD_MASK 0x00f00000U #define P4_CCCR_THRESHOLD_SHIFT 20 -#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_COMPLEMENT 0x00080000U #define P4_CCCR_COMPARE 0x00040000U #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U @@ -67,6 +68,13 @@ #define P4_CCCR_THREAD_ANY 0x00030000U #define P4_CCCR_RESERVED 0x00000fffU +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) + +/* Custom bits in reerved CCCR area */ +#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU + + /* Non HT mask */ #define P4_CCCR_MASK \ (P4_CCCR_OVF | \ @@ -81,25 +89,11 @@ P4_CCCR_ENABLE) /* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | \ - P4_CCCR_THREAD_ANY) +#define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) -/* - * format is 32 bit: ee ss aa aa - * where - * ee - 8 bit event - * ss - 8 bit selector - * aa aa - 16 bits reserved for tags/attributes - */ -#define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) -#define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) -#define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) -#define P4_EVENT_PACK_ATTR(attr) ((attr)) -#define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) -#define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) -#define P4_EVENT_ATTR(class, name) class##_##name -#define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) +#define P4_GEN_ESCR_EMASK(class, name, bit) \ + class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_EMASK_BIT(class, name) class##__##name /* * config field is 64bit width and consists of @@ -117,35 +111,29 @@ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) -#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_emask(v) \ ({ \ u32 t = p4_config_unpack_escr((v)); \ - t &= P4_EVNTSEL_EVENTMASK_MASK; \ - t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ + t = t & P4_ESCR_EVENTMASK_MASK; \ + t = t >> P4_ESCR_EVENTMASK_SHIFT; \ + t; \ + }) + +#define p4_config_unpack_event(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t = t & P4_ESCR_EVENT_MASK; \ + t = t >> P4_ESCR_EVENT_SHIFT; \ t; \ }) -#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) +#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) -static inline u32 p4_config_unpack_opcode(u64 config) -{ - u32 e, s; - - /* - * we don't care about HT presence here since - * event opcode doesn't depend on it - */ - e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; - s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; - - return P4_EVENT_PACK(e, s); -} - static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); @@ -212,19 +200,73 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) if (!p4_ht_thread(cpu)) { if (!exclude_os) - escr |= P4_EVNTSEL_T0_OS; + escr |= P4_ESCR_T0_OS; if (!exclude_usr) - escr |= P4_EVNTSEL_T0_USR; + escr |= P4_ESCR_T0_USR; } else { if (!exclude_os) - escr |= P4_EVNTSEL_T1_OS; + escr |= P4_ESCR_T1_OS; if (!exclude_usr) - escr |= P4_EVNTSEL_T1_USR; + escr |= P4_ESCR_T1_USR; } return escr; } +enum P4_EVENTS { + P4_EVENT_TC_DELIVER_MODE, + P4_EVENT_BPU_FETCH_REQUEST, + P4_EVENT_ITLB_REFERENCE, + P4_EVENT_MEMORY_CANCEL, + P4_EVENT_MEMORY_COMPLETE, + P4_EVENT_LOAD_PORT_REPLAY, + P4_EVENT_STORE_PORT_REPLAY, + P4_EVENT_MOB_LOAD_REPLAY, + P4_EVENT_PAGE_WALK_TYPE, + P4_EVENT_BSQ_CACHE_REFERENCE, + P4_EVENT_IOQ_ALLOCATION, + P4_EVENT_IOQ_ACTIVE_ENTRIES, + P4_EVENT_FSB_DATA_ACTIVITY, + P4_EVENT_BSQ_ALLOCATION, + P4_EVENT_BSQ_ACTIVE_ENTRIES, + P4_EVENT_SSE_INPUT_ASSIST, + P4_EVENT_PACKED_SP_UOP, + P4_EVENT_PACKED_DP_UOP, + P4_EVENT_SCALAR_SP_UOP, + P4_EVENT_SCALAR_DP_UOP, + P4_EVENT_64BIT_MMX_UOP, + P4_EVENT_128BIT_MMX_UOP, + P4_EVENT_X87_FP_UOP, + P4_EVENT_TC_MISC, + P4_EVENT_GLOBAL_POWER_EVENTS, + P4_EVENT_TC_MS_XFER, + P4_EVENT_UOP_QUEUE_WRITES, + P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, + P4_EVENT_RETIRED_BRANCH_TYPE, + P4_EVENT_RESOURCE_STALL, + P4_EVENT_WC_BUFFER, + P4_EVENT_B2B_CYCLES, + P4_EVENT_BNR, + P4_EVENT_SNOOP, + P4_EVENT_RESPONSE, + P4_EVENT_FRONT_END_EVENT, + P4_EVENT_EXECUTION_EVENT, + P4_EVENT_REPLAY_EVENT, + P4_EVENT_INSTR_RETIRED, + P4_EVENT_UOPS_RETIRED, + P4_EVENT_UOP_TYPE, + P4_EVENT_BRANCH_RETIRED, + P4_EVENT_MISPRED_BRANCH_RETIRED, + P4_EVENT_X87_ASSIST, + P4_EVENT_MACHINE_CLEAR, + P4_EVENT_INSTR_COMPLETED, +}; + +#define P4_OPCODE(event) event##_OPCODE +#define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) +#define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) +#define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) + /* * Comments below the event represent ESCR restriction * for this event and counter index per ESCR @@ -238,484 +280,515 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) * working so that we should not use this CCCR and respective * counter as result */ -#define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) +enum P4_EVENT_OPCODES { + P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), /* * MSR_P4_TC_ESCR0: 4, 5 * MSR_P4_TC_ESCR1: 6, 7 */ -#define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) + P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), /* * MSR_P4_BPU_ESCR0: 0, 1 * MSR_P4_BPU_ESCR1: 2, 3 */ -#define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) + P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), /* * MSR_P4_ITLB_ESCR0: 0, 1 * MSR_P4_ITLB_ESCR1: 2, 3 */ -#define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) + P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), /* * MSR_P4_DAC_ESCR0: 8, 9 * MSR_P4_DAC_ESCR1: 10, 11 */ -#define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) + P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) + P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) + P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) + P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), /* * MSR_P4_MOB_ESCR0: 0, 1 * MSR_P4_MOB_ESCR1: 2, 3 */ -#define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) + P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), /* * MSR_P4_PMH_ESCR0: 0, 1 * MSR_P4_PMH_ESCR1: 2, 3 */ -#define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) + P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), /* * MSR_P4_BSU_ESCR0: 0, 1 * MSR_P4_BSU_ESCR1: 2, 3 */ -#define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) + P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) + P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), /* * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) + P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) + P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), /* * MSR_P4_BSU_ESCR0: 0, 1 */ -#define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) + P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), /* * NOTE: no ESCR name in docs, it's guessed * MSR_P4_BSU_ESCR1: 2, 3 */ -#define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) + P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) + P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) + P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) + P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) + P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) + P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) + P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) + P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) + P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), /* * MSR_P4_TC_ESCR0: 4, 5 * MSR_P4_TC_ESCR1: 6, 7 */ -#define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) + P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) + P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), /* * MSR_P4_MS_ESCR0: 4, 5 * MSR_P4_MS_ESCR1: 6, 7 */ -#define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) + P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), /* * MSR_P4_MS_ESCR0: 4, 5 * MSR_P4_MS_ESCR1: 6, 7 */ -#define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) + P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), /* * MSR_P4_TBPU_ESCR0: 4, 5 * MSR_P4_TBPU_ESCR1: 6, 7 */ -#define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) + P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), /* * MSR_P4_TBPU_ESCR0: 4, 5 * MSR_P4_TBPU_ESCR1: 6, 7 */ -#define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) + P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), /* * MSR_P4_ALF_ESCR0: 12, 13, 16 * MSR_P4_ALF_ESCR1: 14, 15, 17 */ -#define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) + P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), /* * MSR_P4_DAC_ESCR0: 8, 9 * MSR_P4_DAC_ESCR1: 10, 11 */ -#define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) + P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_BNR P4_EVENT_PACK(0x08, 0x03) + P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) + P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) + P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) + P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) + P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) + P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) + P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) + P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) + P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), /* * MSR_P4_RAT_ESCR0: 12, 13, 16 * MSR_P4_RAT_ESCR1: 14, 15, 17 */ -#define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) + P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) + P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) + P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 0x05) + P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) + P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ +}; /* - * a caller should use P4_EVENT_ATTR helper to - * pick the attribute needed, for example + * a caller should use P4_ESCR_EMASK_NAME helper to + * pick the EventMask needed, for example * - * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) + * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) */ -enum P4_EVENTS_ATTR { - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), - - P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), - - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), - - P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), - P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), - - P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), - P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), - - P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), - - P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), - - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), - - P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), - P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), - - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), - - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), - - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), - - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), - - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), - - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), - - P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), - - P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), - - P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), - - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), - - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), - - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), - - P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), - - P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), - P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), - - P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), - - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), - - P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), - - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), - - P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), +enum P4_ESCR_EMASKS { + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), + + P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), + + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), - - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), - P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), - P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), }; -enum { - KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX, - KEY_P4_LL_OP_READ_RESULT_MISS, - KEY_P4_DTLB_OP_READ_RESULT_MISS, - KEY_P4_DTLB_OP_WRITE_RESULT_MISS, - KEY_P4_ITLB_OP_READ_RESULT_ACCESS, - KEY_P4_ITLB_OP_READ_RESULT_MISS, - KEY_P4_UOP_TYPE, +/* P4 PEBS: stale for a while */ +#define P4_PEBS_METRIC_MASK 0x00001fffU +#define P4_PEBS_UOB_TAG 0x01000000U +#define P4_PEBS_ENABLE 0x02000000U + +/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ +#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 +#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 +#define P4_PEBS__dtlb_load_miss_retired 0x3000004 +#define P4_PEBS__dtlb_store_miss_retired 0x3000004 +#define P4_PEBS__dtlb_all_miss_retired 0x3000004 +#define P4_PEBS__tagged_mispred_branch 0x3018000 +#define P4_PEBS__mob_load_replay_retired 0x3000200 +#define P4_PEBS__split_load_retired 0x3000400 +#define P4_PEBS__split_store_retired 0x3000400 + +#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 +#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 +#define P4_VERT__dtlb_load_miss_retired 0x0000001 +#define P4_VERT__dtlb_store_miss_retired 0x0000002 +#define P4_VERT__dtlb_all_miss_retired 0x0000003 +#define P4_VERT__tagged_mispred_branch 0x0000010 +#define P4_VERT__mob_load_replay_retired 0x0000001 +#define P4_VERT__split_load_retired 0x0000001 +#define P4_VERT__split_store_retired 0x0000002 + +enum P4_CACHE_EVENTS { + P4_CACHE__NONE, + + P4_CACHE__1stl_cache_load_miss_retired, + P4_CACHE__2ndl_cache_load_miss_retired, + P4_CACHE__dtlb_load_miss_retired, + P4_CACHE__dtlb_store_miss_retired, + P4_CACHE__itlb_reference_hit, + P4_CACHE__itlb_reference_miss, + + P4_CACHE__MAX }; #endif /* PERF_EVENT_P4_H */ -- cgit v1.2.3 From 7c5ecaf7666617889f337296c610815b519abfa9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:49 +0100 Subject: perf, x86: Clean up debugctlmsr bit definitions Move all debugctlmsr thingies into msr-index.h Signed-off-by: Peter Zijlstra LKML-Reference: <20100325135413.861425293@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index aef562c0a647..06e4cf0d3846 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -71,11 +71,14 @@ #define MSR_IA32_LASTINTTOIP 0x000001de /* DEBUGCTLMSR bits (others vary by model): */ -#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ -#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ - -#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) -#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 -- cgit v1.2.3 From faa4602e47690fb11221e00f9b9697c8dc0d4b19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:50 +0100 Subject: x86, perf, bts, mm: Delete the never used BTS-ptrace code Support for the PMU's BTS features has been upstreamed in v2.6.32, but we still have the old and disabled ptrace-BTS, as Linus noticed it not so long ago. It's buggy: TIF_DEBUGCTLMSR is trampling all over that MSR without regard for other uses (perf) and doesn't provide the flexibility needed for perf either. Its users are ptrace-block-step and ptrace-bts, since ptrace-bts was never used and ptrace-block-step can be implemented using a much simpler approach. So axe all 3000 lines of it. That includes the *locked_memory*() APIs in mm/mlock.c as well. Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra Cc: Roland McGrath Cc: Oleg Nesterov Cc: Markus Metzger Cc: Steven Rostedt Cc: Andrew Morton LKML-Reference: <20100325135413.938004390@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 302 ------------------------------------- arch/x86/include/asm/processor.h | 33 +--- arch/x86/include/asm/ptrace-abi.h | 57 +------ arch/x86/include/asm/ptrace.h | 6 - arch/x86/include/asm/thread_info.h | 6 +- 5 files changed, 3 insertions(+), 401 deletions(-) delete mode 100644 arch/x86/include/asm/ds.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h deleted file mode 100644 index 70dac199b093..000000000000 --- a/arch/x86/include/asm/ds.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Debug Store (DS) support - * - * This provides a low-level interface to the hardware's Debug Store - * feature that is used for branch trace store (BTS) and - * precise-event based sampling (PEBS). - * - * It manages: - * - DS and BTS hardware configuration - * - buffer overflow handling (to be done) - * - buffer access - * - * It does not do: - * - security checking (is the caller allowed to trace the task) - * - buffer allocation (memory accounting) - * - * - * Copyright (C) 2007-2009 Intel Corporation. - * Markus Metzger , 2007-2009 - */ - -#ifndef _ASM_X86_DS_H -#define _ASM_X86_DS_H - - -#include -#include -#include - - -#ifdef CONFIG_X86_DS - -struct task_struct; -struct ds_context; -struct ds_tracer; -struct bts_tracer; -struct pebs_tracer; - -typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); -typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); - - -/* - * A list of features plus corresponding macros to talk about them in - * the ds_request function's flags parameter. - * - * We use the enum to index an array of corresponding control bits; - * we use the macro to index a flags bit-vector. - */ -enum ds_feature { - dsf_bts = 0, - dsf_bts_kernel, -#define BTS_KERNEL (1 << dsf_bts_kernel) - /* trace kernel-mode branches */ - - dsf_bts_user, -#define BTS_USER (1 << dsf_bts_user) - /* trace user-mode branches */ - - dsf_bts_overflow, - dsf_bts_max, - dsf_pebs = dsf_bts_max, - - dsf_pebs_max, - dsf_ctl_max = dsf_pebs_max, - dsf_bts_timestamps = dsf_ctl_max, -#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) - /* add timestamps into BTS trace */ - -#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) -}; - - -/* - * Request BTS or PEBS - * - * Due to alignement constraints, the actual buffer may be slightly - * smaller than the requested or provided buffer. - * - * Returns a pointer to a tracer structure on success, or - * ERR_PTR(errcode) on failure. - * - * The interrupt threshold is independent from the overflow callback - * to allow users to use their own overflow interrupt handling mechanism. - * - * The function might sleep. - * - * task: the task to request recording for - * cpu: the cpu to request recording for - * base: the base pointer for the (non-pageable) buffer; - * size: the size of the provided buffer in bytes - * ovfl: pointer to a function to be called on buffer overflow; - * NULL if cyclic buffer requested - * th: the interrupt threshold in records from the end of the buffer; - * -1 if no interrupt threshold is requested. - * flags: a bit-mask of the above flags - */ -extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); - -/* - * Release BTS or PEBS resources - * Suspend and resume BTS or PEBS tracing - * - * Must be called with irq's enabled. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern void ds_release_bts(struct bts_tracer *tracer); -extern void ds_suspend_bts(struct bts_tracer *tracer); -extern void ds_resume_bts(struct bts_tracer *tracer); -extern void ds_release_pebs(struct pebs_tracer *tracer); -extern void ds_suspend_pebs(struct pebs_tracer *tracer); -extern void ds_resume_pebs(struct pebs_tracer *tracer); - -/* - * Release BTS or PEBS resources - * Suspend and resume BTS or PEBS tracing - * - * Cpu tracers must call this on the traced cpu. - * Task tracers must call ds_release_~_noirq() for themselves. - * - * May be called with irq's disabled. - * - * Returns 0 if successful; - * -EPERM if the cpu tracer does not trace the current cpu. - * -EPERM if the task tracer does not trace itself. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_release_bts_noirq(struct bts_tracer *tracer); -extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); -extern int ds_resume_bts_noirq(struct bts_tracer *tracer); -extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); -extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); -extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); - - -/* - * The raw DS buffer state as it is used for BTS and PEBS recording. - * - * This is the low-level, arch-dependent interface for working - * directly on the raw trace data. - */ -struct ds_trace { - /* the number of bts/pebs records */ - size_t n; - /* the size of a bts/pebs record in bytes */ - size_t size; - /* pointers into the raw buffer: - - to the first entry */ - void *begin; - /* - one beyond the last entry */ - void *end; - /* - one beyond the newest entry */ - void *top; - /* - the interrupt threshold */ - void *ith; - /* flags given on ds_request() */ - unsigned int flags; -}; - -/* - * An arch-independent view on branch trace data. - */ -enum bts_qualifier { - bts_invalid, -#define BTS_INVALID bts_invalid - - bts_branch, -#define BTS_BRANCH bts_branch - - bts_task_arrives, -#define BTS_TASK_ARRIVES bts_task_arrives - - bts_task_departs, -#define BTS_TASK_DEPARTS bts_task_departs - - bts_qual_bit_size = 4, - bts_qual_max = (1 << bts_qual_bit_size), -}; - -struct bts_struct { - __u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - __u64 from; - __u64 to; - } lbr; - /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ - struct { - __u64 clock; - pid_t pid; - } event; - } variant; -}; - - -/* - * The BTS state. - * - * This gives access to the raw DS state and adds functions to provide - * an arch-independent view of the BTS data. - */ -struct bts_trace { - struct ds_trace ds; - - int (*read)(struct bts_tracer *tracer, const void *at, - struct bts_struct *out); - int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); -}; - - -/* - * The PEBS state. - * - * This gives access to the raw DS state and the PEBS-specific counter - * reset value. - */ -struct pebs_trace { - struct ds_trace ds; - - /* the number of valid counters in the below array */ - unsigned int counters; - -#define MAX_PEBS_COUNTERS 4 - /* the counter reset value */ - unsigned long long counter_reset[MAX_PEBS_COUNTERS]; -}; - - -/* - * Read the BTS or PEBS trace. - * - * Returns a view on the trace collected for the parameter tracer. - * - * The view remains valid as long as the traced task is not running or - * the tracer is suspended. - * Writes into the trace buffer are not reflected. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); -extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); - - -/* - * Reset the write pointer of the BTS/PEBS buffer. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_reset_bts(struct bts_tracer *tracer); -extern int ds_reset_pebs(struct pebs_tracer *tracer); - -/* - * Set the PEBS counter reset value. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_pebs() - * counter: the index of the counter - * value: the new counter reset value - */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, - unsigned int counter, u64 value); - -/* - * Initialization - */ -struct cpuinfo_x86; -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); - -/* - * Context switch work - */ -extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); - -#else /* CONFIG_X86_DS */ - -struct cpuinfo_x86; -static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} -static inline void ds_switch_to(struct task_struct *prev, - struct task_struct *next) {} - -#endif /* CONFIG_X86_DS */ -#endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..5bec21a66dc5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,7 +21,6 @@ struct mm_struct; #include #include #include -#include #include #include @@ -29,6 +28,7 @@ struct mm_struct; #include #include #include +#include #define HBP_NUM 4 /* @@ -473,10 +473,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; -/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ - unsigned long debugctlmsr; - /* Debug Store context; see asm/ds.h */ - struct ds_context *ds_ctx; }; static inline unsigned long native_get_debugreg(int regno) @@ -814,21 +810,6 @@ static inline unsigned long get_debugctlmsr(void) return debugctlmsr; } -static inline unsigned long get_debugctlmsr_on_cpu(int cpu) -{ - u64 debugctlmsr = 0; - u32 val1, val2; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); - debugctlmsr = val1 | ((u64)val2 << 32); - - return debugctlmsr; -} - static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR @@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } -static inline void update_debugctlmsr_on_cpu(int cpu, - unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, - (u32)((u64)debugctlmsr), - (u32)((u64)debugctlmsr >> 32)); -} - /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..52b098a6eebb 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -82,61 +82,6 @@ #ifndef __ASSEMBLY__ #include - -/* configuration/status structure used in PTRACE_BTS_CONFIG and - PTRACE_BTS_STATUS commands. -*/ -struct ptrace_bts_config { - /* requested or actual size of BTS buffer in bytes */ - __u32 size; - /* bitmask of below flags */ - __u32 flags; - /* buffer overflow signal */ - __u32 signal; - /* actual size of bts_struct in bytes */ - __u32 bts_size; -}; -#endif /* __ASSEMBLY__ */ - -#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ -#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ -#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG on buffer overflow - instead of wrapping around */ -#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ - -#define PTRACE_BTS_CONFIG 40 -/* Configure branch trace recording. - ADDR points to a struct ptrace_bts_config. - DATA gives the size of that buffer. - A new buffer is allocated, if requested in the flags. - An overflow signal may only be requested for new buffers. - Returns the number of bytes read. -*/ -#define PTRACE_BTS_STATUS 41 -/* Return the current configuration in a struct ptrace_bts_config - pointed to by ADDR; DATA gives the size of that buffer. - Returns the number of bytes written. -*/ -#define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records for draining. - DATA and ADDR are ignored. -*/ -#define PTRACE_BTS_GET 43 -/* Get a single BTS record. - DATA defines the index into the BTS array, where 0 is the newest - entry, and higher indices refer to older entries. - ADDR is pointing to struct bts_struct (see asm/ds.h). -*/ -#define PTRACE_BTS_CLEAR 44 -/* Clear the BTS buffer. - DATA and ADDR are ignored. -*/ -#define PTRACE_BTS_DRAIN 45 -/* Read all available BTS records and clear the buffer. - ADDR points to an array of struct bts_struct. - DATA gives the size of that buffer. - BTS records are read from oldest to newest. - Returns number of BTS records drained. -*/ +#endif #endif /* _ASM_X86_PTRACE_ABI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a7dff0..78cd1ea94500 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#ifdef CONFIG_X86_PTRACE_BTS -extern void ptrace_bts_untrace(struct task_struct *tsk); - -#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) -#endif /* CONFIG_X86_PTRACE_BTS */ - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..dc85e12d1405 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,8 +92,6 @@ struct thread_info { #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FREEZE 23 /* is freezing for suspend */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ @@ -115,8 +113,6 @@ struct thread_info { #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -147,7 +143,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) -- cgit v1.2.3 From ea8e61b7bbc4a2faef77db34eb2db2a2c2372ff6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:51 +0100 Subject: x86, ptrace: Fix block-step Implement ptrace-block-step using TIF_BLOCKSTEP which will set DEBUGCTLMSR_BTF when set for a task while preserving any other DEBUGCTLMSR bits. Signed-off-by: Peter Zijlstra LKML-Reference: <20100325135414.017536066@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/include/asm/thread_info.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5bec21a66dc5..32428b410b55 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -799,7 +799,7 @@ extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) { - unsigned long debugctlmsr = 0; + unsigned long debugctlmsr = 0; #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) @@ -807,7 +807,7 @@ static inline unsigned long get_debugctlmsr(void) #endif rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - return debugctlmsr; + return debugctlmsr; } static inline void update_debugctlmsr(unsigned long debugctlmsr) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index dc85e12d1405..d017ed5502e2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,6 +92,7 @@ struct thread_info { #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FREEZE 23 /* is freezing for suspend */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ @@ -113,6 +114,7 @@ struct thread_info { #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -143,7 +145,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) -- cgit v1.2.3 From 948b1bb89a44561560531394c18da4a99215f772 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 29 Mar 2010 18:36:50 +0200 Subject: perf, x86: Undo some some *_counter* -> *_event* renames The big rename: cdd6c48 perf: Do the big rename: Performance Counters -> Performance Events accidentally renamed some members of stucts that were named after registers in the spec. To avoid confusion this patch reverts some changes. The related specs are MSR descriptions in AMD's BKDGs and the ARCHITECTURAL PERFORMANCE MONITORING section in the Intel 64 and IA-32 Architectures Software Developer's Manuals. This patch does: $ sed -i -e 's:num_events:num_counters:g' \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c \ arch/x86/oprofile/op_model_ppro.c $ sed -i -e 's:event_bits:cntval_bits:g' -e 's:event_mask:cntval_mask:g' \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1269880612-25800-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 124dddd598f3..987bf673141e 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -67,7 +67,7 @@ union cpuid10_eax { struct { unsigned int version_id:8; - unsigned int num_events:8; + unsigned int num_counters:8; unsigned int bit_width:8; unsigned int mask_length:8; } split; @@ -76,7 +76,7 @@ union cpuid10_eax { union cpuid10_edx { struct { - unsigned int num_events_fixed:4; + unsigned int num_counters_fixed:4; unsigned int reserved:28; } split; unsigned int full; -- cgit v1.2.3 From a098f4484bc7dae23f5b62360954007b99b64600 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 30 Mar 2010 11:28:21 +0200 Subject: perf, x86: implement ARCH_PERFMON_EVENTSEL bit masks ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This patch adds macros for the bit masks and removes local defines. The function intel_pmu_raw_event() becomes x86_pmu_raw_event() which is generic for x86 models and same also for p6. Duplicate code is removed. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <20100330092821.GH11907@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 58 +++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 987bf673141e..f6d43dbfd8e7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -18,39 +18,31 @@ #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 -#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ - - -#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL -#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL -#define INTEL_ARCH_EDGE_MASK 0x00040000ULL -#define INTEL_ARCH_INV_MASK 0x00800000ULL -#define INTEL_ARCH_CNT_MASK 0xFF000000ULL -#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) - -/* - * filter mask to validate fixed counter events. - * the following filters disqualify for fixed counters: - * - inv - * - edge - * - cnt-mask - * The other filters are supported by fixed counters. - * The any-thread option is supported starting with v3. - */ -#define INTEL_ARCH_FIXED_MASK \ - (INTEL_ARCH_CNT_MASK| \ - INTEL_ARCH_INV_MASK| \ - INTEL_ARCH_EDGE_MASK|\ - INTEL_ARCH_UNIT_MASK|\ - INTEL_ARCH_EVENT_MASK) +#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL +#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL +#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) +#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) +#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) +#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) +#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) +#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) +#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL + +#define AMD64_EVENTSEL_EVENT \ + (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) +#define INTEL_ARCH_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) + +#define X86_RAW_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK | \ + ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK) +#define AMD64_RAW_EVENT_MASK \ + (X86_RAW_EVENT_MASK | \ + AMD64_EVENTSEL_EVENT) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -- cgit v1.2.3 From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Mar 2010 17:34:46 +0100 Subject: x86: Add optimized popcnt variants Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertize it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on a F10h machine. Signed-off-by: Borislav Petkov LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 9 ++++-- arch/x86/include/asm/arch_hweight.h | 59 +++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/bitops.h | 4 ++- 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 arch/x86/include/asm/arch_hweight.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index b09ec55650b3..67dae51e7fd0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -39,9 +39,6 @@ #define LOCK_PREFIX "" #endif -/* This must be included *after* the definition of LOCK_PREFIX */ -#include - struct alt_instr { u8 *instr; /* original instruction */ u8 *replacement; @@ -95,6 +92,12 @@ static inline int alternatives_text_reserved(void *start, void *end) "663:\n\t" newinstr "\n664:\n" /* replacement */ \ ".previous" +/* + * This must be included *after* the definition of ALTERNATIVE due to + * + */ +#include + /* * Alternative instructions for different CPU types or capabilities. * diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 000000000000..d1fc3c219ae6 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h @@ -0,0 +1,59 @@ +#ifndef _ASM_X86_HWEIGHT_H +#define _ASM_X86_HWEIGHT_H + +#ifdef CONFIG_64BIT +/* popcnt %rdi, %rax */ +#define POPCNT ".byte 0xf3,0x48,0x0f,0xb8,0xc7" +#define REG_IN "D" +#define REG_OUT "a" +#else +/* popcnt %eax, %eax */ +#define POPCNT ".byte 0xf3,0x0f,0xb8,0xc0" +#define REG_IN "a" +#define REG_OUT "a" +#endif + +/* + * __sw_hweightXX are called from within the alternatives below + * and callee-clobbered registers need to be taken care of. See + * ARCH_HWEIGHT_CFLAGS in for the respective + * compiler switches. + */ +static inline unsigned int __arch_hweight32(unsigned int w) +{ + unsigned int res = 0; + + asm (ALTERNATIVE("call __sw_hweight32", POPCNT, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +static inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res = 0; + +#ifdef CONFIG_X86_32 + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +#else + asm (ALTERNATIVE("call __sw_hweight64", POPCNT, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); +#endif /* CONFIG_X86_32 */ + + return res; +} + +#endif diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -444,7 +444,9 @@ static inline int fls(int x) #define ARCH_HAS_FAST_MULTIPLIER 1 -#include +#include + +#include #endif /* __KERNEL__ */ -- cgit v1.2.3 From 5958f1d5d722df7a9e5d129676614a8e5219bacd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 31 Mar 2010 21:56:41 +0200 Subject: x86, cpu: Add AMD core boosting feature flag to /proc/cpuinfo By semi-popular demand, this adds the Core Performance Boost feature flag to /proc/cpuinfo. Possible use case for this is userspace tools like cpufreq-aperf, for example, so that they don't have to jump through hoops of accessing "/dev/cpu/%d/cpuid" in order to check for CPB hw support, or call cpuid from userspace. Signed-off-by: Borislav Petkov LKML-Reference: <1270065406-1814-2-git-send-email-bp@amd64.org> Reviewed-by: Thomas Renninger Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0cd82d068613..630e623f61e0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -161,6 +161,7 @@ */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ +#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ /* Virtualization flags: Linux defined */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ -- cgit v1.2.3 From b8f7fb13d2d7ff14818fd1d3edd8b834d38b0217 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 14 Apr 2010 11:35:46 -0500 Subject: x86, UV: Improve BAU performance and error recovery - increase performance of the interrupt handler - release timed-out software acknowledge resources - recover from continuous-busy status due to a hardware issue - add a 'throttle' to keep a uvhub from sending more than a specified number of broadcasts concurrently (work around the hardware issue) - provide a 'nobau' boot command line option - rename 'pnode' and 'node' to 'uvhub' (the 'node' terminology is ambiguous) - add some new statistics about the scope of broadcasts, retries, the hardware issue and the 'throttle' - split off new function uv_bau_retry_msg() from uv_bau_process_message() per community coding style feedback. - simplify the argument list to uv_bau_process_message(), per community coding style feedback. Signed-off-by: Cliff Wickman Cc: linux-mm@kvack.org Cc: Jack Steiner Cc: Russ Anderson Cc: Mike Travis Cc: "H. Peter Anvin" Cc: Thomas Gleixner LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 247 +++++++++++++++++++++++++++------------ 1 file changed, 175 insertions(+), 72 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index b414d2b401f6..aa558ac0306e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -27,13 +27,14 @@ * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. * * We will use 31 sets, one for sending BAU messages from each of the 32 - * cpu's on the node. + * cpu's on the uvhub. * * TLB shootdown will use the first of the 8 descriptors of each set. * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). */ #define UV_ITEMS_PER_DESCRIPTOR 8 +#define MAX_BAU_CONCURRENT 3 #define UV_CPUS_PER_ACT_STATUS 32 #define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_SIZE 2 @@ -45,6 +46,9 @@ #define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) +#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL /* * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 @@ -55,15 +59,29 @@ #define DESC_STATUS_SOURCE_TIMEOUT 3 /* - * source side thresholds at which message retries print a warning + * source side threshholds at which message retries print a warning */ #define SOURCE_TIMEOUT_LIMIT 20 #define DESTINATION_TIMEOUT_LIMIT 20 +/* + * misc. delays, in microseconds + */ +#define THROTTLE_DELAY 10 +#define TIMEOUT_DELAY 10 +#define BIOS_TO 1000 +/* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ + +/* + * threshholds at which to use IPI to free resources + */ +#define PLUGSB4RESET 100 +#define TIMEOUTSB4RESET 100 + /* * number of entries in the destination side payload queue */ -#define DEST_Q_SIZE 17 +#define DEST_Q_SIZE 20 /* * number of destination side software ack resources */ @@ -72,9 +90,10 @@ /* * completion statuses for sending a TLB flush message */ -#define FLUSH_RETRY 1 -#define FLUSH_GIVEUP 2 -#define FLUSH_COMPLETE 3 +#define FLUSH_RETRY_PLUGGED 1 +#define FLUSH_RETRY_TIMEOUT 2 +#define FLUSH_GIVEUP 3 +#define FLUSH_COMPLETE 4 /* * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) @@ -86,14 +105,14 @@ * 'base_dest_nodeid' field of the header corresponds to the * destination nodeID associated with that specified bit. */ -struct bau_target_nodemask { - unsigned long bits[BITS_TO_LONGS(256)]; +struct bau_target_uvhubmask { + unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; }; /* - * mask of cpu's on a node + * mask of cpu's on a uvhub * (during initialization we need to check that unsigned long has - * enough bits for max. cpu's per node) + * enough bits for max. cpu's per uvhub) */ struct bau_local_cpumask { unsigned long bits; @@ -135,8 +154,8 @@ struct bau_msg_payload { struct bau_msg_header { unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ - /* bits 20:6 */ /* first bit in node_map */ + unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ + /* bits 20:6 */ /* first bit in uvhub map */ unsigned int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ @@ -146,26 +165,38 @@ struct bau_msg_header { unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - unsigned int payload_2a:8;/* becomes byte 16 of msg */ - /* bits 48:41 */ /* not currently using */ - unsigned int payload_2b:8;/* becomes byte 17 of msg */ - /* bits 56:49 */ /* not currently using */ + unsigned int sequence:16;/* message sequence number */ + /* bits 56:41 */ /* becomes bytes 16-17 of msg */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ - /* these 24 bits become bytes 12-14 of msg */ + /* these next 24 (58-81) bits become bytes 12-14 of msg */ + + /* bits 65:58 land in byte 12 */ unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ /* bit 58 */ - - unsigned int payload_1a:5;/* not currently used */ - /* bits 63:59 */ - unsigned int payload_1b:8;/* not currently used */ - /* bits 71:64 */ - unsigned int payload_1c:8;/* not currently used */ - /* bits 79:72 */ - unsigned int payload_1d:2;/* not currently used */ + unsigned int msg_type:3; /* software type of the message*/ + /* bits 61:59 */ + unsigned int canceled:1; /* message canceled, resource to be freed*/ + /* bit 62 */ + unsigned int payload_1a:1;/* not currently used */ + /* bit 63 */ + unsigned int payload_1b:2;/* not currently used */ + /* bits 65:64 */ + + /* bits 73:66 land in byte 13 */ + unsigned int payload_1ca:6;/* not currently used */ + /* bits 71:66 */ + unsigned int payload_1c:2;/* not currently used */ + /* bits 73:72 */ + + /* bits 81:74 land in byte 14 */ + unsigned int payload_1d:6;/* not currently used */ + /* bits 79:74 */ + unsigned int payload_1e:2;/* not currently used */ /* bits 81:80 */ unsigned int rsvd_4:7; /* must be zero */ @@ -178,7 +209,7 @@ struct bau_msg_header { /* bits 95:90 */ unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ + unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ /* bit 101*/ unsigned int fairness:3;/* usually zero */ /* bits 104:102 */ @@ -191,13 +222,18 @@ struct bau_msg_header { /* bits 127:107 */ }; +/* see msg_type: */ +#define MSG_NOOP 0 +#define MSG_REGULAR 1 +#define MSG_RETRY 2 + /* * The activation descriptor: * The format of the message to send, plus all accompanying control * Should be 64 bytes */ struct bau_desc { - struct bau_target_nodemask distribution; + struct bau_target_uvhubmask distribution; /* * message template, consisting of header and payload: */ @@ -237,19 +273,25 @@ struct bau_payload_queue_entry { unsigned short acknowledge_count; /* filled in by destination */ /* 16 bits, bytes 10-11 */ - unsigned short replied_to:1; /* sent as 0 by the source */ - /* 1 bit */ - unsigned short unused1:7; /* not currently using */ - /* 7 bits: byte 12) */ + /* these next 3 bytes come from bits 58-81 of the message header */ + unsigned short replied_to:1; /* sent as 0 by the source */ + unsigned short msg_type:3; /* software message type */ + unsigned short canceled:1; /* sent as 0 by the source */ + unsigned short unused1:3; /* not currently using */ + /* byte 12 */ - unsigned char unused2[2]; /* not currently using */ - /* bytes 13-14 */ + unsigned char unused2a; /* not currently using */ + /* byte 13 */ + unsigned char unused2; /* not currently using */ + /* byte 14 */ unsigned char sw_ack_vector; /* filled in by the hardware */ /* byte 15 (bits 127:120) */ - unsigned char unused4[3]; /* not currently using bytes 17-19 */ - /* bytes 17-19 */ + unsigned short sequence; /* message sequence number */ + /* bytes 16-17 */ + unsigned char unused4[2]; /* not currently using bytes 18-19 */ + /* bytes 18-19 */ int number_of_cpus; /* filled in at destination */ /* 32 bits, bytes 20-23 (aligned) */ @@ -259,63 +301,93 @@ struct bau_payload_queue_entry { }; /* - * one for every slot in the destination payload queue - */ -struct bau_msg_status { - struct bau_local_cpumask seen_by; /* map of cpu's */ -}; - -/* - * one for every slot in the destination software ack resources - */ -struct bau_sw_ack_status { - struct bau_payload_queue_entry *msg; /* associated message */ - int watcher; /* cpu monitoring, or -1 */ -}; - -/* - * one on every node and per-cpu; to locate the software tables + * one per-cpu; to locate the software tables */ struct bau_control { struct bau_desc *descriptor_base; - struct bau_payload_queue_entry *bau_msg_head; struct bau_payload_queue_entry *va_queue_first; struct bau_payload_queue_entry *va_queue_last; - struct bau_msg_status *msg_statuses; - int *watching; /* pointer to array */ + struct bau_payload_queue_entry *bau_msg_head; + struct bau_control *uvhub_master; + struct bau_control *socket_master; + unsigned long timeout_interval; + atomic_t active_descriptor_count; + int max_concurrent; + int max_concurrent_constant; + int retry_message_scans; + int plugged_tries; + int timeout_tries; + int ipi_attempts; + int conseccompletes; + short cpu; + short uvhub_cpu; + short uvhub; + short cpus_in_socket; + short cpus_in_uvhub; + unsigned short message_number; + unsigned short uvhub_quiesce; + short socket_acknowledge_count[DEST_Q_SIZE]; + cycles_t send_message; + spinlock_t masks_lock; + spinlock_t uvhub_lock; + spinlock_t queue_lock; }; /* * This structure is allocated per_cpu for UV TLB shootdown statistics. */ struct ptc_stats { - unsigned long ptc_i; /* number of IPI-style flushes */ - unsigned long requestor; /* number of nodes this cpu sent to */ - unsigned long requestee; /* times cpu was remotely requested */ - unsigned long alltlb; /* times all tlb's on this cpu were flushed */ - unsigned long onetlb; /* times just one tlb on this cpu was flushed */ - unsigned long s_retry; /* retries on source side timeouts */ - unsigned long d_retry; /* retries on destination side timeouts */ - unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ - unsigned long dflush; /* cycles spent on destination side */ - unsigned long retriesok; /* successes on retries */ - unsigned long nomsg; /* interrupts with no message */ - unsigned long multmsg; /* interrupts with multiple messages */ - unsigned long ntargeted;/* nodes targeted */ + /* sender statistics */ + unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ + unsigned long s_requestor; /* number of shootdown requests */ + unsigned long s_stimeout; /* source side timeouts */ + unsigned long s_dtimeout; /* destination side timeouts */ + unsigned long s_time; /* time spent in sending side */ + unsigned long s_retriesok; /* successful retries */ + unsigned long s_ntargcpu; /* number of cpus targeted */ + unsigned long s_ntarguvhub; /* number of uvhubs targeted */ + unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ + unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ + unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ + unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ + unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ + unsigned long s_resets_plug; /* ipi-style resets from plug state */ + unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ + unsigned long s_busy; /* status stayed busy past s/w timer */ + unsigned long s_throttles; /* waits in throttle */ + unsigned long s_retry_messages; /* retry broadcasts */ + /* destination statistics */ + unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ + unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ + unsigned long d_multmsg; /* interrupts with multiple messages */ + unsigned long d_nomsg; /* interrupts with no message */ + unsigned long d_time; /* time spent on destination side */ + unsigned long d_requestee; /* number of messages processed */ + unsigned long d_retries; /* number of retry messages processed */ + unsigned long d_canceled; /* number of messages canceled by retries */ + unsigned long d_nocanceled; /* retries that found nothing to cancel */ + unsigned long d_resets; /* number of ipi-style requests processed */ + unsigned long d_rcanceled; /* number of messages canceled by resets */ }; -static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) +static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) { - return constant_test_bit(node, &dstp->bits[0]); + return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) +static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) { - __set_bit(node, &dstp->bits[0]); + __set_bit(uvhub, &dstp->bits[0]); } -static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) +static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, + int nbits) { bitmap_zero(&dstp->bits[0], nbits); } +static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) +{ + return bitmap_weight((unsigned long *)&dstp->bits[0], + UV_DISTRIBUTION_SIZE); +} static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) { @@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); +struct atomic_short { + short counter; +}; + +/** + * atomic_read_short - read a short atomic variable + * @v: pointer of type atomic_short + * + * Atomically reads the value of @v. + */ +static inline int atomic_read_short(const struct atomic_short *v) +{ + return v->counter; +} + +/** + * atomic_add_short_return - add and return a short int + * @i: short value to add + * @v: pointer of type atomic_short + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_short_return(short i, struct atomic_short *v) +{ + short __i = i; + asm volatile(LOCK_PREFIX "xaddw %0, %1" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + #endif /* _ASM_X86_UV_UV_BAU_H */ -- cgit v1.2.3 From a289cc7c70da784a2d370b91885cab4f966dcb0f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2010 17:51:42 -0700 Subject: x86, UV: uv_irq.c: Fix all sparse warnings Fix all sparse warnings in building uv_irq.c. arch/x86/kernel/uv_irq.c:46:17: warning: symbol 'uv_irq_chip' was not declared. Should it be static? arch/x86/kernel/uv_irq.c:143:50: error: no identifier for function argument arch/x86/kernel/uv_irq.c:162:13: error: typename in expression arch/x86/kernel/uv_irq.c:162:13: error: undefined identifier 'restrict' arch/x86/kernel/uv_irq.c:250:44: error: no identifier for function argument arch/x86/kernel/uv_irq.c:260:17: error: typename in expression arch/x86/kernel/uv_irq.c:260:17: error: undefined identifier 'restrict' arch/x86/kernel/uv_irq.c:233:50: warning: incorrect type in argument 3 (different signedness) arch/x86/kernel/uv_irq.c:233:50: expected int *pnode arch/x86/kernel/uv_irq.c:233:50: got unsigned int * arch/x86/include/asm/uv/uv_hub.h:318:44: warning: incorrect type in argument 2 (different address spaces) arch/x86/include/asm/uv/uv_hub.h:318:44: expected void volatile [noderef] *addr arch/x86/include/asm/uv/uv_hub.h:318:44: got unsigned long * Signed-off-by: Randy Dunlap Cc: Dimitri Sivanich Cc: Russ Anderson Cc: Robin Holt Cc: Mike Travis Cc: Cliff Wickman Cc: Jack Steiner LKML-Reference: <20100416175142.f4b59683.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 14cc74ba5d23..bf6b88ef8eeb 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -307,7 +307,7 @@ static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) +static inline volatile void __iomem *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); -- cgit v1.2.3 From 39447b386c846bbf1c56f6403c5282837486200f Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 19 Apr 2010 13:32:41 +0800 Subject: perf: Enhance perf to allow for guest statistic collection from host Below patch introduces perf_guest_info_callbacks and related register/unregister functions. Add more PERF_RECORD_MISC_XXX bits meaning guest kernel and guest user space. Signed-off-by: Zhang Yanmin Signed-off-by: Avi Kivity --- arch/x86/include/asm/perf_event.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f6d43dbfd8e7..254883d0c7e0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -135,17 +135,10 @@ extern void perf_events_lapic_init(void); */ #define PERF_EFLAGS_EXACT (1UL << 3) -#define perf_misc_flags(regs) \ -({ int misc = 0; \ - if (user_mode(regs)) \ - misc |= PERF_RECORD_MISC_USER; \ - else \ - misc |= PERF_RECORD_MISC_KERNEL; \ - if (regs->flags & PERF_EFLAGS_EXACT) \ - misc |= PERF_RECORD_MISC_EXACT; \ - misc; }) - -#define perf_instruction_pointer(regs) ((regs)->ip) +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) #else static inline void init_hw_perf_events(void) { } -- cgit v1.2.3 From 1f9cc3cb6a27521edfe0a21abf97d2bb11c4d237 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Fri, 23 Apr 2010 10:36:22 -0500 Subject: x86, pat: Update the page flags for memtype atomically instead of using memtype_lock While testing an application using the xpmem (out of kernel) driver, we noticed a significant page fault rate reduction of x86_64 with respect to ia64. For one test running with 32 cpus, one thread per cpu, it took 01:08 for each of the threads to vm_insert_pfn 2GB worth of pages. For the same test running on 256 cpus, one thread per cpu, it took 14:48 to vm_insert_pfn 2 GB worth of pages. The slowdown was tracked to lookup_memtype which acquires the spinlock memtype_lock. This heavily contended lock was slowing down vm_insert_pfn(). With the cmpxchg on page->flags method, both the 32 cpu and 256 cpu cases take approx 00:01.3 seconds to complete. Signed-off-by: Robin Holt LKML-Reference: <20100423153627.751194346@gulag1.americas.sgi.com> Cc: Venkatesh Pallipadi Cc: Rafael Wysocki Reviewed-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cacheflush.h | 44 ++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 634c40a739a6..c70068d05f70 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, memcpy(dst, src, len); } -#define PG_WC PG_arch_1 -PAGEFLAG(WC, WC) - #ifdef CONFIG_X86_PAT /* * X86 PAT uses page flags WC and Uncached together to keep track of @@ -55,16 +52,24 @@ PAGEFLAG(WC, WC) * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not * been changed from its default (value of -1 used to denote this). * Note we do not support _PAGE_CACHE_UC here. - * - * Caller must hold memtype_lock for atomicity. */ + +#define _PGMT_DEFAULT 0 +#define _PGMT_WC (1UL << PG_arch_1) +#define _PGMT_UC_MINUS (1UL << PG_uncached) +#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) +#define _PGMT_CLEAR_MASK (~_PGMT_MASK) + static inline unsigned long get_page_memtype(struct page *pg) { - if (!PageUncached(pg) && !PageWC(pg)) + unsigned long pg_flags = pg->flags & _PGMT_MASK; + + if (pg_flags == _PGMT_DEFAULT) return -1; - else if (!PageUncached(pg) && PageWC(pg)) + else if (pg_flags == _PGMT_WC) return _PAGE_CACHE_WC; - else if (PageUncached(pg) && !PageWC(pg)) + else if (pg_flags == _PGMT_UC_MINUS) return _PAGE_CACHE_UC_MINUS; else return _PAGE_CACHE_WB; @@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg) static inline void set_page_memtype(struct page *pg, unsigned long memtype) { + unsigned long memtype_flags = _PGMT_DEFAULT; + unsigned long old_flags; + unsigned long new_flags; + switch (memtype) { case _PAGE_CACHE_WC: - ClearPageUncached(pg); - SetPageWC(pg); + memtype_flags = _PGMT_WC; break; case _PAGE_CACHE_UC_MINUS: - SetPageUncached(pg); - ClearPageWC(pg); + memtype_flags = _PGMT_UC_MINUS; break; case _PAGE_CACHE_WB: - SetPageUncached(pg); - SetPageWC(pg); - break; - default: - case -1: - ClearPageUncached(pg); - ClearPageWC(pg); + memtype_flags = _PGMT_WB; break; } + + do { + old_flags = pg->flags; + new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; + } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); } #else static inline unsigned long get_page_memtype(struct page *pg) { return -1; } -- cgit v1.2.3 From 402af0d7c692ddcfa2333e93d3f275ebd0487926 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 21 Apr 2010 15:21:51 +0100 Subject: x86, asm: Introduce and use percpu_inc() ... generating slightly smaller code. Signed-off-by: Jan Beulich LKML-Reference: <4BCF261F020000780003B33C@vpn.id2.novell.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/percpu.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f8576427cfe..aeab29aee617 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) +#define inc_irq_stat(member) percpu_inc(irq_stat.member) #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 66a272dfd8b8..0ec6d12d84e6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -190,6 +190,29 @@ do { \ pfo_ret__; \ }) +#define percpu_unary_op(op, var) \ +({ \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_arg(0) \ + : "+m" (var)); \ + break; \ + case 2: \ + asm(op "w "__percpu_arg(0) \ + : "+m" (var)); \ + break; \ + case 4: \ + asm(op "l "__percpu_arg(0) \ + : "+m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_arg(0) \ + : "+m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ +}) + /* * percpu_read() makes gcc load the percpu variable every time it is * accessed while percpu_read_stable() allows the value to be cached. @@ -207,6 +230,7 @@ do { \ #define percpu_and(var, val) percpu_to_op("and", var, val) #define percpu_or(var, val) percpu_to_op("or", var, val) #define percpu_xor(var, val) percpu_to_op("xor", var, val) +#define percpu_inc(var) percpu_unary_op("inc", var) #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -- cgit v1.2.3 From 5967ed87ade85a421ef814296c3c7f182b08c225 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 21 Apr 2010 16:08:14 +0100 Subject: x86-64: Reduce SMP locks table size Reduce the SMP locks table size by using relative pointers instead of absolute ones, thus cutting the table size by half. Signed-off-by: Jan Beulich LKML-Reference: <4BCF30FE020000780003B3B6@vpn.id2.novell.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative-asm.h | 4 ++-- arch/x86/include/asm/alternative.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index b97f786a48d5..a63a68be1cce 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -6,8 +6,8 @@ .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - _ASM_ALIGN - _ASM_PTR 1b + .balign 4 + .long 1b - . .previous .endm #else diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index b09ec55650b3..714bf2417284 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -30,8 +30,8 @@ #ifdef CONFIG_SMP #define LOCK_PREFIX \ ".section .smp_locks,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661f\n" /* address */ \ + ".balign 4\n" \ + ".long 661f - .\n" /* offset */ \ ".previous\n" \ "661:\n\tlock; " -- cgit v1.2.3 From 6fc108a08dcddf8f9113cc7102ddaacf7ed37a6b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 21 Apr 2010 15:23:44 +0100 Subject: x86: Clean up arch/x86/Kconfig* No functional change intended. Signed-off-by: Jan Beulich LKML-Reference: <4BCF2690020000780003B340@vpn.id2.novell.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/boot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 7a1065958ba9..3b62ab56c7a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -24,7 +24,7 @@ #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ - (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) + (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) #error "Invalid value for CONFIG_PHYSICAL_ALIGN" #endif -- cgit v1.2.3 From b701a47ba48b698976fb2fe05fb285b0edc1d26a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 29 Apr 2010 16:03:57 -0700 Subject: x86: Fix LOCK_PREFIX_HERE for uniprocessor build Checkin b3ac891b67bd4b1fc728d1c784cad1212dea433d: x86: Add support for lock prefix in alternatives ... did not define LOCK_PREFIX_HERE in the case of a uniprocessor build. As a result, it would cause any of the usages of this macro to fail on a uniprocessor build. Fix this by defining LOCK_PREFIX_HERE as a null string. Signed-off-by: H. Peter Anvin Cc: Luca Barbieri LKML-Reference: <1267005265-27958-2-git-send-email-luca@luca-barbieri.com> --- arch/x86/include/asm/alternative.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 55fee12cea6d..e29a6c9bba00 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -38,6 +38,7 @@ #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " #else /* ! CONFIG_SMP */ +#define LOCK_PREFIX_HERE "" #define LOCK_PREFIX "" #endif -- cgit v1.2.3 From b2812d031dea86926e9c10f7714af33ac2f6b43d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Apr 2010 18:11:53 +0200 Subject: hw-breakpoints: Change/Enforce some breakpoints policies The current policies of breakpoints in x86 and SH are the following: - task bound breakpoints can only break on userspace addresses - cpu wide breakpoints can only break on kernel addresses The former rule prevents ptrace breakpoints to be set to trigger on kernel addresses, which is good. But as a side effect, we can't breakpoint on kernel addresses for task bound breakpoints. The latter rule simply makes no sense, there is no reason why we can't set breakpoints on userspace while performing cpu bound profiles. We want the following new policies: - task bound breakpoint can set userspace address breakpoints, with no particular privilege required. - task bound breakpoints can set kernelspace address breakpoints but must be privileged to do that. - cpu bound breakpoints can do what they want as they are privileged already. To implement these new policies, this patch checks if we are dealing with a kernel address breakpoint, if so and if the exclude_kernel parameter is set, we tell the user that the breakpoint is invalid, which makes a good generic ptrace protection. If we don't have exclude_kernel, ensure the user has the right privileges as kernel breakpoints are quite sensitive (risk of trap recursion attacks and global performance impacts). [ Paul Mundt: keep addr space check for sh signal delivery and fix double function declaration] Signed-off-by: Frederic Weisbecker Cc: Will Deacon Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel Cc: Ingo Molnar Signed-off-by: Paul Mundt --- arch/x86/include/asm/hw_breakpoint.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..c77a5a6fab9d 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -44,9 +44,8 @@ struct arch_hw_breakpoint { struct perf_event; struct pmu; -extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); -- cgit v1.2.3 From feef47d0cb530e8419dfa0b48141b538b89b1b1a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 23 Apr 2010 05:59:55 +0200 Subject: hw-breakpoints: Get the number of available registers on boot dynamically The breakpoint generic layer assumes that archs always know in advance the static number of address registers available to host breakpoints through the HBP_NUM macro. However this is not true for every archs. For example Arm needs to get this information dynamically to handle the compatiblity between different versions. To solve this, this patch proposes to drop the static HBP_NUM macro and let the arch provide the number of available slots through a new hw_breakpoint_slots() function. For archs that have CONFIG_HAVE_MIXED_BREAKPOINTS_REGS selected, it will be called once as the number of registers fits for instruction and data breakpoints together. For the others it will be called first to get the number of instruction breakpoint registers and another time to get the data breakpoint registers, the targeted type is given as a parameter of hw_breakpoint_slots(). Reported-by: Will Deacon Signed-off-by: Frederic Weisbecker Acked-by: Paul Mundt Cc: Mahesh Salgaonkar Cc: K. Prasad Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jason Wessel Cc: Ingo Molnar --- arch/x86/include/asm/hw_breakpoint.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index c77a5a6fab9d..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -41,6 +41,11 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 +static inline int hw_breakpoint_slots(int type) +{ + return HBP_NUM; +} + struct perf_event; struct pmu; -- cgit v1.2.3 From 9b6dba9e0798325dab427b9d60c61630ccc39b28 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 21 Mar 2010 09:00:44 -0400 Subject: x86: Merge simd_math_error() into math_error() The only difference between FPU and SIMD exceptions is where the status bits are read from (cwd/swd vs. mxcsr). This also fixes the discrepency introduced by commit adf77bac, which fixed FPU but not SIMD. Signed-off-by: Brian Gerst LKML-Reference: <1269176446-2489-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/traps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4da91ad69e0d..f66cda56781d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -79,7 +79,7 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; -void math_error(void __user *); +void math_error(struct pt_regs *, int, int); void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); -- cgit v1.2.3 From d88d95eb1c2a72b6126a550debe0883ff723a948 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 24 Apr 2010 09:56:53 +0200 Subject: x86, k8: Fix build error when K8_NB is disabled K8_NB depends on PCI and when the last is disabled (allnoconfig) we fail at the final linking stage due to missing exported num_k8_northbridges. Add a header stub for that. Signed-off-by: Borislav Petkov LKML-Reference: <20100503183036.GJ26107@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/k8.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int k8_scan_nodes(void); #ifdef CONFIG_K8_NB +extern int num_k8_northbridges; + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; } + #else +#define num_k8_northbridges 0 + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return NULL; -- cgit v1.2.3 From 9638fa521e42c9281c874c6b5a382b1ced4ee496 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:07 -0700 Subject: x86, ioapic: Only export mp_find_ioapic and mp_find_ioapic_pin in io_apic.h Multiple declarations of the same function in different headers is a pain to maintain. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-6-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mpspec.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d8bf23a88d05..29994f06c7e2 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -106,10 +106,6 @@ struct device; extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, int active_high_low); extern int acpi_probe_gsi(void); -#ifdef CONFIG_X86_IO_APIC -extern int mp_find_ioapic(int gsi); -extern int mp_find_ioapic_pin(int ioapic, int gsi); -#endif #else /* !CONFIG_ACPI: */ static inline int acpi_probe_gsi(void) { -- cgit v1.2.3 From eddb0c55a14074d6bac8c2ef169aefd7e2c6f139 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:09 -0700 Subject: x86, ioapic: Fix the types of gsi values This patches fixes the types of gsi_base and gsi_end values in struct mp_ioapic_gsi, and the gsi parameter of mp_find_ioapic and mp_find_ioapic_pin A gsi is cannonically a u32, not an int. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-8-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 35832a03a515..feeaf0d92460 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -180,12 +180,12 @@ extern void ioapic_write_entry(int apic, int pin, extern void setup_ioapic_ids_from_mpc(void); struct mp_ioapic_gsi{ - int gsi_base; - int gsi_end; + u32 gsi_base; + u32 gsi_end; }; extern struct mp_ioapic_gsi mp_gsi_routing[]; -int mp_find_ioapic(int gsi); -int mp_find_ioapic_pin(int ioapic, int gsi); +int mp_find_ioapic(u32 gsi); +int mp_find_ioapic_pin(int ioapic, u32 gsi); void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void __init pre_init_apic_IRQ0(void); @@ -197,7 +197,7 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } static inline void probe_nr_irqs_gsi(void) { } -static inline int mp_find_ioapic(int gsi) { return 0; } +static inline int mp_find_ioapic(u32 gsi) { return 0; } struct io_apic_irq_attr; static inline int io_apic_set_pci_routing(struct device *dev, int irq, -- cgit v1.2.3 From 5777372af5c929b8f3c706ed7b295b7279537c88 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:10 -0700 Subject: x86, ioapic: Teach mp_register_ioapic to compute a global gsi_end Add the global variable gsi_end and teach mp_register_ioapic to keep it uptodate as we add more ioapics into the system. ioapics can only be added early in boot so the code that runs later can treat gsi_end as a constant. Remove the have hacks in sfi.c to second guess mp_register_ioapic by keeping t's own running total of how many gsi's have been seen, and instead use the gsi_end. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-9-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index feeaf0d92460..37b0f2bb5034 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -184,6 +184,7 @@ struct mp_ioapic_gsi{ u32 gsi_end; }; extern struct mp_ioapic_gsi mp_gsi_routing[]; +extern u32 gsi_end; int mp_find_ioapic(u32 gsi); int mp_find_ioapic_pin(int ioapic, u32 gsi); void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); -- cgit v1.2.3 From 4afc51a835d3aeba11c35090f524e05c84586d27 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:14 -0700 Subject: x86, ioapic: Simplify probe_nr_irqs_gsi. Use the global gsi_end value now that all ioapics have valid gsi numbers instead of a combination of acpi_probe_gsi and walking all of the ioapics and couting their number of entries by hand if acpi_probe_gsi gave us an answer we did not like. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-13-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mpspec.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 29994f06c7e2..c82868e9f905 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -105,12 +105,6 @@ extern void mp_config_acpi_legacy_irqs(void); struct device; extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, int active_high_low); -extern int acpi_probe_gsi(void); -#else /* !CONFIG_ACPI: */ -static inline int acpi_probe_gsi(void) -{ - return 0; -} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) -- cgit v1.2.3 From 7b20bd5fb902088579af4e70f7f802b93181a628 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 01:07:16 -0700 Subject: x86, irq: Kill io_apic_renumber_irq Now that the generic irq layer is performing the exact same remapping as io_apic_renumber_irq we can kill this weird es7000 specific function. Signed-off-by: Eric W. Biederman LKML-Reference: <1269936436-7039-15-git-send-email-ebiederm@xmission.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 37b0f2bb5034..9da192a17f0f 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -159,7 +159,6 @@ struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); -extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); extern void ioapic_insert_resources(void); -- cgit v1.2.3 From 4f47b4c9f0b711bf84adb8c27774ae80d346b628 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 May 2010 13:22:25 -0700 Subject: x86, acpi/irq: Define gsi_end when X86_IO_APIC is undefined My recent changes introducing a global gsi_end variable failed to take into account the case of using acpi on a system not built to support IO_APICs, causing the build to fail. Define gsi_end to 15 when CONFIG_X86_IO_APIC is not set to avoid compile errors. Signed-off-by: Eric W. Biederman Cc: Yinghai Lu LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9da192a17f0f..63cb4096c3dc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -197,6 +197,7 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } static inline void probe_nr_irqs_gsi(void) { } +#define gsi_end (NR_IRQS_LEGACY - 1) static inline int mp_find_ioapic(u32 gsi) { return 0; } struct io_apic_irq_attr; -- cgit v1.2.3 From a2a47c6c3d1a7c01da4464b3b7be93b924f874c1 Mon Sep 17 00:00:00 2001 From: Ky Srinivasan Date: Thu, 6 May 2010 12:08:41 -0700 Subject: x86: Detect running on a Microsoft HyperV system This patch integrates HyperV detection within the framework currently used by VmWare. With this patch, we can avoid having to replicate the HyperV detection code in each of the Microsoft HyperV drivers. Reworked and tweaked by Greg K-H to build properly. Signed-off-by: K. Y. Srinivasan LKML-Reference: <20100506190841.GA1605@kroah.com> Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Vadim Rozenfeld Cc: Avi Kivity Cc: Gleb Natapov Cc: Frederic Weisbecker Cc: Alexey Dobriyan Cc: "K.Prasad" Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Paul Mackerras Cc: Alan Cox Cc: Haiyang Zhang Cc: Hank Janssen Signed-off-by: Greg Kroah-Hartman Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hyperv.h | 6 ++++++ arch/x86/include/asm/mshyperv.h | 7 +++++++ arch/x86/include/asm/processor.h | 3 +++ 3 files changed, 16 insertions(+) create mode 100644 arch/x86/include/asm/mshyperv.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index e153a2b3889a..46040473e122 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h @@ -14,6 +14,9 @@ #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 +#define HYPERV_CPUID_MIN 0x40000005 + /* * Feature identification. EAX indicates which features are available * to the partition based upon the current partition privileges. @@ -129,6 +132,9 @@ /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX 0x40000002 +/* MSR used to read the per-partition time reference counter */ +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 + /* Define the virtual APIC registers */ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000000..6cd8101d1344 --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h @@ -0,0 +1,7 @@ +#ifndef ASM_X86__MSHYPER_H +#define ASM_X86__MSHYPER_H + +int ms_hyperv_platform(void); +void __cpuinit ms_hyperv_set_feature_bits(struct cpuinfo_x86 *c); + +#endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..597c041bd124 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -114,6 +114,8 @@ struct cpuinfo_x86 { u16 cpu_index; #endif unsigned int x86_hyper_vendor; + /* The layout of this field is hypervisor specific */ + unsigned int x86_hyper_features; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -129,6 +131,7 @@ struct cpuinfo_x86 { #define X86_HYPER_VENDOR_NONE 0 #define X86_HYPER_VENDOR_VMWARE 1 +#define X86_HYPER_VENDOR_MSFT 2 /* * capabilities of CPUs -- cgit v1.2.3 From e08cae4181af9483b04ecfac48f01c8e5a5f27bf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 7 May 2010 16:57:28 -0700 Subject: x86: Clean up the hypervisor layer Clean up the hypervisor layer and the hypervisor drivers, using an ops structure instead of an enumeration with if statements. The identity of the hypervisor, if needed, can be tested by testing the pointer value in x86_hyper. The MS-HyperV private state is moved into a normal global variable (it's per-system state, not per-CPU state). Being a normal bss variable, it will be left at all zero on non-HyperV platforms, and so can generally be tested for HyperV-specific features without additional qualification. Signed-off-by: H. Peter Anvin Acked-by: Greg KH Cc: Hank Janssen Cc: Alok Kataria Cc: Ky Srinivasan LKML-Reference: <4BE49778.6060800@zytor.com> --- arch/x86/include/asm/hyperv.h | 5 +++-- arch/x86/include/asm/hypervisor.h | 27 +++++++++++++++++++++++++-- arch/x86/include/asm/mshyperv.h | 15 +++++++++++---- arch/x86/include/asm/processor.h | 7 ------- arch/x86/include/asm/vmware.h | 27 --------------------------- 5 files changed, 39 insertions(+), 42 deletions(-) delete mode 100644 arch/x86/include/asm/vmware.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index 46040473e122..5df477ac3af7 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_KVM_HYPERV_H -#define _ASM_X86_KVM_HYPERV_H +#ifndef _ASM_X86_HYPERV_H +#define _ASM_X86_HYPERV_H #include @@ -16,6 +16,7 @@ #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 +#define HYPERV_CPUID_MAX 0x4000ffff /* * Feature identification. EAX indicates which features are available diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b78c0941e422..70abda7058c8 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -17,10 +17,33 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * */ -#ifndef ASM_X86__HYPERVISOR_H -#define ASM_X86__HYPERVISOR_H +#ifndef _ASM_X86_HYPERVISOR_H +#define _ASM_X86_HYPERVISOR_H extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); +/* + * x86 hypervisor information + */ +struct hypervisor_x86 { + /* Hypervisor name */ + const char *name; + + /* Detection routine */ + bool (*detect)(void); + + /* Adjust CPU feature bits (run once per CPU) */ + void (*set_cpu_features)(struct cpuinfo_x86 *); + + /* Platform setup (run once per boot) */ + void (*init_platform)(void); +}; + +extern const struct hypervisor_x86 *x86_hyper; + +/* Recognized hypervisors */ +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6cd8101d1344..79ce5685ab64 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -1,7 +1,14 @@ -#ifndef ASM_X86__MSHYPER_H -#define ASM_X86__MSHYPER_H +#ifndef _ASM_X86_MSHYPER_H +#define _ASM_X86_MSHYPER_H -int ms_hyperv_platform(void); -void __cpuinit ms_hyperv_set_feature_bits(struct cpuinfo_x86 *c); +#include +#include + +struct ms_hyperv_info { + u32 features; + u32 hints; +}; + +extern struct ms_hyperv_info ms_hyperv; #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 597c041bd124..e4f1dfb2d05b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -113,9 +113,6 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; #endif - unsigned int x86_hyper_vendor; - /* The layout of this field is hypervisor specific */ - unsigned int x86_hyper_features; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -129,10 +126,6 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff -#define X86_HYPER_VENDOR_NONE 0 -#define X86_HYPER_VENDOR_VMWARE 1 -#define X86_HYPER_VENDOR_MSFT 2 - /* * capabilities of CPUs */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h deleted file mode 100644 index e49ed6d2fd4e..000000000000 --- a/arch/x86/include/asm/vmware.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2008, VMware, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - */ -#ifndef ASM_X86__VMWARE_H -#define ASM_X86__VMWARE_H - -extern void vmware_platform_setup(void); -extern int vmware_platform(void); -extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); - -#endif -- cgit v1.2.3 From c9ad488289144ae5ef53b012e15895ef1f5e4bb6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 6 May 2010 11:45:45 +0300 Subject: x86: Eliminate TS_XSAVE The fpu code currently uses current->thread_info->status & TS_XSAVE as a way to distinguish between XSAVE capable processors and older processors. The decision is not really task specific; instead we use the task status to avoid a global memory reference - the value should be the same across all threads. Eliminate this tie-in into the task structure by using an alternative instruction keyed off the XSAVE cpu feature; this results in shorter and faster code, without introducing a global memory reference. [ hpa: in the future, this probably should use an asm jmp ] Signed-off-by: Avi Kivity Acked-by: Suresh Siddha LKML-Reference: <1273135546-29690-2-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 20 ++++++++++++++++---- arch/x86/include/asm/thread_info.h | 1 - 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index da2930924501..a301a6825c3a 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -56,6 +56,18 @@ extern int restore_i387_xstate_ia32(void __user *buf); #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static inline bool use_xsave(void) +{ + u8 has_xsave; + + alternative_io("mov $0, %0", + "mov $1, %0", + X86_FEATURE_XSAVE, + "=g"(has_xsave)); + + return has_xsave; +} + #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ @@ -99,7 +111,7 @@ static inline void clear_fpu_state(struct task_struct *tsk) /* * xsave header may indicate the init state of the FP. */ - if ((task_thread_info(tsk)->status & TS_XSAVE) && + if (use_xsave() && !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) return; @@ -164,7 +176,7 @@ static inline void fxsave(struct task_struct *tsk) static inline void __save_init_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) + if (use_xsave()) xsave(tsk); else fxsave(tsk); @@ -218,7 +230,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) */ static inline void __save_init_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) { + if (use_xsave()) { struct xsave_struct *xstate = &tsk->thread.xstate->xsave; struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; @@ -266,7 +278,7 @@ end: static inline int restore_fpu_checking(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) + if (use_xsave()) return xrstor_checking(&tsk->thread.xstate->xsave); else return fxrstor_checking(&tsk->thread.xstate->fxsave); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..e9e341505ab3 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -244,7 +244,6 @@ static inline struct thread_info *current_thread_info(void) #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ -#define TS_XSAVE 0x0010 /* Use xsave/xrstor */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -- cgit v1.2.3 From 86603283326c9e95e5ad4e9fdddeec93cac5d9ad Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 6 May 2010 11:45:46 +0300 Subject: x86: Introduce 'struct fpu' and related API Currently all fpu state access is through tsk->thread.xstate. Since we wish to generalize fpu access to non-task contexts, wrap the state in a new 'struct fpu' and convert existing access to use an fpu API. Signal frame handlers are not converted to the API since they will remain task context only things. Signed-off-by: Avi Kivity Acked-by: Suresh Siddha LKML-Reference: <1273135546-29690-3-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 115 +++++++++++++++++++++++++++++---------- arch/x86/include/asm/processor.h | 6 +- arch/x86/include/asm/xsave.h | 7 ++- 3 files changed, 94 insertions(+), 34 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a301a6825c3a..1a8cca33b736 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -103,10 +104,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) values. The kernel data segment can be sometimes 0 and sometimes new user value. Both should be ok. Use the PDA as safe address because it should be already in L1. */ -static inline void clear_fpu_state(struct task_struct *tsk) +static inline void fpu_clear(struct fpu *fpu) { - struct xsave_struct *xstate = &tsk->thread.xstate->xsave; - struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + struct xsave_struct *xstate = &fpu->state->xsave; + struct i387_fxsave_struct *fx = &fpu->state->fxsave; /* * xsave header may indicate the init state of the FP. @@ -123,6 +124,11 @@ static inline void clear_fpu_state(struct task_struct *tsk) X86_FEATURE_FXSAVE_LEAK); } +static inline void clear_fpu_state(struct task_struct *tsk) +{ + fpu_clear(&tsk->thread.fpu); +} + static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; @@ -147,7 +153,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) return err; } -static inline void fxsave(struct task_struct *tsk) +static inline void fpu_fxsave(struct fpu *fpu) { /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix @@ -157,42 +163,45 @@ static inline void fxsave(struct task_struct *tsk) /* Using "fxsaveq %0" would be the ideal choice, but is only supported starting with gas 2.16. */ __asm__ __volatile__("fxsaveq %0" - : "=m" (tsk->thread.xstate->fxsave)); + : "=m" (fpu->state->fxsave)); #elif 0 /* Using, as a workaround, the properly prefixed form below isn't accepted by any binutils version so far released, complaining that the same type of prefix is used twice if an extended register is needed for addressing (fix submitted to mainline 2005-11-21). */ __asm__ __volatile__("rex64/fxsave %0" - : "=m" (tsk->thread.xstate->fxsave)); + : "=m" (fpu->state->fxsave)); #else /* This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ __asm__ __volatile__("rex64/fxsave (%1)" - : "=m" (tsk->thread.xstate->fxsave) - : "cdaSDb" (&tsk->thread.xstate->fxsave)); + : "=m" (fpu->state->fxsave) + : "cdaSDb" (&fpu->state->fxsave)); #endif } -static inline void __save_init_fpu(struct task_struct *tsk) +static inline void fpu_save_init(struct fpu *fpu) { if (use_xsave()) - xsave(tsk); + fpu_xsave(fpu); else - fxsave(tsk); + fpu_fxsave(fpu); - clear_fpu_state(tsk); + fpu_clear(fpu); +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + fpu_save_init(&tsk->thread.fpu); task_thread_info(tsk)->status &= ~TS_USEDFPU; } #else /* CONFIG_X86_32 */ #ifdef CONFIG_MATH_EMULATION -extern void finit_task(struct task_struct *tsk); +extern void finit_soft_fpu(struct i387_soft_struct *soft); #else -static inline void finit_task(struct task_struct *tsk) -{ -} +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif static inline void tolerant_fwait(void) @@ -228,13 +237,13 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) /* * These must be called with preempt disabled */ -static inline void __save_init_fpu(struct task_struct *tsk) +static inline void fpu_save_init(struct fpu *fpu) { if (use_xsave()) { - struct xsave_struct *xstate = &tsk->thread.xstate->xsave; - struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + struct xsave_struct *xstate = &fpu->state->xsave; + struct i387_fxsave_struct *fx = &fpu->state->fxsave; - xsave(tsk); + fpu_xsave(fpu); /* * xsave header may indicate the init state of the FP. @@ -258,8 +267,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) "fxsave %[fx]\n" "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", X86_FEATURE_FXSR, - [fx] "m" (tsk->thread.xstate->fxsave), - [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); + [fx] "m" (fpu->state->fxsave), + [fsw] "m" (fpu->state->fxsave.swd) : "memory"); clear_state: /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed @@ -271,17 +280,34 @@ clear_state: X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); end: + ; +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + fpu_save_init(&tsk->thread.fpu); task_thread_info(tsk)->status &= ~TS_USEDFPU; } + #endif /* CONFIG_X86_64 */ -static inline int restore_fpu_checking(struct task_struct *tsk) +static inline int fpu_fxrstor_checking(struct fpu *fpu) +{ + return fxrstor_checking(&fpu->state->fxsave); +} + +static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return xrstor_checking(&tsk->thread.xstate->xsave); + return fpu_xrstor_checking(fpu); else - return fxrstor_checking(&tsk->thread.xstate->fxsave); + return fpu_fxrstor_checking(fpu); +} + +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + return fpu_restore_checking(&tsk->thread.fpu); } /* @@ -409,30 +435,59 @@ static inline void clear_fpu(struct task_struct *tsk) static inline unsigned short get_fpu_cwd(struct task_struct *tsk) { if (cpu_has_fxsr) { - return tsk->thread.xstate->fxsave.cwd; + return tsk->thread.fpu.state->fxsave.cwd; } else { - return (unsigned short)tsk->thread.xstate->fsave.cwd; + return (unsigned short)tsk->thread.fpu.state->fsave.cwd; } } static inline unsigned short get_fpu_swd(struct task_struct *tsk) { if (cpu_has_fxsr) { - return tsk->thread.xstate->fxsave.swd; + return tsk->thread.fpu.state->fxsave.swd; } else { - return (unsigned short)tsk->thread.xstate->fsave.swd; + return (unsigned short)tsk->thread.fpu.state->fsave.swd; } } static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) { if (cpu_has_xmm) { - return tsk->thread.xstate->fxsave.mxcsr; + return tsk->thread.fpu.state->fxsave.mxcsr; } else { return MXCSR_DEFAULT; } } +static bool fpu_allocated(struct fpu *fpu) +{ + return fpu->state != NULL; +} + +static inline int fpu_alloc(struct fpu *fpu) +{ + if (fpu_allocated(fpu)) + return 0; + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!fpu->state) + return -ENOMEM; + WARN_ON((unsigned long)fpu->state & 15); + return 0; +} + +static inline void fpu_free(struct fpu *fpu) +{ + if (fpu->state) { + kmem_cache_free(task_xstate_cachep, fpu->state); + fpu->state = NULL; + } +} + +static inline void fpu_copy(struct fpu *dst, struct fpu *src) +{ + memcpy(dst->state, src->state, xstate_size); +} + #endif /* __ASSEMBLY__ */ #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..b684f587647c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -380,6 +380,10 @@ union thread_xstate { struct xsave_struct xsave; }; +struct fpu { + union thread_xstate *state; +}; + #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); @@ -457,7 +461,7 @@ struct thread_struct { unsigned long trap_no; unsigned long error_code; /* floating point and extended processor state */ - union thread_xstate *xstate; + struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ddc04ccad03b..2c4390cae228 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -37,8 +37,9 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, void __user *fpstate, struct _fpx_sw_bytes *sw); -static inline int xrstor_checking(struct xsave_struct *fx) +static inline int fpu_xrstor_checking(struct fpu *fpu) { + struct xsave_struct *fx = &fpu->state->xsave; int err; asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" @@ -110,12 +111,12 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) : "memory"); } -static inline void xsave(struct task_struct *tsk) +static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" - : : "D" (&(tsk->thread.xstate->xsave)), + : : "D" (&(fpu->state->xsave)), "a" (-1), "d"(-1) : "memory"); } #endif -- cgit v1.2.3 From dce8bf4e115aa44d590802ce3554e926840c9042 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 10 May 2010 13:41:41 -0700 Subject: x86, fpu: Use the proper asm constraint in use_xsave() The proper constraint for a receiving 8-bit variable is "=qm", not "=g" which equals "=rim"; even though the "i" will never match, bugs can and do happen due to the difference between "q" and "r". Signed-off-by: H. Peter Anvin Cc: Avi Kivity Cc: Suresh Siddha LKML-Reference: <1273135546-29690-2-git-send-email-avi@redhat.com> --- arch/x86/include/asm/i387.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 1a8cca33b736..8002e9ce25fc 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -64,7 +64,7 @@ static inline bool use_xsave(void) alternative_io("mov $0, %0", "mov $1, %0", X86_FEATURE_XSAVE, - "=g"(has_xsave)); + "=qm" (has_xsave)); return has_xsave; } -- cgit v1.2.3 From a3c8acd04376d604370dcb6cd2143c9c14078a50 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 11 May 2010 17:47:07 -0700 Subject: x86: Add new static_cpu_has() function using alternatives For CPU-feature-specific code that touches performance-critical paths, introduce a static patching version of [boot_]cpu_has(). This is run at alternatives time and is therefore not appropriate for most initialization code, but on the other hand initialization code is generally not performance critical. On gcc 4.5+ this uses the new "asm goto" feature. Signed-off-by: H. Peter Anvin Cc: Avi Kivity Cc: Suresh Siddha LKML-Reference: <1273135546-29690-2-git-send-email-avi@redhat.com> --- arch/x86/include/asm/cpufeature.h | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0cd82d068613..9b11a5cc6662 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -175,6 +175,7 @@ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#include #include extern const char * const x86_cap_flags[NCAPINTS*32]; @@ -283,6 +284,62 @@ extern const char * const x86_power_flags[32]; #endif /* CONFIG_X86_64 */ +/* + * Static testing of CPU features. Used the same as boot_cpu_has(). + * These are only valid after alternatives have run, but will statically + * patch the target code for additional performance. + * + */ +static __always_inline __pure bool __static_cpu_has(u8 bit) +{ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) + asm goto("1: jmp %l[t_no]\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b\n" + _ASM_PTR "0\n" /* no replacement */ + " .byte %P0\n" /* feature bit */ + " .byte 2b - 1b\n" /* source len */ + " .byte 0\n" /* replacement len */ + " .byte 0xff + 0 - (2b-1b)\n" /* padding */ + ".previous\n" + : : "i" (bit) : : t_no); + return true; + t_no: + return false; +#else + u8 flag; + /* Open-coded due to __stringify() in ALTERNATIVE() */ + asm volatile("1: movb $0,%0\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b\n" + _ASM_PTR "3f\n" + " .byte %P1\n" /* feature bit */ + " .byte 2b - 1b\n" /* source len */ + " .byte 4f - 3f\n" /* replacement len */ + " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "3: movb $1,%0\n" + "4:\n" + ".previous\n" + : "=qm" (flag) : "i" (bit)); + return flag; +#endif +} + +#define static_cpu_has(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ + __static_cpu_has(bit) : \ + boot_cpu_has(bit) \ +) + #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ #endif /* _ASM_X86_CPUFEATURE_H */ -- cgit v1.2.3 From c9775b4cc522e5f1b40b1366a993f0f05f600f39 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 11 May 2010 17:49:54 -0700 Subject: x86, fpu: Use static_cpu_has() to implement use_xsave() use_xsave() is now just a special case of static_cpu_has(), so use static_cpu_has(). Signed-off-by: H. Peter Anvin Cc: Avi Kivity Cc: Suresh Siddha LKML-Reference: <1273135546-29690-2-git-send-email-avi@redhat.com> --- arch/x86/include/asm/i387.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 8002e9ce25fc..c991b3a7b904 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -57,16 +58,9 @@ extern int restore_i387_xstate_ia32(void __user *buf); #define X87_FSW_ES (1 << 7) /* Exception Summary */ -static inline bool use_xsave(void) +static __always_inline __pure bool use_xsave(void) { - u8 has_xsave; - - alternative_io("mov $0, %0", - "mov $1, %0", - X86_FEATURE_XSAVE, - "=qm" (has_xsave)); - - return has_xsave; + return static_cpu_has(X86_FEATURE_XSAVE); } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 9e565292270a2d55524be38835104c564ac8f795 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 13 May 2010 21:43:03 -0700 Subject: x86: Use .cfi_sections for assembly code The newer assemblers support the .cfi_sections directive so we can put the CFI from .S files into the .debug_frame section that is preserved in unstripped vmlinux and in separate debuginfo, rather than the .eh_frame section that is now discarded by vmlinux.lds.S. Signed-off-by: Roland McGrath LKML-Reference: <20100514044303.A6FE7400BE@magilla.sf.frob.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/dwarf2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae6253ab9029..733f7e91e7a9 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -34,6 +34,18 @@ #define CFI_SIGNAL_FRAME #endif +#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) + /* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * The latter we currently just discard since we don't do DWARF + * unwinding at runtime. So only the offline DWARF information is + * useful to anyone. Note we should not use this directive if this + * file is used in the vDSO assembly, or if vmlinux.lds.S gets + * changed so it doesn't discard .eh_frame. + */ + .cfi_sections .debug_frame +#endif + #else /* -- cgit v1.2.3 From ade029e2aaacc8965a548b0b0f80c5bee97ffc68 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 24 Apr 2010 09:56:53 +0200 Subject: x86, k8: Fix build error when K8_NB is disabled K8_NB depends on PCI and when the last is disabled (allnoconfig) we fail at the final linking stage due to missing exported num_k8_northbridges. Add a header stub for that. Signed-off-by: Borislav Petkov LKML-Reference: <20100503183036.GJ26107@aftab> Signed-off-by: H. Peter Anvin Cc: --- arch/x86/include/asm/k8.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int k8_scan_nodes(void); #ifdef CONFIG_K8_NB +extern int num_k8_northbridges; + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; } + #else +#define num_k8_northbridges 0 + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return NULL; -- cgit v1.2.3 From f3d46f9d3194e0329216002a8724d4c0957abc79 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 17 May 2010 14:33:53 +1000 Subject: atomic_t: Cast to volatile when accessing atomic variables In preparation for removing volatile from the atomic_t definition, this patch adds a volatile cast to all the atomic read functions. Signed-off-by: Anton Blanchard Signed-off-by: Linus Torvalds --- arch/x86/include/asm/atomic.h | 2 +- arch/x86/include/asm/atomic64_64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 8f8217b9bdac..37b39d27abe0 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return v->counter; + return (*(volatile int *)&(v)->counter); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 51c5b4056929..b014e235ea8d 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,7 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return v->counter; + return (*(volatile long *)&(v)->counter); } /** -- cgit v1.2.3 From c59bd5688299cddb71183e156e7a3c1409b90df2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 17 May 2010 15:13:23 -0700 Subject: x86, hweight: Use a 32-bit popcnt for __arch_hweight32() Use a 32-bit popcnt instruction for __arch_hweight32(), even on x86-64. Even though the input register will *usually* be zero-extended due to the standard operation of the hardware, it isn't necessarily so if the input value was the result of truncating a 64-bit operation. Note: the POPCNT32 variant used on x86-64 has a technically unnecessary REX prefix to make it five bytes long, the same as a CALL instruction, therefore avoiding an unnecessary NOP. Reported-by: Linus Torvalds Signed-off-by: H. Peter Anvin Cc: Borislav Petkov LKML-Reference: --- arch/x86/include/asm/arch_hweight.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index d1fc3c219ae6..9686c3d9ff73 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -2,13 +2,15 @@ #define _ASM_X86_HWEIGHT_H #ifdef CONFIG_64BIT +/* popcnt %edi, %eax -- redundant REX prefix for alignment */ +#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ -#define POPCNT ".byte 0xf3,0x48,0x0f,0xb8,0xc7" +#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" #define REG_OUT "a" #else /* popcnt %eax, %eax */ -#define POPCNT ".byte 0xf3,0x0f,0xb8,0xc0" +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" #define REG_IN "a" #define REG_OUT "a" #endif @@ -23,7 +25,7 @@ static inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res = 0; - asm (ALTERNATIVE("call __sw_hweight32", POPCNT, X86_FEATURE_POPCNT) + asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) : "="REG_OUT (res) : REG_IN (w)); @@ -48,7 +50,7 @@ static inline unsigned long __arch_hweight64(__u64 w) return __arch_hweight32((u32)w) + __arch_hweight32((u32)(w >> 32)); #else - asm (ALTERNATIVE("call __sw_hweight64", POPCNT, X86_FEATURE_POPCNT) + asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) : "="REG_OUT (res) : REG_IN (w)); #endif /* CONFIG_X86_32 */ -- cgit v1.2.3