diff options
Diffstat (limited to 'arch')
140 files changed, 2215 insertions, 1392 deletions
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h index 945de222ab91..e8a761aee088 100644 --- a/arch/alpha/include/asm/futex.h +++ b/arch/alpha/include/asm/futex.h @@ -29,7 +29,7 @@ : "r" (uaddr), "r"(oparg) \ : "memory") -static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int prev, cmp; + int ret = 0, cmp; + u32 prev; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; __asm__ __volatile__ ( __ASM_SMP_MB - "1: ldl_l %0,0(%2)\n" - " cmpeq %0,%3,%1\n" - " beq %1,3f\n" - " mov %4,%1\n" - "2: stl_c %1,0(%2)\n" - " beq %1,4f\n" + "1: ldl_l %1,0(%3)\n" + " cmpeq %1,%4,%2\n" + " beq %2,3f\n" + " mov %5,%2\n" + "2: stl_c %2,0(%3)\n" + " beq %2,4f\n" "3: .subsection 2\n" "4: br 1b\n" " .previous\n" @@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .long 2b-.\n" " lda $31,3b-2b(%0)\n" " .previous\n" - : "=&r"(prev), "=&r"(cmp) + : "+r"(ret), "=&r"(prev), "=&r"(cmp) : "r"(uaddr), "r"((long)oldval), "r"(newval) : "memory"); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 1570c0b54336..a83bbea62c67 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -13,44 +13,13 @@ #ifdef __KERNEL__ #include <linux/compiler.h> -#include <linux/list.h> -#include <linux/spinlock.h> -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -/* - * the semaphore definition - */ -struct rw_semaphore { - long count; #define RWSEM_UNLOCKED_VALUE 0x0000000000000000L #define RWSEM_ACTIVE_BIAS 0x0000000000000001L #define RWSEM_ACTIVE_MASK 0x00000000ffffffffL #define RWSEM_WAITING_BIAS (-0x0000000100000000L) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -}; - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} static inline void __down_read(struct rw_semaphore *sem) { @@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) #endif } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index fe698b5045e9..376f22130791 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; } -static int -do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, - unsigned long bufsiz) +SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) { struct kstatfs linux_stat; - int error = vfs_statfs(path, &linux_stat); + int error = user_statfs(pathname, &linux_stat); if (!error) error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); return error; } -SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, - struct osf_statfs __user *, buffer, unsigned long, bufsiz) -{ - struct path path; - int retval; - - retval = user_path(pathname, &path); - if (!retval) { - retval = do_osf_statfs(&path, buffer, bufsiz); - path_put(&path); - } - return retval; -} - SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, struct osf_statfs __user *, buffer, unsigned long, bufsiz) { - struct file *file; - int retval; - - retval = -EBADF; - file = fget(fd); - if (file) { - retval = do_osf_statfs(&file->f_path, buffer, bufsiz); - fput(file); - } - return retval; + struct kstatfs linux_stat; + int error = fd_statfs(fd, &linux_stat); + if (!error) + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); + return error; } /* diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index f6c108a3d673..8c13a0c77830 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -149,6 +149,7 @@ static int titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) { + unsigned int irq = d->irq; spin_lock(&titan_irq_lock); titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index c1f3e7cb82a4..a58e84f1a63b 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ irqreturn_t timer_interrupt(int irq, void *dev) { @@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev) profile_tick(CPU_PROFILING); #endif - write_seqlock(&xtime_lock); - /* * Calculate how many ticks have passed since the last update, * including any previous partial leftover. Save any resulting @@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev) nticks = delta >> FIX_SHIFT; if (nticks) - do_timer(nticks); - - write_sequnlock(&xtime_lock); + xtime_update(nticks); if (test_irq_work_pending()) { clear_irq_work_pending(); diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index b33fe7065b38..199a6b6de7f4 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -35,7 +35,7 @@ : "cc", "memory") static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); /* implies preempt_disable() */ @@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int val; + int ret = 0; + u32 val; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ - __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %0, [%3]\n" - " teq %0, %1\n" + "1: " T(ldr) " %1, [%4]\n" + " teq %1, %2\n" " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %2, [%3]\n" + "2: " T(streq) " %3, [%4]\n" "3:\n" " .pushsection __ex_table,\"a\"\n" " .align 3\n" " .long 1b, 4f, 2b, 4f\n" " .popsection\n" " .pushsection .fixup,\"ax\"\n" - "4: mov %0, %4\n" + "4: mov %0, %5\n" " b 3b\n" " .popsection" - : "=&r" (val) + : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); - pagefault_enable(); /* subsumes preempt_enable() */ - - return val; + *uval = val; + return ret; } #endif /* !SMP */ diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 3d76bf233734..1ff46cabc7ef 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -107,9 +107,7 @@ void timer_tick(void) { profile_tick(CPU_PROFILING); do_leds(); - write_seqlock(&xtime_lock); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h index 8fe283ccd1f3..61fef9129c6a 100644 --- a/arch/arm/mach-clps711x/include/mach/time.h +++ b/arch/arm/mach-clps711x/include/mach/time.h @@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id) { struct pt_regs *regs = get_irq_regs(); do_leds(); - do_timer(1); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index c9113619029f..8d73724c0092 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ #ifdef CONFIG_CORE_TIMER_IRQ_L1 __attribute__((l1_text)) #endif irqreturn_t timer_interrupt(int irq, void *dummy) { - write_seqlock(&xtime_lock); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); #ifdef CONFIG_IPIPE update_root_process_times(get_irq_regs()); diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 00eb36f8debf..20c85b5dc7d0 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -140,7 +140,7 @@ stop_watchdog(void) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ //static unsigned short myjiff; /* used by our debug routine print_timestamp */ @@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id) /* call the real timer interrupt handler */ - do_timer(1); + xtime_update(1); cris_do_profile(regs); /* Save profiling information */ return IRQ_HANDLED; diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 84fed3b4b079..4c9e3e1ba5d1 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -26,7 +26,9 @@ #define FLUSH_ALL (void*)0xffffffff /* Vector of locks used for various atomic operations */ -spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; +spinlock_t cris_atomic_locks[] = { + [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks) +}; /* CPU masks */ cpumask_t phys_cpu_present_map = CPU_MASK_NONE; diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index a545211e999d..bb978ede8985 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick. + * as well as call the "xtime_update()" routine every clocktick. */ extern void cris_do_profile(struct pt_regs *regs); @@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; /* Call the real timer interrupt handler */ - write_seqlock(&xtime_lock); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); return IRQ_HANDLED; } diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h index 08b3d1da3583..4bea27f50a7a 100644 --- a/arch/frv/include/asm/futex.h +++ b/arch/frv/include/asm/futex.h @@ -7,10 +7,11 @@ #include <asm/errno.h> #include <asm/uaccess.h> -extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); +extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr); static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { return -ENOSYS; } diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c index 14f64b054c7e..d155ca9e5098 100644 --- a/arch/frv/kernel/futex.c +++ b/arch/frv/kernel/futex.c @@ -18,7 +18,7 @@ * the various futex operations; MMU fault checking is ignored under no-MMU * conditions */ -static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval) +static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval) { int oldval, ret; @@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o return ret; } -static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval) +static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval) { int oldval, ret; @@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o return ret; } -static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval) +static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval) { int oldval, ret; @@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol return ret; } -static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval) +static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval) { int oldval, ret; @@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o return ret; } -static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval) +static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval) { int oldval, ret; @@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o /* * do the futex operations */ -int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 0ddbbae83cb2..b457de496b70 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -50,21 +50,13 @@ static struct irqaction timer_irq = { /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ static irqreturn_t timer_interrupt(int irq, void *dummy) { profile_tick(CPU_PROFILING); - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don't need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); - do_timer(1); + xtime_update(1); #ifdef CONFIG_HEARTBEAT static unsigned short n; @@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) __set_LEDS(n); #endif /* CONFIG_HEARTBEAT */ - write_sequnlock(&xtime_lock); - update_process_times(user_mode(get_irq_regs())); return IRQ_HANDLED; diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c index 165005aff9df..32263a138aa6 100644 --- a/arch/h8300/kernel/time.c +++ b/arch/h8300/kernel/time.c @@ -35,9 +35,7 @@ void h8300_timer_tick(void) { if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); update_process_times(user_mode(get_irq_regs())); } diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c index 3946c0fa8374..7a1533fad47d 100644 --- a/arch/h8300/kernel/timer/timer8.c +++ b/arch/h8300/kernel/timer/timer8.c @@ -61,7 +61,7 @@ /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ static irqreturn_t timer_interrupt(int irq, void *dev_id) diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h index c7f0f062239c..8428525ddb22 100644 --- a/arch/ia64/include/asm/futex.h +++ b/arch/ia64/include/asm/futex.h @@ -46,7 +46,7 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; { - register unsigned long r8 __asm ("r8"); + register unsigned long r8 __asm ("r8") = 0; + unsigned long prev; __asm__ __volatile__( " mf;; \n" " mov ar.ccv=%3;; \n" "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" " .xdata4 \"__ex_table\", 1b-., 2f-. \n" "[2:]" - : "=r" (r8) + : "=r" (prev) : "r" (uaddr), "r" (newval), "rO" ((long) (unsigned) oldval) : "memory"); + *uval = prev; return r8; } } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 215d5454c7d3..3027e7516d85 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -25,20 +25,8 @@ #error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." #endif -#include <linux/list.h> -#include <linux/spinlock.h> - #include <asm/intrinsics.h> -/* - * the semaphore definition - */ -struct rw_semaphore { - signed long count; - spinlock_t wait_lock; - struct list_head wait_list; -}; - #define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) #define RWSEM_ACTIVE_BIAS (1L) #define RWSEM_ACTIVE_MASK (0xffffffffL) @@ -46,26 +34,6 @@ struct rw_semaphore { #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -static inline void -init_rwsem (struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - /* * lock for reading */ @@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem) #define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) #define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h index 96fc62366aa4..ed28bcd5bb85 100644 --- a/arch/ia64/include/asm/xen/hypercall.h +++ b/arch/ia64/include/asm/xen/hypercall.h @@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2, static inline int xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) { - return _hypercall2(int, sched_op_new, cmd, arg); + return _hypercall2(int, sched_op, cmd, arg); } static inline long diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 9702fa92489e..156ad803d5b7 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id) new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == time_keeper_id) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. - */ - write_seqlock(&xtime_lock); - do_timer(1); - local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); - } else - local_cpu_data->itm_next = new_itm; + if (smp_processor_id() == time_keeper_id) + xtime_update(1); + + local_cpu_data->itm_next = new_itm; if (time_after(new_itm, ia64_get_itc())) break; @@ -222,7 +213,7 @@ skip_process_time_accounting: * comfort, we increase the safety margin by * intentionally dropping the next tick(s). We do NOT * update itm.next because that would force us to call - * do_timer() which in turn would let our clock run + * xtime_update() which in turn would let our clock run * too fast (with the potentially devastating effect * of losing monotony of time). */ diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c index fd66b048c6fa..419c8620945a 100644 --- a/arch/ia64/xen/suspend.c +++ b/arch/ia64/xen/suspend.c @@ -37,19 +37,14 @@ xen_mm_unpin_all(void) /* nothing */ } -void xen_pre_device_suspend(void) -{ - /* nothing */ -} - void -xen_pre_suspend() +xen_arch_pre_suspend() { /* nothing */ } void -xen_post_suspend(int suspend_cancelled) +xen_arch_post_suspend(int suspend_cancelled) { if (suspend_cancelled) return; diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index c1c544513e8d..1f8244a78bee 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm) run_posix_cpu_timers(p); delta_itm += local_cpu_data->itm_delta * (stolen + blocked); - if (cpu == time_keeper_id) { - write_seqlock(&xtime_lock); - do_timer(stolen + blocked); - local_cpu_data->itm_next = delta_itm + new_itm; - write_sequnlock(&xtime_lock); - } else { - local_cpu_data->itm_next = delta_itm + new_itm; - } + if (cpu == time_keeper_id) + xtime_update(stolen + blocked); + + local_cpu_data->itm_next = delta_itm + new_itm; + per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen; per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked; } diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index bda86820bffd..84dd04048db9 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ static irqreturn_t timer_interrupt(int irq, void *dev_id) { #ifndef CONFIG_SMP profile_tick(CPU_PROFILING); #endif - /* XXX FIXME. Uh, the xtime_lock should be held here, no? */ - do_timer(1); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 9fe6fefb5e14..1edd95095cb4 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long); extern void bvme6000_reset (void); void bvme6000_set_vectors (void); -/* Save tick handler routine pointer, will point to do_timer() in - * kernel/sched.c, called via bvme6000_process_int() */ +/* Save tick handler routine pointer, will point to xtime_update() in + * kernel/timer/timekeeping.c, called via bvme6000_process_int() */ static irq_handler_t tick_handler; diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 06438dac08ff..18b34ee5db3b 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ static irqreturn_t timer_interrupt(int irq, void *dummy) { - do_timer(1); + xtime_update(1); update_process_times(user_mode(get_irq_regs())); profile_tick(CPU_PROFILING); diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 100baaa692a1..6cb9c3a9b6c9 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -46,8 +46,8 @@ extern void mvme147_reset (void); static int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to do_timer() in - * kernel/sched.c, called via mvme147_process_int() */ +/* Save tick handler routine pointer, will point to xtime_update() in + * kernel/time/timekeeping.c, called via mvme147_process_int() */ irq_handler_t tick_handler; diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 11edf61cc2c4..0b28e2621653 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -51,8 +51,8 @@ extern void mvme16x_reset (void); int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to do_timer() in - * kernel/sched.c, called via mvme16x_process_int() */ +/* Save tick handler routine pointer, will point to xtime_update() in + * kernel/time/timekeeping.c, called via mvme16x_process_int() */ static irq_handler_t tick_handler; diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index 2d9e21bd313a..6464ad3ae3e6 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id) #ifdef CONFIG_SUN3 intersil_clear(); #endif - do_timer(1); + xtime_update(1); update_process_times(user_mode(get_irq_regs())); if (!(kstat_cpu(0).irqs[irq] % 20)) sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]); diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d6ac2a43453c..6623909f70e6 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime) #ifndef CONFIG_GENERIC_CLOCKEVENTS /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ irqreturn_t arch_timer_interrupt(int irq, void *dummy) { @@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy) if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); - - do_timer(1); - - write_sequnlock(&xtime_lock); + xtime_update(1); update_process_times(user_mode(get_irq_regs())); diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h index ad3fd61b2fe7..b0526d2716fa 100644 --- a/arch/microblaze/include/asm/futex.h +++ b/arch/microblaze/include/asm/futex.h @@ -29,7 +29,7 @@ }) static inline int -futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int prev, cmp; + int ret = 0, cmp; + u32 prev; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - __asm__ __volatile__ ("1: lwx %0, %2, r0; \ - cmp %1, %0, %3; \ - beqi %1, 3f; \ - 2: swx %4, %2, r0; \ - addic %1, r0, 0; \ - bnei %1, 1b; \ + __asm__ __volatile__ ("1: lwx %1, %3, r0; \ + cmp %2, %1, %4; \ + beqi %2, 3f; \ + 2: swx %5, %3, r0; \ + addic %2, r0, 0; \ + bnei %2, 1b; \ 3: \ .section .fixup,\"ax\"; \ 4: brid 3b; \ - addik %0, r0, %5; \ + addik %0, r0, %6; \ .previous; \ .section __ex_table,\"a\"; \ .word 1b,4b,2b,4b; \ .previous;" \ - : "=&r" (prev), "=&r"(cmp) \ + : "+r" (ret), "=&r" (prev), "=&r"(cmp) \ : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5ecc0566bc2..d88983516e26 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS select HAVE_GENERIC_DMA_COHERENT select HAVE_IDE select HAVE_OPROFILE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_ARCH_KGDB @@ -208,6 +209,7 @@ config MACH_JZ4740 select ARCH_REQUIRE_GPIOLIB select SYS_HAS_EARLY_PRINTK select HAVE_PWM + select HAVE_CLK config LASAT bool "LASAT Networks platforms" @@ -333,6 +335,8 @@ config PNX8550_STB810 config PMC_MSP bool "PMC-Sierra MSP chipsets" depends on EXPERIMENTAL + select CEVT_R4K + select CSRC_R4K select DMA_NONCOHERENT select SWAP_IO_SPACE select NO_EXCEPT_FILL diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c index 6398fa95905c..40b84b991191 100644 --- a/arch/mips/alchemy/mtx-1/board_setup.c +++ b/arch/mips/alchemy/mtx-1/board_setup.c @@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert); static void mtx1_reset(char *c) { - /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ - au_writel(0x00000000, 0xAE00001C); + /* Jump to the reset vector */ + __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); } static void mtx1_power_off(void) diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c index e30e42add697..956f946218c5 100644 --- a/arch/mips/alchemy/mtx-1/platform.c +++ b/arch/mips/alchemy/mtx-1/platform.c @@ -28,6 +28,8 @@ #include <linux/mtd/physmap.h> #include <mtd/mtd-abi.h> +#include <asm/mach-au1x00/au1xxx_eth.h> + static struct gpio_keys_button mtx1_gpio_button[] = { { .gpio = 207, @@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = { &mtx1_mtd, }; +static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = { + .phy_search_highest_addr = 1, + .phy1_search_mac0 = 1, +}; + static int __init mtx1_register_devices(void) { int rc; + au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata); + rc = gpio_request(mtx1_gpio_button[0].gpio, mtx1_gpio_button[0].desc); if (rc < 0) { diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c index b43c918925d3..80c521e5290d 100644 --- a/arch/mips/alchemy/xxs1500/board_setup.c +++ b/arch/mips/alchemy/xxs1500/board_setup.c @@ -36,8 +36,8 @@ static void xxs1500_reset(char *c) { - /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ - au_writel(0x00000000, 0xAE00001C); + /* Jump to the reset vector */ + __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); } static void xxs1500_power_off(void) diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index b9cce90346cf..6ebf1734b411 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -75,7 +75,7 @@ } static inline int -futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int retval; + int ret = 0; + u32 val; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; if (cpu_has_llsc && R10000_LLSC_WAR) { @@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .set push \n" " .set noat \n" " .set mips3 \n" - "1: ll %0, %2 \n" - " bne %0, %z3, 3f \n" + "1: ll %1, %3 \n" + " bne %1, %z4, 3f \n" " .set mips0 \n" - " move $1, %z4 \n" + " move $1, %z5 \n" " .set mips3 \n" - "2: sc $1, %1 \n" + "2: sc $1, %2 \n" " beqzl $1, 1b \n" __WEAK_LLSC_MB "3: \n" " .set pop \n" " .section .fixup,\"ax\" \n" - "4: li %0, %5 \n" + "4: li %0, %6 \n" " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" " "__UA_ADDR "\t1b, 4b \n" " "__UA_ADDR "\t2b, 4b \n" " .previous \n" - : "=&r" (retval), "=R" (*uaddr) + : "+r" (ret), "=&r" (val), "=R" (*uaddr) : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { @@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .set push \n" " .set noat \n" " .set mips3 \n" - "1: ll %0, %2 \n" - " bne %0, %z3, 3f \n" + "1: ll %1, %3 \n" + " bne %1, %z4, 3f \n" " .set mips0 \n" - " move $1, %z4 \n" + " move $1, %z5 \n" " .set mips3 \n" - "2: sc $1, %1 \n" + "2: sc $1, %2 \n" " beqz $1, 1b \n" __WEAK_LLSC_MB "3: \n" " .set pop \n" " .section .fixup,\"ax\" \n" - "4: li %0, %5 \n" + "4: li %0, %6 \n" " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" " "__UA_ADDR "\t1b, 4b \n" " "__UA_ADDR "\t2b, 4b \n" " .previous \n" - : "=&r" (retval), "=R" (*uaddr) + : "+r" (ret), "=&r" (val), "=R" (*uaddr) : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); } else return -ENOSYS; - return retval; + *uval = val; + return ret; } #endif diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h index e00007cf8162..d0c77496c728 100644 --- a/arch/mips/include/asm/perf_event.h +++ b/arch/mips/include/asm/perf_event.h @@ -11,15 +11,5 @@ #ifndef __MIPS_PERF_EVENT_H__ #define __MIPS_PERF_EVENT_H__ - -/* - * MIPS performance counters do not raise NMI upon overflow, a regular - * interrupt will be signaled. Hence we can do the pending perf event - * work at the tail of the irq handler. - */ -static inline void -set_perf_event_pending(void) -{ -} - +/* Leave it empty here. The file is required by linux/perf_event.h */ #endif /* __MIPS_PERF_EVENT_H__ */ diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 5a84a1f11231..94ca2b018af7 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -17,29 +17,13 @@ #include <asm/cacheflush.h> #include <asm/uasm.h> -/* - * If the Instruction Pointer is in module space (0xc0000000), return true; - * otherwise, it is in kernel space (0x80000000), return false. - * - * FIXME: This will not work when the kernel space and module space are the - * same. If they are the same, we need to modify scripts/recordmcount.pl, - * ftrace_make_nop/call() and the other related parts to ensure the - * enabling/disabling of the calling site to _mcount is right for both kernel - * and module. - */ - -static inline int in_module(unsigned long ip) -{ - return ip & 0x40000000; -} +#include <asm-generic/sections.h> #ifdef CONFIG_DYNAMIC_FTRACE #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ #define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ -#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */ -#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */ #define INSN_NOP 0x00000000 /* nop */ #define INSN_JAL(addr) \ ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK))) @@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void) #endif } +/* + * Check if the address is in kernel space + * + * Clone core_kernel_text() from kernel/extable.c, but doesn't call + * init_kernel_text() for Ftrace doesn't trace functions in init sections. + */ +static inline int in_kernel_space(unsigned long ip) +{ + if (ip >= (unsigned long)_stext && + ip <= (unsigned long)_etext) + return 1; + return 0; +} + static int ftrace_modify_code(unsigned long ip, unsigned int new_code) { int faulted; @@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code) return 0; } +/* + * The details about the calling site of mcount on MIPS + * + * 1. For kernel: + * + * move at, ra + * jal _mcount --> nop + * + * 2. For modules: + * + * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT + * + * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005) + * addiu v1, v1, low_16bit_of_mcount + * move at, ra + * move $12, ra_address + * jalr v1 + * sub sp, sp, 8 + * 1: offset = 5 instructions + * 2.2 For the Other situations + * + * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004) + * addiu v1, v1, low_16bit_of_mcount + * move at, ra + * jalr v1 + * nop | move $12, ra_address | sub sp, sp, 8 + * 1: offset = 4 instructions + */ + +#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) +#define MCOUNT_OFFSET_INSNS 5 +#else +#define MCOUNT_OFFSET_INSNS 4 +#endif +#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS) + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { @@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; /* - * We have compiled module with -mlong-calls, but compiled the kernel - * without it, we need to cope with them respectively. + * If ip is in kernel space, no long call, otherwise, long call is + * needed. */ - if (in_module(ip)) { -#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) - /* - * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005) - * addiu v1, v1, low_16bit_of_mcount - * move at, ra - * move $12, ra_address - * jalr v1 - * sub sp, sp, 8 - * 1: offset = 5 instructions - */ - new = INSN_B_1F_5; -#else - /* - * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004) - * addiu v1, v1, low_16bit_of_mcount - * move at, ra - * jalr v1 - * nop | move $12, ra_address | sub sp, sp, 8 - * 1: offset = 4 instructions - */ - new = INSN_B_1F_4; -#endif - } else { - /* - * move at, ra - * jal _mcount --> nop - */ - new = INSN_NOP; - } + new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F; + return ftrace_modify_code(ip, new); } @@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned int new; unsigned long ip = rec->ip; - /* ip, module: 0xc0000000, kernel: 0x80000000 */ - new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller; + new = in_kernel_space(ip) ? insn_jal_ftrace_caller : + insn_lui_v1_hi16_mcount; return ftrace_modify_code(ip, new); } @@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void) #define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ #define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ -unsigned long ftrace_get_parent_addr(unsigned long self_addr, - unsigned long parent, - unsigned long parent_addr, - unsigned long fp) +unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long + old_parent_ra, unsigned long parent_ra_addr, unsigned long fp) { - unsigned long sp, ip, ra; + unsigned long sp, ip, tmp; unsigned int code; int faulted; /* - * For module, move the ip from calling site of mcount to the - * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for - * kernel, move to the instruction "move ra, at"(offset is 12) + * For module, move the ip from the return address after the + * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for + * kernel, move after the instruction "move ra, at"(offset is 16) */ - ip = self_addr - (in_module(self_addr) ? 20 : 12); + ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24); /* * search the text until finding the non-store instruction or "s{d,w} * ra, offset(sp)" instruction */ do { - ip -= 4; - /* get the code at "ip": code = *(unsigned int *)ip; */ safe_load_code(code, ip, faulted); @@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr, * store the ra on the stack */ if ((code & S_R_SP) != S_R_SP) - return parent_addr; + return parent_ra_addr; - } while (((code & S_RA_SP) != S_RA_SP)); + /* Move to the next instruction */ + ip -= 4; + } while ((code & S_RA_SP) != S_RA_SP); sp = fp + (code & OFFSET_MASK); - /* ra = *(unsigned long *)sp; */ - safe_load_stack(ra, sp, faulted); + /* tmp = *(unsigned long *)sp; */ + safe_load_stack(tmp, sp, faulted); if (unlikely(faulted)) return 0; - if (ra == parent) + if (tmp == old_parent_ra) return sp; return 0; } @@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr, * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp) { - unsigned long old; + unsigned long old_parent_ra; struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; - int faulted; + int faulted, insns; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; /* - * "parent" is the stack address saved the return address of the caller - * of _mcount. + * "parent_ra_addr" is the stack address saved the return address of + * the caller of _mcount. * * if the gcc < 4.5, a leaf function does not save the return address * in the stack address, so, we "emulate" one in _mcount's stack space, @@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * do it in ftrace_graph_caller of mcount.S. */ - /* old = *parent; */ - safe_load_stack(old, parent, faulted); + /* old_parent_ra = *parent_ra_addr; */ + safe_load_stack(old_parent_ra, parent_ra_addr, faulted); if (unlikely(faulted)) goto out; #ifndef KBUILD_MCOUNT_RA_ADDRESS - parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old, - (unsigned long)parent, fp); + parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra, + old_parent_ra, (unsigned long)parent_ra_addr, fp); /* * If fails when getting the stack address of the non-leaf function's * ra, stop function graph tracer and return */ - if (parent == 0) + if (parent_ra_addr == 0) goto out; #endif - /* *parent = return_hooker; */ - safe_store_stack(return_hooker, parent, faulted); + /* *parent_ra_addr = return_hooker; */ + safe_store_stack(return_hooker, parent_ra_addr, faulted); if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) == - -EBUSY) { - *parent = old; + if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp) + == -EBUSY) { + *parent_ra_addr = old_parent_ra; return; } - trace.func = self_addr; + /* + * Get the recorded ip of the current mcount calling site in the + * __mcount_loc section, which will be used to filter the function + * entries configured through the tracing/set_graph_function interface. + */ + + insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; + trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { current->curr_ret_stack--; - *parent = old; + *parent_ra_addr = old_parent_ra; } return; out: diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index 2b7f3f703b83..a8244854d3dc 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event, return ret; } -static int mipspmu_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - /* To look for a free counter for this event. */ - idx = mipspmu->alloc_counter(cpuc, hwc); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then - * make sure it is disabled. - */ - event->hw.idx = idx; - mipspmu->disable_event(idx); - cpuc->events[idx] = event; - - /* Set the period for the event. */ - mipspmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - mipspmu->enable_event(hwc, idx); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - return err; -} - static void mipspmu_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) @@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event, unsigned long flags; int shift = 64 - TOTAL_BITS; s64 prev_raw_count, new_raw_count; - s64 delta; + u64 delta; again: prev_raw_count = local64_read(&hwc->prev_count); @@ -231,32 +196,90 @@ again: return; } -static void mipspmu_disable(struct perf_event *event) +static void mipspmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!mipspmu) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Set the period for the event. */ + mipspmu_event_set_period(event, hwc, hwc->idx); + + /* Enable the event. */ + mipspmu->enable_event(hwc, hwc->idx); +} + +static void mipspmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!mipspmu) + return; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* We are working on a local event. */ + mipspmu->disable_event(hwc->idx); + barrier(); + mipspmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int mipspmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; + int idx; + int err = 0; + perf_pmu_disable(event->pmu); - WARN_ON(idx < 0 || idx >= mipspmu->num_counters); + /* To look for a free counter for this event. */ + idx = mipspmu->alloc_counter(cpuc, hwc); + if (idx < 0) { + err = idx; + goto out; + } - /* We are working on a local event. */ + /* + * If there is an event in the counter we are going to use then + * make sure it is disabled. + */ + event->hw.idx = idx; mipspmu->disable_event(idx); + cpuc->events[idx] = event; - barrier(); - - mipspmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + mipspmu_start(event, PERF_EF_RELOAD); + /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; } -static void mipspmu_unthrottle(struct perf_event *event) +static void mipspmu_del(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - mipspmu->enable_event(hwc, hwc->idx); + WARN_ON(idx < 0 || idx >= mipspmu->num_counters); + + mipspmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); } static void mipspmu_read(struct perf_event *event) @@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event) mipspmu_event_update(event, hwc, hwc->idx); } -static struct pmu pmu = { - .enable = mipspmu_enable, - .disable = mipspmu_disable, - .unthrottle = mipspmu_unthrottle, - .read = mipspmu_read, -}; +static void mipspmu_enable(struct pmu *pmu) +{ + if (mipspmu) + mipspmu->start(); +} + +static void mipspmu_disable(struct pmu *pmu) +{ + if (mipspmu) + mipspmu->stop(); +} static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmu_reserve_mutex); @@ -318,6 +346,82 @@ static void mipspmu_free_irq(void) perf_irq = save_perf_irq; } +/* + * mipsxx/rm9000/loongson2 have different performance counters, they have + * specific low-level init routines. + */ +static void reset_counters(void *arg); +static int __hw_perf_event_init(struct perf_event *event); + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, + &pmu_reserve_mutex)) { + /* + * We must not call the destroy function with interrupts + * disabled. + */ + on_each_cpu(reset_counters, + (void *)(long)mipspmu->num_counters, 1); + mipspmu_free_irq(); + mutex_unlock(&pmu_reserve_mutex); + } +} + +static int mipspmu_event_init(struct perf_event *event) +{ + int err = 0; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + if (!mipspmu || event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + if (!atomic_inc_not_zero(&active_events)) { + if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) { + atomic_dec(&active_events); + return -ENOSPC; + } + + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) + err = mipspmu_get_irq(); + + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static struct pmu pmu = { + .pmu_enable = mipspmu_enable, + .pmu_disable = mipspmu_disable, + .event_init = mipspmu_event_init, + .add = mipspmu_add, + .del = mipspmu_del, + .start = mipspmu_start, + .stop = mipspmu_stop, + .read = mipspmu_read, +}; + static inline unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev) { @@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc, { struct hw_perf_event fake_hwc = event->hw; - if (event->pmu && event->pmu != &pmu) - return 0; + /* Allow mixed event group. So return 1 to pass validation. */ + if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF) + return 1; return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0; } @@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event) return 0; } -/* - * mipsxx/rm9000/loongson2 have different performance counters, they have - * specific low-level init routines. - */ -static void reset_counters(void *arg); -static int __hw_perf_event_init(struct perf_event *event); - -static void hw_perf_event_destroy(struct perf_event *event) -{ - if (atomic_dec_and_mutex_lock(&active_events, - &pmu_reserve_mutex)) { - /* - * We must not call the destroy function with interrupts - * disabled. - */ - on_each_cpu(reset_counters, - (void *)(long)mipspmu->num_counters, 1); - mipspmu_free_irq(); - mutex_unlock(&pmu_reserve_mutex); - } -} - -const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = 0; - - if (!mipspmu || event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) - return ERR_PTR(-ENODEV); - - if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) { - atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); - } - - mutex_lock(&pmu_reserve_mutex); - if (atomic_read(&active_events) == 0) - err = mipspmu_get_irq(); - - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmu_reserve_mutex); - } - - if (err) - return ERR_PTR(err); - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err ? ERR_PTR(err) : &pmu; -} - -void hw_perf_enable(void) -{ - if (mipspmu) - mipspmu->start(); -} - -void hw_perf_disable(void) -{ - if (mipspmu) - mipspmu->stop(); -} - /* This is needed by specific irq handlers in perf_event_*.c */ static void handle_associated_event(struct cpu_hw_events *cpuc, @@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc, #include "perf_event_mipsxx.c" /* Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * Leave userspace callchain empty for now. When we find a way to trace * the user stack callchains, we add here. */ -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry, while (!kstack_end(sp)) { addr = *sp++; if (__kernel_text_address(addr)) { - callchain_store(entry, addr); + perf_callchain_store(entry, addr); if (entry->nr >= PERF_MAX_STACK_DEPTH) break; } } } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp = regs->regs[29]; #ifdef CONFIG_KALLSYMS unsigned long ra = regs->regs[31]; unsigned long pc = regs->cp0_epc; - callchain_store(entry, PERF_CONTEXT_KERNEL); if (raw_show_trace || !__kernel_text_address(pc)) { unsigned long stack_page = (unsigned long)task_stack_page(current); @@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs, return; } do { - callchain_store(entry, pc); + perf_callchain_store(entry, pc); if (entry->nr >= PERF_MAX_STACK_DEPTH) break; pc = unwind_stack(current, &sp, pc, &ra); } while (pc); #else - callchain_store(entry, PERF_CONTEXT_KERNEL); save_raw_perf_callchain(entry, sp); #endif } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 183e0d226669..d9a7db78ed62 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void) * interrupt, not NMI. */ if (handled == IRQ_HANDLED) - perf_event_do_pending(); + irq_work_run(); #ifdef CONFIG_MIPS_MT_SMP read_unlock(&pmuint_rwlock); @@ -1045,6 +1045,8 @@ init_hw_perf_events(void) "CPU, irq %d%s\n", mipspmu->name, counters, irq, irq < 0 ? " (share with timer interrupt)" : ""); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + return 0; } early_initcall(init_hw_perf_events); diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5922342bca39..dbbe0ce48d89 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc) static int protected_restore_fp_context(struct sigcontext __user *sc) { - int err, tmp; + int err, tmp __maybe_unused; while (1) { lock_fpu_owner(); own_fpu_inatomic(0); diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index a0ed0e052b2e..aae986613795 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc) static int protected_restore_fp_context32(struct sigcontext32 __user *sc) { - int err, tmp; + int err, tmp __maybe_unused; while (1) { lock_fpu_owner(); own_fpu_inatomic(0); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 383aeb95cb49..32a256101082 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void) */ static struct task_struct *cpu_idle_thread[NR_CPUS]; +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + int __cpuinit __cpu_up(unsigned int cpu) { struct task_struct *idle; @@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu) * Linux can schedule processes on this slave. */ if (!cpu_idle_thread[cpu]) { - idle = fork_idle(cpu); - cpu_idle_thread[cpu] = idle; + /* + * Schedule work item to avoid forking user task + * Ported from arch/x86/kernel/smpboot.c + */ + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + idle = cpu_idle_thread[cpu] = c_idle.idle; if (IS_ERR(idle)) panic(KERN_ERR "Fork failed for CPU %d", cpu); diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 1dc6edff45e0..58beabf50b3c 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -383,12 +383,11 @@ save_static_function(sys_sysmips); static int __used noinline _sys_sysmips(nabi_no_regargs struct pt_regs regs) { - long cmd, arg1, arg2, arg3; + long cmd, arg1, arg2; cmd = regs.regs[4]; arg1 = regs.regs[5]; arg2 = regs.regs[6]; - arg3 = regs.regs[7]; switch (cmd) { case MIPS_ATOMIC_SET: @@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs) if (arg1 & 2) set_thread_flag(TIF_LOGADE); else - clear_thread_flag(TIF_FIXADE); + clear_thread_flag(TIF_LOGADE); return 0; diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6a1fdfef8fde..ab52b7cf3b6b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -148,9 +148,9 @@ struct { spinlock_t tc_list_lock; struct list_head tc_list; /* Thread contexts */ } vpecontrol = { - .vpe_list_lock = SPIN_LOCK_UNLOCKED, + .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock), .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list), - .tc_list_lock = SPIN_LOCK_UNLOCKED, + .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock), .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list) }; diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig index 6e1b77fec7ea..aca93eed8779 100644 --- a/arch/mips/loongson/Kconfig +++ b/arch/mips/loongson/Kconfig @@ -1,6 +1,7 @@ +if MACH_LOONGSON + choice prompt "Machine Type" - depends on MACH_LOONGSON config LEMOTE_FULOONG2E bool "Lemote Fuloong(2e) mini-PC" @@ -87,3 +88,5 @@ config LOONGSON_UART_BASE config LOONGSON_MC146818 bool default n + +endif # MACH_LOONGSON diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c index 1a06defc4f7f..353e1d2e41a5 100644 --- a/arch/mips/loongson/common/cmdline.c +++ b/arch/mips/loongson/common/cmdline.c @@ -44,10 +44,5 @@ void __init prom_init_cmdline(void) strcat(arcs_cmdline, " "); } - if ((strstr(arcs_cmdline, "console=")) == NULL) - strcat(arcs_cmdline, " console=ttyS0,115200"); - if ((strstr(arcs_cmdline, "root=")) == NULL) - strcat(arcs_cmdline, " root=/dev/hda1"); - prom_init_machtype(); } diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c index 81fbe6b73f91..2efd5d9dee27 100644 --- a/arch/mips/loongson/common/machtype.c +++ b/arch/mips/loongson/common/machtype.c @@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void) void __init prom_init_machtype(void) { - char *p, str[MACHTYPE_LEN]; + char *p, str[MACHTYPE_LEN + 1]; int machtype = MACH_LEMOTE_FL2E; mips_machtype = LOONGSON_MACHTYPE; @@ -53,6 +53,7 @@ void __init prom_init_machtype(void) } p += strlen("machtype="); strncpy(str, p, MACHTYPE_LEN); + str[MACHTYPE_LEN] = '\0'; p = strstr(str, " "); if (p) *p = '\0'; diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 2701d9500959..2a7d43f4f161 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -70,7 +70,7 @@ #define COMPXSP \ - unsigned xm; int xe; int xs; int xc + unsigned xm; int xe; int xs __maybe_unused; int xc #define COMPYSP \ unsigned ym; int ye; int ys; int yc @@ -104,7 +104,7 @@ #define COMPXDP \ -u64 xm; int xe; int xs; int xc +u64 xm; int xe; int xs __maybe_unused; int xc #define COMPYDP \ u64 ym; int ye; int ys; int yc diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2efcbd24c82f..279599e9a779 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr) void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long lastpfn; + unsigned long lastpfn __maybe_unused; pagetable_init(); diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 083d3412d0bc..04f9e17db9d0 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -109,6 +109,8 @@ static bool scratchpad_available(void) static int scratchpad_offset(int i) { BUG(); + /* Really unreachable, but evidently some GCC want this. */ + return 0; } #endif /* diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c index b7c03d80c88c..68798f869c0f 100644 --- a/arch/mips/pci/ops-pmcmsp.c +++ b/arch/mips/pci/ops-pmcmsp.c @@ -308,7 +308,7 @@ static struct resource pci_mem_resource = { * RETURNS: PCIBIOS_SUCCESSFUL - success * ****************************************************************************/ -static int bpci_interrupt(int irq, void *dev_id) +static irqreturn_t bpci_interrupt(int irq, void *dev_id) { struct msp_pci_regs *preg = (void *)PCI_BASE_REG; unsigned int stat = preg->if_status; @@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id) /* write to clear all asserted interrupts */ preg->if_status = stat; - return PCIBIOS_SUCCESSFUL; + return IRQ_HANDLED; } /***************************************************************************** diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig index c139988bb85d..8d798497c614 100644 --- a/arch/mips/pmc-sierra/Kconfig +++ b/arch/mips/pmc-sierra/Kconfig @@ -4,15 +4,11 @@ choice config PMC_MSP4200_EVAL bool "PMC-Sierra MSP4200 Eval Board" - select CEVT_R4K - select CSRC_R4K select IRQ_MSP_SLP select HW_HAS_PCI config PMC_MSP4200_GW bool "PMC-Sierra MSP4200 VoIP Gateway" - select CEVT_R4K - select CSRC_R4K select IRQ_MSP_SLP select HW_HAS_PCI diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c index cca64e15f57f..01df84ce31e2 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_time.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c @@ -81,7 +81,7 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_rate/2; } -unsigned int __init get_c0_compare_int(void) +unsigned int __cpuinit get_c0_compare_int(void) { return MSP_INT_VPE0_TIMER; } diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 92d2f9298e38..9d773a639513 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, * Atomically reads the value of @v. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) (ACCESS_ONCE((v)->counter)) /** * atomic_set - set atomic variable diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 679dee0bbd08..3d6e60dad9d9 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user_check(x, ptr, size) \ ({ \ + const __typeof__(ptr) __guc_ptr = (ptr); \ int _e; \ - if (likely(__access_ok((unsigned long) (ptr), (size)))) \ - _e = __get_user_nocheck((x), (ptr), (size)); \ + if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \ + _e = __get_user_nocheck((x), __guc_ptr, (size)); \ else { \ _e = -EFAULT; \ (x) = (__typeof__(x))0; \ diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 75da468090b9..5b955000626d 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) unsigned tsc, elapse; irqreturn_t ret; - write_seqlock(&xtime_lock); - while (tsc = get_cycles(), elapse = tsc - mn10300_last_tsc, /* time elapsed since last * tick */ @@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) mn10300_last_tsc += MN10300_TSC_PER_HZ; /* advance the kernel's time tracking system */ - do_timer(1); + xtime_update(1); } - write_sequnlock(&xtime_lock); - ret = local_timer_interrupt(); #ifdef CONFIG_SMP send_IPI_allbutself(LOCAL_TIMER_IPI); diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c index a8933a60b2d4..a6b63dde603d 100644 --- a/arch/mn10300/mm/cache-inv-icache.c +++ b/arch/mn10300/mm/cache-inv-icache.c @@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end) /* invalidate the icache coverage on that region */ mn10300_local_icache_inv_range2(addr + off, size); - smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); + smp_cache_call(SMP_ICACHE_INV_RANGE, start, end); } /** @@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end) * directly */ start_page = (start >= 0x80000000UL) ? start : 0x80000000UL; mn10300_icache_inv_range(start_page, end); - smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); + smp_cache_call(SMP_ICACHE_INV_RANGE, start, end); if (start_page == start) goto done; end = start_page; diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index 30394081d9b6..6ab9580b0b00 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -185,26 +185,21 @@ struct hpux_statfs { int16_t f_pad; }; -static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) +static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p) { - struct kstatfs st; - int retval; - - retval = vfs_statfs(path, &st); - if (retval) - return retval; - - memset(buf, 0, sizeof(*buf)); - buf->f_type = st.f_type; - buf->f_bsize = st.f_bsize; - buf->f_blocks = st.f_blocks; - buf->f_bfree = st.f_bfree; - buf->f_bavail = st.f_bavail; - buf->f_files = st.f_files; - buf->f_ffree = st.f_ffree; - buf->f_fsid[0] = st.f_fsid.val[0]; - buf->f_fsid[1] = st.f_fsid.val[1]; - + struct hpux_statfs buf; + memset(&buf, 0, sizeof(buf)); + buf.f_type = st->f_type; + buf.f_bsize = st->f_bsize; + buf.f_blocks = st->f_blocks; + buf.f_bfree = st->f_bfree; + buf.f_bavail = st->f_bavail; + buf.f_files = st->f_files; + buf.f_ffree = st->f_ffree; + buf.f_fsid[0] = st->f_fsid.val[0]; + buf.f_fsid[1] = st->f_fsid.val[1]; + if (copy_to_user(p, &buf, sizeof(buf))) + return -EFAULT; return 0; } @@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) asmlinkage long hpux_statfs(const char __user *pathname, struct hpux_statfs __user *buf) { - struct path path; - int error; - - error = user_path(pathname, &path); - if (!error) { - struct hpux_statfs tmp; - error = do_statfs_hpux(&path, &tmp); - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) - error = -EFAULT; - path_put(&path); - } + struct kstatfs st; + int error = user_statfs(pathname, &st); + if (!error) + error = do_statfs_hpux(&st, buf); return error; } asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) { - struct file *file; - struct hpux_statfs tmp; - int error; - - error = -EBADF; - file = fget(fd); - if (!file) - goto out; - error = do_statfs_hpux(&file->f_path, &tmp); - if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) - error = -EFAULT; - fput(file); - out: + struct kstatfs st; + int error = fd_statfs(fd, &st); + if (!error) + error = do_statfs_hpux(&st, buf); return error; } diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 0c705c3a55ef..67a33cc27ef2 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -8,7 +8,7 @@ #include <asm/errno.h> static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) /* Non-atomic version */ static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int err = 0; - int uval; + u32 val; /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is * our gateway page, and causes no end of trouble... @@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) if (segment_eq(KERNEL_DS, get_fs()) && !uaddr) return -EFAULT; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - err = get_user(uval, uaddr); - if (err) return -EFAULT; - if (uval == oldval) - err = put_user(newval, uaddr); - if (err) return -EFAULT; - return uval; + if (get_user(val, uaddr)) + return -EFAULT; + if (val == oldval && put_user(newval, uaddr)) + return -EFAULT; + *uval = val; + return 0; } #endif /*__KERNEL__*/ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 05511ccb61d2..45b7389d77aa 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) update_process_times(user_mode(get_irq_regs())); } - if (cpu == 0) { - write_seqlock(&xtime_lock); - do_timer(ticks_elapsed); - write_sequnlock(&xtime_lock); - } + if (cpu == 0) + xtime_update(ticks_elapsed); return IRQ_HANDLED; } diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 7c589ef81fb0..c94e4a3fe2ef 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -30,7 +30,7 @@ : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ : "cr0", "memory") -static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - int prev; + int ret = 0; + u32 prev; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; __asm__ __volatile__ ( PPC_RELEASE_BARRIER -"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ - cmpw 0,%0,%3\n\ +"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ + cmpw 0,%1,%4\n\ bne- 3f\n" - PPC405_ERR77(0,%2) -"2: stwcx. %4,0,%2\n\ + PPC405_ERR77(0,%3) +"2: stwcx. %5,0,%3\n\ bne- 1b\n" PPC_ACQUIRE_BARRIER "3: .section .fixup,\"ax\"\n\ -4: li %0,%5\n\ +4: li %0,%6\n\ b 3b\n\ .previous\n\ .section __ex_table,\"a\"\n\ .align 3\n\ " PPC_LONG "1b,4b,2b,4b\n\ .previous" \ - : "=&r" (prev), "+m" (*uaddr) + : "+r" (ret), "=&r" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 380d48bacd16..26b8c807f8f1 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -33,9 +33,25 @@ // //---------------------------------------------------------------------------- #include <linux/cache.h> +#include <linux/threads.h> #include <asm/types.h> #include <asm/mmu.h> +/* + * We only have to have statically allocated lppaca structs on + * legacy iSeries, which supports at most 64 cpus. + */ +#ifdef CONFIG_PPC_ISERIES +#if NR_CPUS < 64 +#define NR_LPPACAS NR_CPUS +#else +#define NR_LPPACAS 64 +#endif +#else /* not iSeries */ +#define NR_LPPACAS 1 +#endif + + /* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k * alignment is sufficient to prevent this */ struct lppaca { diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h index 8447d89fbe72..bb1e2cdeb9bf 100644 --- a/arch/powerpc/include/asm/rwsem.h +++ b/arch/powerpc/include/asm/rwsem.h @@ -13,11 +13,6 @@ * by Paul Mackerras <paulus@samba.org>. */ -#include <linux/list.h> -#include <linux/spinlock.h> -#include <asm/atomic.h> -#include <asm/system.h> - /* * the semaphore definition */ @@ -33,47 +28,6 @@ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -struct rw_semaphore { - long count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ -{ \ - RWSEM_UNLOCKED_VALUE, \ - __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) \ -} - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ - do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ - } while (0) - /* * lock for reading */ @@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return sem->count != 0; -} - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_RWSEM_H */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ebf9846f3c3b..f4adf89d7614 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -27,20 +27,6 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S /* - * We only have to have statically allocated lppaca structs on - * legacy iSeries, which supports at most 64 cpus. - */ -#ifdef CONFIG_PPC_ISERIES -#if NR_CPUS < 64 -#define NR_LPPACAS NR_CPUS -#else -#define NR_LPPACAS 64 -#endif -#else /* not iSeries */ -#define NR_LPPACAS 1 -#endif - -/* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call * is now known to fail if the lppaca structure crosses a page diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fd4812329570..0dc95c0aa3be 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1516,7 +1516,8 @@ int start_topology_update(void) { int rc = 0; - if (firmware_has_feature(FW_FEATURE_VPHN) && + /* Disabled until races with load balancing are fixed */ + if (0 && firmware_has_feature(FW_FEATURE_VPHN) && get_lppaca()->shared_proc) { vphn_enabled = 1; setup_cpu_associativity_change_counters(); diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 187a7d32f86a..a3d2ce54ea2e 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, if (!IS_ERR(tmp)) { struct nameidata nd; - ret = path_lookup(tmp, LOOKUP_PARENT, &nd); + ret = kern_path_parent(tmp, &nd); if (!ret) { nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; ret = spufs_create(&nd, flags, mode, neighbor); diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index fdb7384c0c4f..f0491cc28900 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */ pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); - for (i = 0; i < NR_CPUS; i++) { - if (lppaca_of(i).dyn_proc_status >= 2) + for (i = 0; i < NR_LPPACAS; i++) { + if (lppaca[i].dyn_proc_status >= 2) continue; snprintf(p, 32 - (p - buf), "@%d", i); @@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) dt_prop_str(dt, "device_type", device_type_cpu); - index = lppaca_of(i).dyn_hv_phys_proc_index; + index = lppaca[i].dyn_hv_phys_proc_index; d = &xIoHriProcessorVpd[index]; dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index b0863410517f..2946ae10fbfd 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void) * on but calling this function multiple times is fine. */ identify_cpu(0, mfspr(SPRN_PVR)); + initialise_paca(&boot_paca, 0); powerpc_firmware_features |= FW_FEATURE_ISERIES; powerpc_firmware_features |= FW_FEATURE_LPAR; diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 5c5d02de49e9..81cf36b691f1 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -7,7 +7,7 @@ #include <linux/uaccess.h> #include <asm/errno.h> -static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, - int oldval, int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); + return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 423fdda2322d..d0eb4653cebd 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -43,29 +43,6 @@ #ifdef __KERNEL__ -#include <linux/list.h> -#include <linux/spinlock.h> - -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *); - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed long count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - #ifndef __s390x__ #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -81,41 +58,6 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) /* - * initialisation - */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - - -/* * lock for reading */ static inline void __down_read(struct rw_semaphore *sem) @@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return new; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d6b1ed0ec52b..2d9ea11f919a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,8 +83,8 @@ struct uaccess_ops { size_t (*clear_user)(size_t, void __user *); size_t (*strnlen_user)(size_t, const char __user *); size_t (*strncpy_from_user)(size_t, const char __user *, char *); - int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); - int (*futex_atomic_cmpxchg)(int __user *, int old, int new); + int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old); + int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new); }; extern struct uaccess_ops uaccess; diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index 126011df14f1..1d2536cb630b 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *); extern size_t copy_to_user_std(size_t, void __user *, const void *); extern size_t strnlen_user_std(size_t, const char __user *); extern size_t strncpy_from_user_std(size_t, const char __user *, char *); -extern int futex_atomic_cmpxchg_std(int __user *, int, int); -extern int futex_atomic_op_std(int, int __user *, int, int *); +extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32); +extern int futex_atomic_op_std(int, u32 __user *, int, int *); extern size_t copy_from_user_pt(size_t, const void __user *, void *); extern size_t copy_to_user_pt(size_t, void __user *, const void *); -extern int futex_atomic_op_pt(int, int __user *, int, int *); -extern int futex_atomic_cmpxchg_pt(int __user *, int, int); +extern int futex_atomic_op_pt(int, u32 __user *, int, int *); +extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32); #endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 404f2de296dc..74833831417f 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -302,7 +302,7 @@ fault: : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ "m" (*uaddr) : "cc" ); -static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) +static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; @@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) +int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) { int ret; @@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) return ret; } -static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; asm volatile("0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2:\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } -int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; if (segment_eq(get_fs(), KERNEL_DS)) - return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); if (!uaddr) { @@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) } get_page(virt_to_page(uaddr)); spin_unlock(¤t->mm->page_table_lock); - ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); put_page(virt_to_page(uaddr)); return ret; } diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index a6c4f7ed24a4..bb1a7eed42ce 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst) : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ "m" (*uaddr) : "cc"); -int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) +int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; @@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2: sacf 0\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h index a9f16a7f9aea..6cb9f193a95e 100644 --- a/arch/sh/include/asm/futex-irq.h +++ b/arch/sh/include/asm/futex-irq.h @@ -3,7 +3,7 @@ #include <asm/system.h> -static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, +static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *oldval) { unsigned long flags; @@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, +static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *oldval) { unsigned long flags; @@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, +static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *oldval) { unsigned long flags; @@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, +static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *oldval) { unsigned long flags; @@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, +static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *oldval) { unsigned long flags; @@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, - int oldval, int newval) +static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval, + u32 __user *uaddr, + u32 oldval, u32 newval) { unsigned long flags; - int ret, prev = 0; + int ret; + u32 prev = 0; local_irq_save(flags); @@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, local_irq_restore(flags); - if (ret) - return ret; - - return prev; + *uval = prev; + return ret; } #endif /* __ASM_SH_FUTEX_IRQ_H */ diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index 68256ec5fa35..7be39a646fbd 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h @@ -10,7 +10,7 @@ /* XXX: UP variants, fix for SH-4A and SMP.. */ #include <asm/futex-irq.h> -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval); + return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h index 06e2251a5e48..edab57265293 100644 --- a/arch/sh/include/asm/rwsem.h +++ b/arch/sh/include/asm/rwsem.h @@ -11,64 +11,13 @@ #endif #ifdef __KERNEL__ -#include <linux/list.h> -#include <linux/spinlock.h> -#include <asm/atomic.h> -#include <asm/system.h> -/* - * the semaphore definition - */ -struct rw_semaphore { - long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 #define RWSEM_ACTIVE_MASK 0x0000ffff #define RWSEM_WAITING_BIAS (-0x00010000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} /* * lock for reading @@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ASM_SH_RWSEM_H */ diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h index 47f95839dc69..444e7bea23bc 100644 --- a/arch/sparc/include/asm/futex_64.h +++ b/arch/sparc/include/asm/futex_64.h @@ -30,7 +30,7 @@ : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ : "memory") -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret, tem; - if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))) + if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) return -EFAULT; if (unlikely((((unsigned long) uaddr) & 0x3UL))) return -EINVAL; @@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { + int ret = 0; + __asm__ __volatile__( - "\n1: casa [%3] %%asi, %2, %0\n" + "\n1: casa [%4] %%asi, %3, %1\n" "2:\n" " .section .fixup,#alloc,#execinstr\n" " .align 4\n" "3: sethi %%hi(2b), %0\n" " jmpl %0 + %%lo(2b), %%g0\n" - " mov %4, %0\n" + " mov %5, %0\n" " .previous\n" " .section __ex_table,\"a\"\n" " .align 4\n" " .word 1b, 3b\n" " .previous\n" - : "=r" (newval) - : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) + : "+r" (ret), "=r" (newval) + : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) : "memory"); - return newval; + *uval = newval; + return ret; } #endif /* !(_SPARC64_FUTEX_H) */ diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index a2b4302869bc..069bf4d663a1 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -13,53 +13,12 @@ #ifdef __KERNEL__ -#include <linux/list.h> -#include <linux/spinlock.h> - -struct rwsem_waiter; - -struct rw_semaphore { - signed long count; #define RWSEM_UNLOCKED_VALUE 0x00000000L #define RWSEM_ACTIVE_BIAS 0x00000001L #define RWSEM_ACTIVE_MASK 0xffffffffL #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) /* * lock for reading @@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return atomic64_add_return(delta, (atomic64_t *)(&sem->count)); } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _SPARC64_RWSEM_H */ diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index aeaa09a3c655..2cdc131b50ac 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void) static irqreturn_t pcic_timer_handler (int irq, void *h) { - write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 9c743b1886ff..4211bfc9bcad 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now) /* * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * as well as call the "xtime_update()" routine every clocktick */ #define TICK_SIZE (tick_nsec / 1000) @@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id) profile_tick(CPU_PROFILING); #endif - /* Protect counter clear so that do_gettimeoffset works */ - write_seqlock(&xtime_lock); - clear_clock_irq(); - do_timer(1); - - write_sequnlock(&xtime_lock); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index cbddeb38ffda..d3c7a12ad879 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -16,7 +16,7 @@ #define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)]) spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { - [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED + [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash) }; #else /* SMP */ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index fe0d10dcae57..d03ec124a598 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -29,16 +29,16 @@ #include <linux/uaccess.h> #include <linux/errno.h> -extern struct __get_user futex_set(int __user *v, int i); -extern struct __get_user futex_add(int __user *v, int n); -extern struct __get_user futex_or(int __user *v, int n); -extern struct __get_user futex_andn(int __user *v, int n); -extern struct __get_user futex_cmpxchg(int __user *v, int o, int n); +extern struct __get_user futex_set(u32 __user *v, int i); +extern struct __get_user futex_add(u32 __user *v, int n); +extern struct __get_user futex_or(u32 __user *v, int n); +extern struct __get_user futex_andn(u32 __user *v, int n); +extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n); #ifndef __tilegx__ -extern struct __get_user futex_xor(int __user *v, int n); +extern struct __get_user futex_xor(u32 __user *v, int n); #else -static inline struct __get_user futex_xor(int __user *uaddr, int n) +static inline struct __get_user futex_xor(u32 __user *uaddr, int n) { struct __get_user asm_ret = __get_user_4(uaddr); if (!asm_ret.err) { @@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n) } #endif -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, - int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { struct __get_user asm_ret; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; asm_ret = futex_cmpxchg(uaddr, oldval, newval); - return asm_ret.err ? asm_ret.err : asm_ret.val; + *uval = asm_ret.val; + return asm_ret.err; } #ifndef __tilegx__ diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 975613b23dcf..c70e047eed72 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req) #if 0 void mconsole_proc(struct mc_request *req) { - struct nameidata nd; struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; struct file *file; - int n, err; + int n; char *ptr = req->request.data, *buf; mm_segment_t old_fs = get_fs(); ptr += strlen("proc"); ptr = skip_spaces(ptr); - err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); - if (err) { - mconsole_reply(req, "Failed to look up file", 1, 0); - goto out; - } - - err = may_open(&nd.path, MAY_READ, O_RDONLY); - if (result) { - mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); - goto out; - } - - file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, - current_cred()); - err = PTR_ERR(file); + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); goto out; } diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index ba4a98ba39c0..620f5b70957d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -185,7 +185,7 @@ struct ubd { .no_cow = 0, \ .shared = 0, \ .cow = DEFAULT_COW, \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ .request = NULL, \ .start_sg = 0, \ .end_sg = 0, \ diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index 646aa78ba5fd..46a823882437 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -62,7 +62,12 @@ int main(int argc, char *argv[]) if (fseek(f, -4L, SEEK_END)) { perror(argv[1]); } - fread(&olen, sizeof olen, 1, f); + + if (fread(&olen, sizeof(olen), 1, f) != 1) { + perror(argv[1]); + return 1; + } + ilen = ftell(f); olen = getle32(&olen); fclose(f); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 7c6aabd01fc5..430312ba6e3f 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -25,6 +25,8 @@ #define sysretl_audit ia32_ret_from_sys_call #endif + .section .entry.text, "ax" + #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) .macro IA32_ARG_FIXUP noebp=0 @@ -842,4 +844,7 @@ ia32_sys_call_table: .quad sys_fanotify_init .quad sys32_fanotify_mark .quad sys_prlimit64 /* 340 */ + .quad sys_name_to_handle_at + .quad compat_sys_open_by_handle_at + .quad compat_sys_clock_adjtime ia32_syscall_end: diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h new file mode 100644 index 000000000000..e656ad8c0a2e --- /dev/null +++ b/arch/x86/include/asm/ce4100.h @@ -0,0 +1,6 @@ +#ifndef _ASM_CE4100_H_ +#define _ASM_CE4100_H_ + +int ce4100_pci_init(void); + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 220e2ea08e80..91f3e087cf21 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -160,6 +160,7 @@ #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) +#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce44e956..d09bb03653f0 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -37,7 +37,7 @@ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) @@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, - int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { + int ret = 0; #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) /* Real i386 machines have no cmpxchg instruction */ @@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return -ENOSYS; #endif - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" "2:\t.section .fixup, \"ax\"\n" - "3:\tmov %2, %0\n" + "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE(1b, 3b) - : "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "0" (oldval) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "1" (oldval) : "memory" ); - return oldval; + *uval = oldval; + return ret; } #endif diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index ca242d35e873..518bbbb9ee59 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -13,7 +13,6 @@ enum die_val { DIE_PANIC, DIE_NMI, DIE_DIE, - DIE_NMIWATCHDOG, DIE_KERNELDEBUG, DIE_TRAP, DIE_GPF, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 43a18c77676d..823d48223400 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -52,6 +52,9 @@ #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c76f5b92b840..07f46016d3ff 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,7 +7,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index d1e41b0f9b60..df4cd32b4cc6 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -37,26 +37,9 @@ #endif #ifdef __KERNEL__ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/lockdep.h> #include <asm/asm.h> -struct rwsem_waiter; - -extern asmregparm struct rw_semaphore * - rwsem_down_read_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_down_write_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_wake(struct rw_semaphore *); -extern asmregparm struct rw_semaphore * - rwsem_downgrade_wake(struct rw_semaphore *sem); - /* - * the semaphore definition - * * The bias values and the counter type limits the number of * potential readers/writers to 32767 for 32 bits and 2147483647 * for 64 bits. @@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore * #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -typedef signed long rwsem_count_t; - -struct rw_semaphore { - rwsem_count_t count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - - -#define __RWSEM_INITIALIZER(name) \ -{ \ - RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ -} - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - /* * lock for reading */ @@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem) */ static inline int __down_read_trylock(struct rw_semaphore *sem) { - rwsem_count_t result, tmp; + long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) */ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning down_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* adds 0xffff0001, returns the old value */ @@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - rwsem_count_t ret = cmpxchg(&sem->count, - RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; return 0; @@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) */ static inline void __up_read(struct rw_semaphore *sem) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning __up_read\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 1, returns the old value */ @@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning __up_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 0xffff0001, returns the old value */ @@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(rwsem_count_t delta, - struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) { asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" : "+m" (sem->count) @@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta, /* * implement exchange and add functionality */ -static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, - struct rw_semaphore *sem) +static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) { - rwsem_count_t tmp = delta; + long tmp = delta; asm volatile(LOCK_PREFIX "xadd %0,%1" : "+r" (tmp), "+m" (sem->count) @@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, return tmp + delta; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1f4695136776..c1bbfa89a0e2 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,10 +17,20 @@ #endif #include <asm/thread_info.h> #include <asm/cpumask.h> +#include <asm/cpufeature.h> extern int smp_num_siblings; extern unsigned int num_processors; +static inline bool cpu_has_ht_siblings(void) +{ + bool has_siblings = false; +#ifdef CONFIG_SMP + has_siblings = cpu_has_ht && smp_num_siblings > 1; +#endif + return has_siblings; +} + DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index b766a5e8ba0e..ffaf183c619a 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -346,10 +346,13 @@ #define __NR_fanotify_init 338 #define __NR_fanotify_mark 339 #define __NR_prlimit64 340 +#define __NR_name_to_handle_at 341 +#define __NR_open_by_handle_at 342 +#define __NR_clock_adjtime 343 #ifdef __KERNEL__ -#define NR_syscalls 341 +#define NR_syscalls 344 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 363e9b8a715b..5466bea670e7 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init) __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #define __NR_prlimit64 302 __SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 303 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 304 +__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#define __NR_clock_adjtime 305 +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index ce1d54c8a433..3e094af443c3 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -176,7 +176,7 @@ struct bau_msg_payload { struct bau_msg_header { unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ + unsigned int base_dest_nodeid:15; /* nasid of the */ /* bits 20:6 */ /* first bit in uvhub map */ unsigned int command:8; /* message type */ /* bits 28:21 */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a3c28ae4025b..8508bfe52296 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set) static inline int HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op_new, cmd, arg); + return _hypercall2(int, sched_op, cmd, arg); } static inline long @@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value) #endif static inline int -HYPERVISOR_suspend(unsigned long srec) +HYPERVISOR_suspend(unsigned long start_info_mfn) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; + + /* + * For a PV guest the tools require that the start_info mfn be + * present in rdx/edx when the hypercall is made. Per the + * hypercall calling convention this is the third hypercall + * argument, which is start_info_mfn here. + */ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); } static inline int diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f25bdf238a33..c61934fbf22a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -29,8 +29,10 @@ typedef struct xpaddr { /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ #define INVALID_P2M_ENTRY (~0UL) -#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) +#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) /* Maximum amount of memory we can handle in a domain in pages */ #define MAX_DOMAIN_PAGES \ @@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); extern int m2p_add_override(unsigned long mfn, struct page *page); extern int m2p_remove_override(struct page *page); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); +#ifdef CONFIG_XEN_DEBUG_FS +extern int p2m_dump_show(struct seq_file *m, void *v); +#endif static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) mfn = get_phys_to_machine(pfn); if (mfn != INVALID_P2M_ENTRY) - mfn &= ~FOREIGN_FRAME_BIT; + mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); return mfn; } @@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; + int ret = 0; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; + if (unlikely((mfn >> machine_to_phys_order) != 0)) { + pfn = ~0; + goto try_override; + } pfn = 0; /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ - __get_user(pfn, &machine_to_phys_mapping[mfn]); - - /* - * If this appears to be a foreign mfn (because the pfn - * doesn't map back to the mfn), then check the local override - * table to see if there's a better pfn to use. + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); +try_override: + /* ret might be < 0 if there are no entries in the m2p for mfn */ + if (ret < 0) + pfn = ~0; + else if (get_phys_to_machine(pfn) != mfn) + /* + * If this appears to be a foreign mfn (because the pfn + * doesn't map back to the mfn), then check the local override + * table to see if there's a better pfn to use. + * + * m2p_find_override_pfn returns ~0 if it doesn't find anything. + */ + pfn = m2p_find_override_pfn(mfn, ~0); + + /* + * pfn is ~0 if there are no entries in the m2p for mfn or if the + * entry doesn't map back to the mfn and m2p_override doesn't have a + * valid entry for it. */ - if (get_phys_to_machine(pfn) != mfn) - pfn = m2p_find_override_pfn(mfn, pfn); + if (pfn == ~0 && + get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) + pfn = mfn; return pfn; } diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 2329b3eaf8d3..aa8620989162 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void) * its own functions. */ struct xen_pci_frontend_ops { - int (*enable_msi)(struct pci_dev *dev, int **vectors); + int (*enable_msi)(struct pci_dev *dev, int vectors[]); void (*disable_msi)(struct pci_dev *dev); - int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); void (*disable_msix)(struct pci_dev *dev); }; extern struct xen_pci_frontend_ops *xen_pci_frontend; static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, - int **vectors) + int vectors[]) { if (xen_pci_frontend && xen_pci_frontend->enable_msi) return xen_pci_frontend->enable_msi(dev, vectors); @@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) xen_pci_frontend->disable_msi(dev); } static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, - int **vectors, int nvec) + int vectors[], int nvec) { if (xen_pci_frontend && xen_pci_frontend->enable_msix) return xen_pci_frontend->enable_msix(dev, vectors, nvec); diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 13a389179514..452932d34730 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void) addr += size; } - printk(KERN_INFO "Scanning %d areas for low memory corruption\n", - num_scan_areas); + if (num_scan_areas) + printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); } @@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy) { check_for_bios_corruption(); schedule_delayed_work(&bios_check_work, - round_jiffies_relative(corruption_check_period*HZ)); + round_jiffies_relative(corruption_check_period*HZ)); } static int start_periodic_check_for_corruption(void) { - if (!memory_corruption_check || corruption_check_period == 0) + if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0) return 0; printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 4f6f679f2799..4a5a42b842ad 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu) cmd_incomplete: iowrite16(0, &pcch_hdr->status); spin_unlock(&pcc_lock); - return -EINVAL; + return 0; } static int pcc_cpufreq_target(struct cpufreq_policy *policy, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d977a2ea693..26604188aa49 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -30,6 +30,7 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> #include <asm/compat.h> +#include <asm/smp.h> #if 0 #undef wrmsrl @@ -93,6 +94,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +struct intel_percore; + #define MAX_LBR_ENTRIES 16 struct cpu_hw_events { @@ -128,6 +131,13 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* + * Intel percore register state. + * Coordinate shared resources between HT threads. + */ + int percore_used; /* Used by this CPU? */ + struct intel_percore *per_core; + + /* * AMD specific bits */ struct amd_nb *amd_nb; @@ -166,8 +176,10 @@ struct cpu_hw_events { /* * Constraint on the Event code + UMask */ -#define PEBS_EVENT_CONSTRAINT(c, n) \ +#define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define PEBS_EVENT_CONSTRAINT(c, n) \ + INTEL_UEVENT_CONSTRAINT(c, n) #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -175,6 +187,28 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->weight; (e)++) +/* + * Extra registers for specific events. + * Some events need large masks and require external MSRs. + * Define a mapping to these extra registers. + */ +struct extra_reg { + unsigned int event; + unsigned int msr; + u64 config_mask; + u64 valid_mask; +}; + +#define EVENT_EXTRA_REG(e, ms, m, vm) { \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + } +#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) + union perf_capabilities { struct { u64 lbr_format : 6; @@ -219,6 +253,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + struct event_constraint *percore_constraints; void (*quirks)(void); int perfctr_second_write; @@ -247,6 +282,11 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + + /* + * Extra registers for events + */ + struct extra_reg *extra_regs; }; static struct x86_pmu x86_pmu __read_mostly; @@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; /* * Propagate event elapsed time into the generic event. @@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event) */ again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(hwc->event_base + idx, new_raw_count); + rdmsrl(hwc->event_base, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -321,6 +365,49 @@ again: return new_raw_count; } +/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ +static inline int x86_pmu_addr_offset(int index) +{ + if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + return index << 1; + return index; +} + +static inline unsigned int x86_pmu_config_addr(int index) +{ + return x86_pmu.eventsel + x86_pmu_addr_offset(index); +} + +static inline unsigned int x86_pmu_event_addr(int index) +{ + return x86_pmu.perfctr + x86_pmu_addr_offset(index); +} + +/* + * Find and validate any extra registers to set up. + */ +static int x86_pmu_extra_regs(u64 config, struct perf_event *event) +{ + struct extra_reg *er; + + event->hw.extra_reg = 0; + event->hw.extra_config = 0; + + if (!x86_pmu.extra_regs) + return 0; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + event->hw.extra_reg = er->msr; + event->hw.extra_config = event->attr.config1; + break; + } + return 0; +} + static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); @@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) goto perfctr_fail; } for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) goto eventsel_fail; } @@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void) eventsel_fail: for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); + release_evntsel_nmi(x86_pmu_config_addr(i)); i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); return false; } @@ -360,8 +447,8 @@ static void release_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); + release_evntsel_nmi(x86_pmu_config_addr(i)); } } @@ -382,7 +469,7 @@ static bool check_hw_exists(void) * complain and bail. */ for (i = 0; i < x86_pmu.num_counters; i++) { - reg = x86_pmu.eventsel + i; + reg = x86_pmu_config_addr(i); ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; @@ -407,8 +494,8 @@ static bool check_hw_exists(void) * that don't trap on the MSR access and always return 0s. */ val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu.perfctr, val); - ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); + ret = checking_wrmsrl(x86_pmu_event_addr(0), val); + ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); if (ret || val != val_new) goto msr_fail; @@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void) } static inline int -set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) { + struct perf_event_attr *attr = &event->attr; unsigned int cache_type, cache_op, cache_result; u64 config, val; @@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return -EINVAL; hwc->config |= val; - - return 0; + attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; + return x86_pmu_extra_regs(val, event); } static int x86_setup_perfctr(struct perf_event *event) @@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event) } if (attr->type == PERF_TYPE_RAW) - return 0; + return x86_pmu_extra_regs(event->attr.config, event); if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); + return set_ext_hw_attr(hwc, event); if (attr->config >= x86_pmu.max_events) return -EINVAL; @@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(x86_pmu.eventsel + idx, val); + rdmsrl(x86_pmu_config_addr(idx), val); if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + wrmsrl(x86_pmu_config_addr(idx), val); } } @@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu) x86_pmu.disable_all(); } +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) +{ + if (hwc->extra_reg) + wrmsrl(hwc->extra_reg, hwc->extra_config); + wrmsrl(hwc->config_base, hwc->config | enable_mask); +} + static void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - u64 val; + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; if (!test_bit(idx, cpuc->active_mask)) continue; - val = event->hw.config; - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } } @@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->event_base = 0; } else if (hwc->idx >= X86_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0; } else { - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu_config_addr(hwc->idx); + hwc->event_base = x86_pmu_event_addr(hwc->idx); } } @@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu.enable_all(added); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, - u64 enable_mask) -{ - wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); -} - static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base + hwc->idx, hwc->config); + wrmsrl(hwc->config_base, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event) */ local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need @@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event) * is updated properly */ if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); } @@ -1113,8 +1195,8 @@ void perf_event_print_debug(void) pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); + rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrl(x86_pmu_event_addr(idx), pmc_count); prev_left = per_cpu(pmc_prev_left[idx], cpu); @@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -int __init init_hw_perf_events(void) +static int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1608,7 +1690,7 @@ out: return ret; } -int x86_pmu_event_init(struct perf_event *event) +static int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67e2202a6039..461f62bbd774 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event) /* * AMD64 events are detected based on their event codes. */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + static inline int amd_is_nb_event(struct hw_perf_event *hwc) { return (hwc->config & 0xe0) == 0xe0; @@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_dead = amd_pmu_cpu_dead, }; +/* AMD Family 15h */ + +#define AMD_EVENT_TYPE_MASK 0x000000F0ULL + +#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL +#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL +#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL +#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL +#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL +#define AMD_EVENT_EX_LS 0x000000C0ULL +#define AMD_EVENT_DE 0x000000D0ULL +#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL + +/* + * AMD family 15h event code/PMC mappings: + * + * type = event_code & 0x0F0: + * + * 0x000 FP PERF_CTL[5:3] + * 0x010 FP PERF_CTL[5:3] + * 0x020 LS PERF_CTL[5:0] + * 0x030 LS PERF_CTL[5:0] + * 0x040 DC PERF_CTL[5:0] + * 0x050 DC PERF_CTL[5:0] + * 0x060 CU PERF_CTL[2:0] + * 0x070 CU PERF_CTL[2:0] + * 0x080 IC/DE PERF_CTL[2:0] + * 0x090 IC/DE PERF_CTL[2:0] + * 0x0A0 --- + * 0x0B0 --- + * 0x0C0 EX/LS PERF_CTL[5:0] + * 0x0D0 DE PERF_CTL[2:0] + * 0x0E0 NB NB_PERF_CTL[3:0] + * 0x0F0 NB NB_PERF_CTL[3:0] + * + * Exceptions: + * + * 0x003 FP PERF_CTL[3] + * 0x00B FP PERF_CTL[3] + * 0x00D FP PERF_CTL[3] + * 0x023 DE PERF_CTL[2:0] + * 0x02D LS PERF_CTL[3] + * 0x02E LS PERF_CTL[3,0] + * 0x043 CU PERF_CTL[2:0] + * 0x045 CU PERF_CTL[2:0] + * 0x046 CU PERF_CTL[2:0] + * 0x054 CU PERF_CTL[2:0] + * 0x055 CU PERF_CTL[2:0] + * 0x08F IC PERF_CTL[0] + * 0x187 DE PERF_CTL[0] + * 0x188 DE PERF_CTL[0] + * 0x0DB EX PERF_CTL[5:0] + * 0x0DC LS PERF_CTL[5:0] + * 0x0DD LS PERF_CTL[5:0] + * 0x0DE LS PERF_CTL[5:0] + * 0x0DF LS PERF_CTL[5:0] + * 0x1D6 EX PERF_CTL[5:0] + * 0x1D8 EX PERF_CTL[5:0] + */ + +static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); +static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); +static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); +static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); +static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); + +static struct event_constraint * +amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + unsigned int event_code = amd_get_event_code(&event->hw); + + switch (event_code & AMD_EVENT_TYPE_MASK) { + case AMD_EVENT_FP: + switch (event_code) { + case 0x003: + case 0x00B: + case 0x00D: + return &amd_f15_PMC3; + default: + return &amd_f15_PMC53; + } + case AMD_EVENT_LS: + case AMD_EVENT_DC: + case AMD_EVENT_EX_LS: + switch (event_code) { + case 0x023: + case 0x043: + case 0x045: + case 0x046: + case 0x054: + case 0x055: + return &amd_f15_PMC20; + case 0x02D: + return &amd_f15_PMC3; + case 0x02E: + return &amd_f15_PMC30; + default: + return &amd_f15_PMC50; + } + case AMD_EVENT_CU: + case AMD_EVENT_IC_DE: + case AMD_EVENT_DE: + switch (event_code) { + case 0x08F: + case 0x187: + case 0x188: + return &amd_f15_PMC0; + case 0x0DB ... 0x0DF: + case 0x1D6: + case 0x1D8: + return &amd_f15_PMC50; + default: + return &amd_f15_PMC20; + } + case AMD_EVENT_NB: + /* not yet implemented */ + return &emptyconstraint; + default: + return &emptyconstraint; + } +} + +static __initconst const struct x86_pmu amd_pmu_f15h = { + .name = "AMD Family 15h", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = amd_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_F15H_PERF_CTL, + .perfctr = MSR_F15H_PERF_CTR, + .event_map = amd_pmu_event_map, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_counters = 6, + .cntval_bits = 48, + .cntval_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints_f15h, + /* nortbridge counters not yet implemented: */ +#if 0 + .put_event_constraints = amd_put_event_constraints, + + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, +#endif +}; + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) return -ENODEV; - x86_pmu = amd_pmu; + /* + * If core performance counter extensions exists, it must be + * family 15h, otherwise fail. See x86_pmu_addr_offset(). + */ + switch (boot_cpu_data.x86) { + case 0x15: + if (!cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu_f15h; + break; + default: + if (cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu; + break; + } /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 008835c1d79c..8fc2b2cee1da 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,5 +1,27 @@ #ifdef CONFIG_CPU_SUP_INTEL +#define MAX_EXTRA_REGS 2 + +/* + * Per register state. + */ +struct er_account { + int ref; /* reference count */ + unsigned int extra_reg; /* extra MSR number */ + u64 extra_config; /* extra MSR config */ +}; + +/* + * Per core state + * This used to coordinate shared registers for HT threads. + */ +struct intel_percore { + raw_spinlock_t lock; /* protect structure */ + struct er_account regs[MAX_EXTRA_REGS]; + int refcnt; /* number of threads */ + unsigned core_id; +}; + /* * Intel PerfMon, used on Core and later. */ @@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct extra_reg intel_nehalem_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_nehalem_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_westmere_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_snb_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_westmere_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_westmere_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + INTEL_EVENT_CONSTRAINT(0xbb, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_gen_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +static __initconst const u64 snb_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + /* + * TBD: Need Off-core Response Performance Monitoring support + */ + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01bb, + /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, }, [ C(DTLB) ] = { @@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids }, }; +/* + * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 + */ + +#define DMND_DATA_RD (1 << 0) +#define DMND_RFO (1 << 1) +#define DMND_WB (1 << 3) +#define PF_DATA_RD (1 << 4) +#define PF_DATA_RFO (1 << 5) +#define RESP_UNCORE_HIT (1 << 8) +#define RESP_MISS (0xf600) /* non uncore hit */ + +static __initconst const u64 nehalem_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, + }, + } +}; + static __initconst const u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -691,8 +905,8 @@ static void intel_pmu_reset(void) printk("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); + checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); @@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event) } static struct event_constraint * +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; + struct event_constraint *c; + struct intel_percore *pc; + struct er_account *era; + int i; + int free_slot; + int found; + + if (!x86_pmu.percore_constraints || hwc->extra_alloc) + return NULL; + + for (c = x86_pmu.percore_constraints; c->cmask; c++) { + if (e != c->code) + continue; + + /* + * Allocate resource per core. + */ + pc = cpuc->per_core; + if (!pc) + break; + c = &emptyconstraint; + raw_spin_lock(&pc->lock); + free_slot = -1; + found = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { + /* Allow sharing same config */ + if (hwc->extra_config == era->extra_config) { + era->ref++; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + /* else conflict */ + found = 1; + break; + } else if (era->ref == 0 && free_slot == -1) + free_slot = i; + } + if (!found && free_slot != -1) { + era = &pc->regs[free_slot]; + era->ref = 1; + era->extra_reg = hwc->extra_reg; + era->extra_config = hwc->extra_config; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + raw_spin_unlock(&pc->lock); + return c; + } + + return NULL; +} + +static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { struct event_constraint *c; @@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; + c = intel_percore_constraints(cpuc, event); + if (c) + return c; + return x86_get_event_constraints(cpuc, event); } +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct extra_reg *er; + struct intel_percore *pc; + struct er_account *era; + struct hw_perf_event *hwc = &event->hw; + int i, allref; + + if (!cpuc->percore_used) + return; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (hwc->config & er->config_mask)) + continue; + + pc = cpuc->per_core; + raw_spin_lock(&pc->lock); + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && + era->extra_config == hwc->extra_config && + era->extra_reg == er->msr) { + era->ref--; + hwc->extra_alloc = 0; + break; + } + } + allref = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) + allref += pc->regs[i].ref; + if (allref == 0) + cpuc->percore_used = 0; + raw_spin_unlock(&pc->lock); + break; + } +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, }; +static int intel_pmu_cpu_prepare(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + if (!cpu_has_ht_siblings()) + return NOTIFY_OK; + + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), + GFP_KERNEL, cpu_to_node(cpu)); + if (!cpuc->per_core) + return NOTIFY_BAD; + + raw_spin_lock_init(&cpuc->per_core->lock); + cpuc->per_core->core_id = -1; + return NOTIFY_OK; +} + static void intel_pmu_cpu_starting(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int core_id = topology_core_id(cpu); + int i; + init_debug_store_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up. */ intel_pmu_lbr_reset(); + + if (!cpu_has_ht_siblings()) + return; + + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; + + if (pc && pc->core_id == core_id) { + kfree(cpuc->per_core); + cpuc->per_core = pc; + break; + } + } + + cpuc->per_core->core_id = core_id; + cpuc->per_core->refcnt++; } static void intel_pmu_cpu_dying(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct intel_percore *pc = cpuc->per_core; + + if (pc) { + if (pc->core_id == -1 || --pc->refcnt == 0) + kfree(pc); + cpuc->per_core = NULL; + } + fini_debug_store_on_cpu(cpu); } @@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, }; @@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_core(); x86_pmu.event_constraints = intel_core2_event_constraints; + x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; pr_cont("Core2 events, "); break; @@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void) case 46: /* 45 nm nehalem-ex, "Beckton" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_nehalem_event_constraints; + x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; + x86_pmu.percore_constraints = intel_nehalem_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.extra_regs = intel_nehalem_extra_regs; pr_cont("Nehalem events, "); break; @@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_atom(); x86_pmu.event_constraints = intel_gen_event_constraints; + x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; pr_cont("Atom events, "); break; @@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void) case 44: /* 32 nm nehalem, "Gulftown" */ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.percore_constraints = intel_westmere_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; + x86_pmu.extra_regs = intel_westmere_extra_regs; pr_cont("Westmere events, "); break; + case 42: /* SandyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + intel_pmu_lbr_init_nhm(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_snb_pebs_events; + pr_cont("SandyBridge events, "); + break; + default: /* * default constraints for v2 and up diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b7dcd9f2b8a0..b95c66ae4a2a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void) /* * PEBS */ - -static struct event_constraint intel_core_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ +static struct event_constraint intel_core2_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_atom_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ - PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ +static struct event_constraint intel_nehalem_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_snb_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */ + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */ + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */ + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */ + PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ + PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ + PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ + PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ + PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ + PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ + PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ + PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ + PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */ + PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ + PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ + PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */ + PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */ + PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; @@ -695,20 +753,17 @@ static void intel_ds_init(void) printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; - x86_pmu.pebs_constraints = intel_core_pebs_events; break; case 1: printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.pebs_constraints = intel_nehalem_pebs_events; break; default: printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; - break; } } } diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ff751a9f182b..3769ac822f96 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) u64 v; /* an official way for overflow indication */ - rdmsrl(hwc->config_base + hwc->idx, v); + rdmsrl(hwc->config_base, v); if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); return 1; } @@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (u64)(p4_config_unpack_cccr(hwc->config)) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event) p4_pmu_enable_pebs(hwc->config); (void)checking_wrmsrl(escr_addr, escr_conf); - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 34ba07be2cda..20c097e33860 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static __initconst const struct x86_pmu p6_pmu = { diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d5a236615501..966512b2cacf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) /* returns the bit offset of the performance counter register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTR) + return (msr - MSR_F15H_PERF_CTR) >> 1; return msr - MSR_K7_PERFCTR0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) @@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) /* returns the bit offset of the event selection register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTL) + return (msr - MSR_F15H_PERF_CTL) >> 1; return msr - MSR_K7_EVNTSEL0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723a6a1b..220a1c11cfde 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - flags = oops_begin(); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - oops_end(flags, regs, 0); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - static int __init oops_setup(char *s) { if (!s) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2878821cb8c1..fa41f7298c84 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -65,6 +65,8 @@ #define sysexit_audit syscall_exit_work #endif + .section .entry.text, "ax" + /* * We use macros for low-level operations which need to be overridden * for paravirtualization. The following will never clobber any registers: @@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) */ .section .init.rodata,"a" ENTRY(interrupt) -.text +.section .entry.text, "ax" .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .long 1b - .text + .section .entry.text, "ax" vector=vector+1 .endif .endr diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bbd5c80cb09b..c32cbbcff7b9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,6 +61,8 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 + .section .entry.text, "ax" + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -744,7 +746,7 @@ END(stub_rt_sigreturn) */ .section .init.rodata,"a" ENTRY(interrupt) - .text + .section .entry.text .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .quad 1b - .text + .section .entry.text vector=vector+1 .endif .endr diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 382eb2936d4d..a93742a57468 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer) == -EBUSY) { - *parent = old; - return; - } - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; *parent = old; + return; + } + + if (ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer) == -EBUSY) { + *parent = old; + return; } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a4130005028a..7c64c420a9f6 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMIWATCHDOG: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup: */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - return NOTIFY_STOP; - } - /* Enter debugger: */ - break; - case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index d91c477b3f62..c969fd9d1566 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr) if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; + /* + * Do not optimize in the entry code due to the unstable + * stack handling. + */ + if ((paddr >= (unsigned long )__entry_text_start) && + (paddr < (unsigned long )__entry_text_end)) + return 0; + /* Check there is enough space for a relative jump. */ if (size - offset < RELATIVEJUMP_SIZE) return 0; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index b35786dc9b8f..5f181742e8f9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -340,3 +340,6 @@ ENTRY(sys_call_table) .long sys_fanotify_init .long sys_fanotify_mark .long sys_prlimit64 /* 340 */ + .long sys_name_to_handle_at + .long sys_open_by_handle_at + .long sys_clock_adjtime diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e9f7a3c838c3..0381e1f3baed 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + ENTRY_TEXT IRQENTRY_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1357d7cf4ec8..db932760ea82 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall, TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( - __field( __u16, code ) - __field( bool, fast ) __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) + __field( __u16, code ) + __field( bool, fast ) ), TP_fast_assign( - __entry->code = code; - __entry->fast = fast; __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; + __entry->code = code; + __entry->fast = fast; ), TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d90ceb882a4..20e3f8702d1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -229,15 +229,14 @@ void vmalloc_sync_all(void) for (address = VMALLOC_START & PMD_MASK; address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { - - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; pmd_t *ret; + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -247,7 +246,7 @@ void vmalloc_sync_all(void) if (!ret) break; } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } @@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { if (fault & VM_FAULT_OOM) { + /* Kernel mode? Handle exceptions or die: */ + if (!(error_code & PF_USER)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address); + return; + } + out_of_memory(regs, error_code, address); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..c14a5422e152 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; spinlock_t *pgt_lock; pgd = (pgd_t *)page_address(page) + pgd_index(address); + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(pgt_lock); } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 95ea1551eebc..1337c51b07d7 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -780,11 +780,7 @@ void __cpuinit numa_add_cpu(int cpu) int physnid; int nid = NUMA_NO_NODE; - apicid = early_per_cpu(x86_cpu_to_apicid, cpu); - if (apicid != BAD_APICID) - nid = apicid_to_node[apicid]; - if (nid == NUMA_NO_NODE) - nid = early_cpu_to_node(cpu); + nid = early_cpu_to_node(cpu); BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d343b3c81f3c..90825f2eb0f4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; void update_page_count(int level, unsigned long pages) { - unsigned long flags; - /* Protect against CPA */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); direct_pages_count[level] += pages; - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } static void split_page_count(int level) @@ -394,7 +392,7 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; @@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, if (cpa->force_split) return 1; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } out_unlock: - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, pfn, pfninc = 1; + unsigned long pfn, pfninc = 1; unsigned int i, level; pte_t *pbase, *tmp; pgprot_t ref_prot; @@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (!base) return -ENOMEM; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up for us already: @@ -591,7 +589,7 @@ out_unlock: */ if (base) __free_page(base); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return 0; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 500242d3c96d..0113d19c8aa6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) static void pgd_dtor(pgd_t *pgd) { - unsigned long flags; /* can be called from interrupt context */ - if (SHARED_KERNEL_PMD) return; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - unsigned long flags; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_ctor(mm, pgd); pgd_prepopulate_pmd(mm, pgd, pmds); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return pgd; diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 85b68ef5e809..9260b3eb18d4 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -34,6 +34,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <asm/ce4100.h> #include <asm/pci_x86.h> struct sim_reg { @@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = { .write = ce4100_conf_write, }; -static int __init ce4100_pci_init(void) +int __init ce4100_pci_init(void) { init_sim_regs(); raw_pci_ops = &ce4100_pci_conf; - return 0; + /* Indicate caller that it should invoke pci_legacy_init() */ + return 1; } -subsys_initcall(ce4100_pci_init); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..8c4085a95ef1 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -20,7 +20,8 @@ #include <asm/xen/pci.h> #ifdef CONFIG_ACPI -static int xen_hvm_register_pirq(u32 gsi, int triggering) +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) { int rc, irq; struct physdev_map_pirq map_irq; @@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return -1; } - if (triggering == ACPI_EDGE_SENSITIVE) { + if (trigger == ACPI_EDGE_SENSITIVE) { shareable = 0; name = "ioapic-edge"; } else { @@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return irq; } - -static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, - int trigger, int polarity) -{ - return xen_hvm_register_pirq(gsi, trigger); -} #endif #if defined(CONFIG_PCI_MSI) @@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, pirq, ret = 0; + int irq, pirq; struct msi_desc *msidesc; struct msi_msg msg; @@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) __read_msi_msg(msidesc, &msg); pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); - if (irq < 0) + if (msg.data != XEN_PIRQ_MSI_DATA || + xen_irq_from_pirq(pirq) < 0) { + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) goto error; - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" - " pirq=%d\n", irq, pirq); - return 0; + xen_msi_compose_msg(dev, pirq, &msg); + __write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); + } else { + dev_dbg(&dev->dev, + "xen: msi already bound to pirq=%d\n", pirq); } - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); - if (irq < 0 || pirq < 0) + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (irq < 0) goto error; - printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); - xen_msi_compose_msg(dev, pirq, &msg); - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - write_msi_msg(irq, &msg); + dev_dbg(&dev->dev, + "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); } return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); - - return ret; + dev_err(&dev->dev, + "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + return -ENODEV; } /* @@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return -ENOMEM; if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + ret = xen_pci_frontend_enable_msix(dev, v, nvec); else - ret = xen_pci_frontend_enable_msi(dev, &v); + ret = xen_pci_frontend_enable_msi(dev, v); if (ret) goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_allocate_pirq(v[i], 0, /* not sharable */ - (type == PCI_CAP_ID_MSIX) ? - "pcifront-msi-x" : "pcifront-msi"); - if (irq < 0) { - ret = -1; + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : + "pcifront-msi"); + if (irq < 0) goto free; - } - - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error_while; i++; } kfree(v); return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); free: kfree(v); return ret; @@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_pci_frontend_disable_msix(dev); else xen_pci_frontend_disable_msi(dev); + + /* Free the IRQ's and the msidesc using the generic code. */ + default_teardown_msi_irqs(dev); } static void xen_teardown_msi_irq(unsigned int irq) @@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq) xen_destroy_irq(irq); } +#ifdef CONFIG_XEN_DOM0 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret; + int ret = 0; struct msi_desc *msidesc; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_create_msi_irq(dev, msidesc, type); - if (irq < 0) - return -1; + struct physdev_map_pirq map_irq; - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error; - } - return 0; + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; -error: - xen_destroy_irq(irq); + if (type == PCI_CAP_ID_MSIX) { + int pos; + u32 table_offset, bir; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (ret) { + dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + goto out; + } + + ret = xen_bind_pirq_msi_to_irq(dev, msidesc, + map_irq.pirq, map_irq.index, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (ret < 0) + goto out; + } + ret = 0; +out: return ret; } #endif +#endif static int xen_pcifront_enable_irq(struct pci_dev *dev) { int rc; int share = 1; + u8 gsi; - dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); - - if (dev->irq < 0) - return -EINVAL; + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); + return rc; + } - if (dev->irq < NR_IRQS_LEGACY) + if (gsi < NR_IRQS_LEGACY) share = 0; - rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + rc = xen_allocate_pirq(gsi, share, "pcifront"); if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", - dev->irq, rc); + dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", + gsi, rc); return rc; } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); return 0; } diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index d2c0d51a7178..cd6f184c3b3f 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -15,6 +15,7 @@ #include <linux/serial_reg.h> #include <linux/serial_8250.h> +#include <asm/ce4100.h> #include <asm/setup.h> #include <asm/io.h> @@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = sdv_find_smp_config; + x86_init.pci.init = ce4100_pci_init; } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index df58e9cad96a..a7b38d35c29a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode) memset(bd2, 0, sizeof(struct bau_desc)); bd2->header.sw_ack_flag = 1; /* - * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub + * base_dest_nodeid is the nasid of the first uvhub * in the partition. The bit map will indicate uvhub numbers, * which are 0-N in a partition. Pnodes are unique system-wide. */ - bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; + bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); bd2->header.dest_subnodeid = 0x10; /* the LB */ bd2->header.command = UV_NET_ENDPOINT_INTD; bd2->header.int_both = 1; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5b54892e4bc3..e4343fe488ed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -48,3 +48,11 @@ config XEN_DEBUG_FS help Enable statistics output and various tuning options in debugfs. Enabling this option may incur a significant performance overhead. + +config XEN_DEBUG + bool "Enable Xen debug checks" + depends on XEN + default n + help + Enable various WARN_ON checks in the Xen MMU code. + Enabling this option WILL incur a significant performance overhead. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50542efe45fb..49dbd78ec3cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor) xen_setup_features(); - pv_info = xen_info; - pv_info.kernel_rpl = 0; + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; return 0; } -void xen_hvm_init_shared_info(void) +void __ref xen_hvm_init_shared_info(void) { int cpu; struct xen_add_to_physmap xatp; @@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_have_vector_callback) + xen_init_lock_cpu(cpu); break; default: break; @@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void) if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; + xen_hvm_smp_init(); register_cpu_notifier(&xen_hvm_cpu_notifier); xen_unplug_emulated_devices(); have_vcpu_info_placement = 0; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e92b61ad574..832765c0fb8c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/seq_file.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn = pfn_to_mfn(pfn); + unsigned long mfn; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + mfn = get_phys_to_machine(pfn); + else + mfn = pfn; /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; + } else { + /* + * Paramount to do this test _after_ the + * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & + * IDENTITY_FRAME_BIT resolves to true. + */ + mfn &= ~FOREIGN_FRAME_BIT; + if (mfn & IDENTITY_FRAME_BIT) { + mfn &= ~IDENTITY_FRAME_BIT; + flags |= _PAGE_IOMAP; + } } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } @@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); +#ifdef CONFIG_XEN_DEBUG +pte_t xen_make_pte_debug(pteval_t pte) +{ + phys_addr_t addr = (pte & PTE_PFN_MASK); + phys_addr_t other_addr; + bool io_page = false; + pte_t _pte; + + if (pte & _PAGE_IOMAP) + io_page = true; + + _pte = xen_make_pte(pte); + + if (!addr) + return _pte; + + if (io_page && + (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { + other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; + WARN(addr != other_addr, + "0x%lx is using VM_IO, but it is 0x%lx!\n", + (unsigned long)addr, (unsigned long)other_addr); + } else { + pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; + other_addr = (_pte.pte & PTE_PFN_MASK); + WARN((addr == other_addr) && (!io_page) && (!iomap_set), + "0x%lx is missing VM_IO (and wasn't fixed)!\n", + (unsigned long)addr); + } + + return _pte; +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); +#endif + pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); @@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm) */ void xen_mm_pin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) { @@ -998,7 +1047,7 @@ void xen_mm_pin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) */ void xen_mm_unpin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { @@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, in_frames[i] = virt_to_mfn(vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); if (out_frames) out_frames[i] = virt_to_pfn(vaddr); @@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); #ifdef CONFIG_XEN_DEBUG_FS +static int p2m_dump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, p2m_dump_show, NULL); +} + +static const struct file_operations p2m_dump_fops = { + .open = p2m_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *d_mmu_debug; static int __init xen_mmu_debugfs(void) @@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void) debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, &mmu_stats.prot_commit_batched); + debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); return 0; } fs_initcall(xen_mmu_debugfs); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index fd12d7ce7ff9..215a3ce61068 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -23,6 +23,129 @@ * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to * 512 and 1024 entries respectively. + * + * In short, these structures contain the Machine Frame Number (MFN) of the PFN. + * + * However not all entries are filled with MFNs. Specifically for all other + * leaf entries, or for the top root, or middle one, for which there is a void + * entry, we assume it is "missing". So (for example) + * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. + * + * We also have the possibility of setting 1-1 mappings on certain regions, so + * that: + * pfn_to_mfn(0xc0000)=0xc0000 + * + * The benefit of this is, that we can assume for non-RAM regions (think + * PCI BARs, or ACPI spaces), we can create mappings easily b/c we + * get the PFN value to match the MFN. + * + * For this to work efficiently we have one new page p2m_identity and + * allocate (via reserved_brk) any other pages we need to cover the sides + * (1GB or 4MB boundary violations). All entries in p2m_identity are set to + * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, + * no other fancy value). + * + * On lookup we spot that the entry points to p2m_identity and return the + * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. + * If the entry points to an allocated page, we just proceed as before and + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in + * appropriate functions (pfn_to_mfn). + * + * The reason for having the IDENTITY_FRAME_BIT instead of just returning the + * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a + * non-identity pfn. To protect ourselves against we elect to set (and get) the + * IDENTITY_FRAME_BIT on all identity mapped PFNs. + * + * This simplistic diagram is used to explain the more subtle piece of code. + * There is also a digram of the P2M at the end that can help. + * Imagine your E820 looking as so: + * + * 1GB 2GB + * /-------------------+---------\/----\ /----------\ /---+-----\ + * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | + * \-------------------+---------/\----/ \----------/ \---+-----/ + * ^- 1029MB ^- 2001MB + * + * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), + * 2048MB = 524288 (0x80000)] + * + * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB + * is actually not present (would have to kick the balloon driver to put it in). + * + * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: + * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start + * of the PFN and the end PFN (263424 and 512256 respectively). The first step + * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page + * covers 512^2 of page estate (1GB) and in case the start or end PFN is not + * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn + * to end pfn. We reserve_brk top leaf pages if they are missing (means they + * point to p2m_mid_missing). + * + * With the E820 example above, 263424 is not 1GB aligned so we allocate a + * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. + * Each entry in the allocate page is "missing" (points to p2m_missing). + * + * Next stage is to determine if we need to do a more granular boundary check + * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. + * We check if the start pfn and end pfn violate that boundary check, and if + * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer + * granularity of setting which PFNs are missing and which ones are identity. + * In our example 263424 and 512256 both fail the check so we reserve_brk two + * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" + * values) and assign them to p2m[1][2] and p2m[1][488] respectively. + * + * At this point we would at minimum reserve_brk one page, but could be up to + * three. Each call to set_phys_range_identity has at maximum a three page + * cost. If we were to query the P2M at this stage, all those entries from + * start PFN through end PFN (so 1029MB -> 2001MB) would return + * INVALID_P2M_ENTRY ("missing"). + * + * The next step is to walk from the start pfn to the end pfn setting + * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. + * If we find that the middle leaf is pointing to p2m_missing we can swap it + * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this + * point we do not need to worry about boundary aligment (so no need to + * reserve_brk a middle page, figure out which PFNs are "missing" and which + * ones are identity), as that has been done earlier. If we find that the + * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference + * that page (which covers 512 PFNs) and set the appropriate PFN with + * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we + * set from p2m[1][2][256->511] and p2m[1][488][0->256] with + * IDENTITY_FRAME_BIT set. + * + * All other regions that are void (or not filled) either point to p2m_missing + * (considered missing) or have the default value of INVALID_P2M_ENTRY (also + * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] + * contain the INVALID_P2M_ENTRY value and are considered "missing." + * + * This is what the p2m ends up looking (for the E820 above) with this + * fabulous drawing: + * + * p2m /--------------\ + * /-----\ | &mfn_list[0],| /-----------------\ + * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | + * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | + * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | + * |-----| \ | [p2m_identity]+\\ | .... | + * | 2 |--\ \-------------------->| ... | \\ \----------------/ + * |-----| \ \---------------/ \\ + * | 3 |\ \ \\ p2m_identity + * |-----| \ \-------------------->/---------------\ /-----------------\ + * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | + * \-----/ / | [p2m_identity]+-->| ..., ~0 | + * / /---------------\ | .... | \-----------------/ + * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | + * / | IDENTITY[@256]|<----/ \---------------/ + * / | ~0, ~0, .... | + * | \---------------/ + * | + * p2m_missing p2m_missing + * /------------------\ /------------\ + * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | + * | [p2m_mid_missing]+---->| ..., ~0 | + * \------------------/ \------------/ + * + * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ #include <linux/init.h> @@ -30,6 +153,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <asm/cache.h> #include <asm/setup.h> @@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); +static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); + RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); +/* We might hit two boundary violations at the start and end, at max each + * boundary violation will require three middle nodes. */ +RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_P2M_PFN); @@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m) * - After resume we're called from within stop_machine, but the mfn * tree should alreay be completely allocated. */ -void xen_build_mfn_list_list(void) +void __ref xen_build_mfn_list_list(void) { unsigned long pfn; @@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_init(p2m_top); + p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_init(p2m_identity); + /* * The domain builder gives us a pre-constructed p2m array in * mfn_list for all the pages initially given to us, so we just @@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* + * The INVALID_P2M_ENTRY is filled in both p2m_*identity + * and in p2m_*missing, so returning the INVALID_P2M_ENTRY + * would be wrong. + */ + if (p2m_top[topidx][mididx] == p2m_identity) + return IDENTITY_FRAME(pfn); + return p2m_top[topidx][mididx][idx]; } EXPORT_SYMBOL_GPL(get_phys_to_machine); @@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn) p2m_top_mfn_p[topidx] = mid_mfn; } - if (p2m_top[topidx][mididx] == p2m_missing) { + if (p2m_top[topidx][mididx] == p2m_identity || + p2m_top[topidx][mididx] == p2m_missing) { /* p2m leaf page is missing */ unsigned long *p2m; + unsigned long *p2m_orig = p2m_top[topidx][mididx]; p2m = alloc_p2m_page(); if (!p2m) @@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn) p2m_init(p2m); - if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) + if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) free_p2m_page(p2m); else mid_mfn[mididx] = virt_to_mfn(p2m); @@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn) return true; } +bool __early_alloc_p2m(unsigned long pfn) +{ + unsigned topidx, mididx, idx; + + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); + idx = p2m_index(pfn); + + /* Pfff.. No boundary cross-over, lets get out. */ + if (!idx) + return false; + + WARN(p2m_top[topidx][mididx] == p2m_identity, + "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", + topidx, mididx); + + /* + * Could be done by xen_build_dynamic_phys_to_machine.. + */ + if (p2m_top[topidx][mididx] != p2m_missing) + return false; + + /* Boundary cross-over for the edges: */ + if (idx) { + unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_init(p2m); + + p2m_top[topidx][mididx] = p2m; + + } + return idx != 0; +} +unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e) +{ + unsigned long pfn; + + if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) + return 0; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn_e - pfn_s; + + if (pfn_s > pfn_e) + return 0; + + for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); + pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); + pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) + { + unsigned topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == p2m_mid_missing) { + unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_mid_init(mid); + + p2m_top[topidx] = mid; + } + } + + __early_alloc_p2m(pfn_s); + __early_alloc_p2m(pfn_e); + + for (pfn = pfn_s; pfn < pfn_e; pfn++) + if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) + break; + + if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), + "Identity mapping failed. We are %ld short of 1-1 mappings!\n", + (pfn_e - pfn_s) - (pfn - pfn_s))) + printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); + + return pfn - pfn_s; +} + /* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return true; + } if (unlikely(pfn >= MAX_P2M_PFN)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; @@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* For sparse holes were the p2m leaf has real PFN along with + * PCI holes, stick in the PFN as the MFN value. + */ + if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { + if (p2m_top[topidx][mididx] == p2m_identity) + return true; + + /* Swap over from MISSING to IDENTITY if needed. */ + if (p2m_top[topidx][mididx] == p2m_missing) { + WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, + p2m_identity) != p2m_missing); + return true; + } + } + if (p2m_top[topidx][mididx] == p2m_missing) return mfn == INVALID_P2M_ENTRY; @@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; - } - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!alloc_p2m(pfn)) return false; @@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page) { unsigned long flags; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page) unsigned long flags; unsigned long mfn; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) return ret; } EXPORT_SYMBOL_GPL(m2p_find_override_pfn); + +#ifdef CONFIG_XEN_DEBUG_FS + +int p2m_dump_show(struct seq_file *m, void *v) +{ + static const char * const level_name[] = { "top", "middle", + "entry", "abnormal" }; + static const char * const type_name[] = { "identity", "missing", + "pfn", "abnormal"}; +#define TYPE_IDENTITY 0 +#define TYPE_MISSING 1 +#define TYPE_PFN 2 +#define TYPE_UNKNOWN 3 + unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; + unsigned int uninitialized_var(prev_level); + unsigned int uninitialized_var(prev_type); + + if (!p2m_top) + return 0; + + for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx = p2m_mid_index(pfn); + unsigned idx = p2m_index(pfn); + unsigned lvl, type; + + lvl = 4; + type = TYPE_UNKNOWN; + if (p2m_top[topidx] == p2m_mid_missing) { + lvl = 0; type = TYPE_MISSING; + } else if (p2m_top[topidx] == NULL) { + lvl = 0; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == NULL) { + lvl = 1; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == p2m_identity) { + lvl = 1; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx] == p2m_missing) { + lvl = 1; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == 0) { + lvl = 2; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { + lvl = 2; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { + lvl = 2; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == pfn) { + lvl = 2; type = TYPE_PFN; + } else if (p2m_top[topidx][mididx][idx] != pfn) { + lvl = 2; type = TYPE_PFN; + } + if (pfn == 0) { + prev_level = lvl; + prev_type = type; + } + if (pfn == MAX_DOMAIN_PAGES-1) { + lvl = 3; + type = TYPE_UNKNOWN; + } + if (prev_type != type) { + seq_printf(m, " [0x%lx->0x%lx] %s\n", + prev_pfn_type, pfn, type_name[prev_type]); + prev_pfn_type = pfn; + prev_type = type; + } + if (prev_level != lvl) { + seq_printf(m, " [0x%lx->0x%lx] level %s\n", + prev_pfn_level, pfn, level_name[prev_level]); + prev_pfn_level = pfn; + prev_level = lvl; + } + } + return 0; +#undef TYPE_IDENTITY +#undef TYPE_MISSING +#undef TYPE_PFN +#undef TYPE_UNKNOWN +} +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a8a66a50d446..fa0269a99377 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; static __init void xen_add_extra_mem(unsigned long pages) { + unsigned long pfn; + u64 size = (u64)pages * PAGE_SIZE; u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; @@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages) xen_extra_mem_size += size; xen_max_p2m_pfn = PFN_DOWN(extra_start + size); + + for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } static unsigned long __init xen_release_chunk(phys_addr_t start_addr, @@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", start, end, ret); if (ret == 1) { - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } @@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, return released; } +static unsigned long __init xen_set_identity(const struct e820entry *list, + ssize_t map_size) +{ + phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; + phys_addr_t start_pci = last; + const struct e820entry *entry; + unsigned long identity = 0; + int i; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + phys_addr_t start = entry->addr; + phys_addr_t end = start + entry->size; + + if (start < last) + start = last; + + if (end <= start) + continue; + + /* Skip over the 1MB region. */ + if (last > end) + continue; + + if (entry->type == E820_RAM) { + if (start > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(start)); + + /* Without saving 'last' we would gooble RAM too + * at the end of the loop. */ + last = end; + start_pci = end; + continue; + } + start_pci = min(start, start_pci); + last = end; + } + if (last > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(last)); + return identity; +} /** * machine_specific_memory_setup - Hook for machine specific memory setup. **/ char * __init xen_memory_setup(void) { static struct e820entry map[E820MAX] __initdata; + static struct e820entry map_raw[E820MAX] __initdata; unsigned long max_pfn = xen_start_info->nr_pages; unsigned long long mem_end; @@ -151,6 +199,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long extra_pages = 0; unsigned long extra_limit; + unsigned long identity_pages = 0; int i; int op; @@ -176,6 +225,7 @@ char * __init xen_memory_setup(void) } BUG_ON(rc); + memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; xen_extra_mem_start = mem_end; for (i = 0; i < memmap.nr_entries; i++) { @@ -194,6 +244,15 @@ char * __init xen_memory_setup(void) end -= delta; extra_pages += PFN_DOWN(delta); + /* + * Set RAM below 4GB that is not for us to be unusable. + * This prevents "System RAM" address space from being + * used as potential resource for I/O address (happens + * when 'allocate_resource' is called). + */ + if (delta && + (xen_initial_domain() && end < 0x100000000ULL)) + e820_add_region(end, delta, E820_UNUSABLE); } if (map[i].size > 0 && end > xen_extra_mem_start) @@ -251,6 +310,13 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(extra_pages); + /* + * Set P2M for all non-RAM pages and E820 gaps to be identity + * type PFNs. We supply it with the non-sanitized version + * of the E820. + */ + identity_pages = xen_set_identity(map_raw, memmap.nr_entries); + printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); return "Xen"; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72a4c7959045..30612441ed99 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -509,3 +509,41 @@ void __init xen_smp_init(void) xen_fill_possible_map(); xen_init_spinlocks(); } + +static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + WARN_ON(xen_smp_intr_init(0)); + + if (!xen_have_vector_callback) + return; + xen_init_lock_cpu(0); + xen_init_spinlocks(); +} + +static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) +{ + int rc; + rc = native_cpu_up(cpu); + WARN_ON (xen_smp_intr_init(cpu)); + return rc; +} + +static void xen_hvm_cpu_die(unsigned int cpu) +{ + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); + native_cpu_die(cpu); +} + +void __init xen_hvm_smp_init(void) +{ + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_send_reschedule = xen_smp_send_reschedule; + smp_ops.cpu_up = xen_hvm_cpu_up; + smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; + smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; +} diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 9bbd63a129b5..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -12,7 +12,7 @@ #include "xen-ops.h" #include "mmu.h" -void xen_pre_suspend(void) +void xen_arch_pre_suspend(void) { xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = @@ -26,8 +26,9 @@ void xen_pre_suspend(void) BUG(); } -void xen_hvm_post_suspend(int suspend_cancelled) +void xen_arch_hvm_post_suspend(int suspend_cancelled) { +#ifdef CONFIG_XEN_PVHVM int cpu; xen_hvm_init_shared_info(); xen_callback_vector(); @@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) xen_setup_runstate_info(cpu); } } +#endif } -void xen_post_suspend(int suspend_cancelled) +void xen_arch_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 067759e3d6a5..2e2d370a47b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -397,7 +397,9 @@ void xen_setup_timer(int cpu) name = "<timer kasprintf failed>"; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, + IRQF_DISABLED|IRQF_PERCPU| + IRQF_NOBALANCING|IRQF_TIMER| + IRQF_FORCE_RESUME, name, NULL); evt = &per_cpu(xen_clock_events, cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9d41bf985757..3112f55638c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +void __init xen_hvm_smp_init(void); extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} +static inline void xen_hvm_smp_init(void) {} #endif #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h index e39edf5c86f2..249619e7e7f2 100644 --- a/arch/xtensa/include/asm/rwsem.h +++ b/arch/xtensa/include/asm/rwsem.h @@ -17,44 +17,12 @@ #error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." #endif -#include <linux/list.h> -#include <linux/spinlock.h> -#include <asm/atomic.h> -#include <asm/system.h> - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 #define RWSEM_ACTIVE_MASK 0x0000ffff #define RWSEM_WAITING_BIAS (-0x00010000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -}; - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} /* * lock for reading @@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* _XTENSA_RWSEM_H */ diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 19df764f6399..f3e5eb43f71c 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -96,16 +96,12 @@ again: update_process_times(user_mode(get_irq_regs())); #endif - write_seqlock(&xtime_lock); - - do_timer(1); /* Linux handler in kernel/timer.c */ + xtime_update(1); /* Linux handler in kernel/time/timekeeping */ /* Note that writing CCOMPARE clears the interrupt. */ next += CCOUNT_PER_JIFFY; set_linux_timer(next); - - write_sequnlock(&xtime_lock); } /* Allow platform to do something useful (Wdog). */ |