diff options
Diffstat (limited to 'arch/powerpc/kernel')
42 files changed, 1016 insertions, 577 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f179117..ee728e433aa2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -85,6 +85,8 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds +obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o + obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7c5324f1ec9c..04caee7d9bc1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -208,6 +208,7 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); + DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a3c684b4c862..92c6b008dd2b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -451,15 +451,15 @@ out: static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; - struct sys_device *sysdev; + struct device *dev; struct kobject *kobj = NULL; - sysdev = get_cpu_sysdev(cpu_id); - WARN_ONCE(!sysdev, "no sysdev for CPU %i\n", cpu_id); - if (!sysdev) + dev = get_cpu_device(cpu_id); + WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id); + if (!dev) goto err; - kobj = kobject_create_and_add("cache", &sysdev->kobj); + kobj = kobject_create_and_add("cache", &dev->kobj); if (!kobj) goto err; diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index 7f818feaa7a5..ebc62f42a237 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -41,11 +41,16 @@ _GLOBAL(__setup_cpu_a2) * core local but doing it always won't hurt */ -#ifdef CONFIG_PPC_WSP_COPRO +#ifdef CONFIG_PPC_ICSWX /* Make sure ACOP starts out as zero */ li r3,0 mtspr SPRN_ACOP,r3 + /* Skip the following if we are in Guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne _icswx_skip_guest + /* Enable icswx instruction */ mfspr r3,SPRN_A2_CCR2 ori r3,r3,A2_CCR2_ENABLE_ICSWX @@ -54,7 +59,8 @@ _GLOBAL(__setup_cpu_a2) /* Unmask all CTs in HACOP */ li r3,-1 mtspr SPRN_HACOP,r3 -#endif /* CONFIG_PPC_WSP_COPRO */ +_icswx_skip_guest: +#endif /* CONFIG_PPC_ICSWX */ /* Enable doorbell */ mfspr r3,SPRN_A2_CCR2 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index edae5bb06f1f..81db9e2a8a20 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1505,6 +1505,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1830,6 +1843,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* 476 iss */ .pvr_mask = 0xffff0000, .pvr_value = 0x00050000, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c45..abef75176c07 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,85 +10,85 @@ * */ -#undef DEBUG - #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> -#include <linux/memblock.h> #include <asm/processor.h> #include <asm/machdep.h> #include <asm/kexec.h> #include <asm/kdump.h> #include <asm/prom.h> -#include <asm/firmware.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/setjmp.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 -/* This keeps a track of which one is crashing cpu. */ +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); +static atomic_t cpus_in_crash; void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); - - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); + cpumask_set_cpu(cpu, &cpus_state_saved); } + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); + /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -103,106 +103,99 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); - msecs = 10000; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. */ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); - } + if ((panic_timeout > 0) || (tries > 0)) + return; + /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); - cpu_relax(); - } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ } + crash_ipi_callback(regs); } @@ -211,7 +204,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? @@ -225,7 +218,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ @@ -236,7 +228,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -308,22 +300,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -341,15 +322,26 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 424afb6b8fba..b3ba5163eae2 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -28,7 +28,7 @@ #define DBG(fmt...) #endif -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL void __init reserve_kdump_trampoline(void) { memblock_reserve(0, KDUMP_RESERVE_LIMIT); @@ -67,7 +67,7 @@ void __init setup_kdump_trampoline(void) DBG(" <- setup_kdump_trampoline()\n"); } -#endif /* CONFIG_RELOCATABLE */ +#endif /* CONFIG_NONSTATIC_KERNEL */ static int __init parse_savemaxmem(char *p) { diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4f80cf1ce77b..3e57a00b8cba 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1213,7 +1213,7 @@ do_user_signal: /* r10 contains MSR_KERNEL here */ stw r3,_TRAP(r1) 2: addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r9 - bl do_signal + bl do_notify_resume REST_NVGPRS(r1) b recheck diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d834425186ae..866462cbe2d8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -751,12 +751,16 @@ user_work: andi. r0,r4,_TIF_NEED_RESCHED beq 1f + li r5,1 + TRACE_AND_RESTORE_IRQ(r5); bl .schedule b .ret_from_except_lite 1: bl .save_nvgprs + li r5,1 + TRACE_AND_RESTORE_IRQ(r5); addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_signal + bl .do_notify_resume b .ret_from_except unrecov_restore: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf9c69b9189c..15c5a4f6de01 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -65,7 +65,7 @@ BEGIN_FTR_SECTION lbz r0,PACAPROCSTART(r13) cmpwi r0,0x80 bne 1f - li r0,0 + li r0,1 stb r0,PACAPROCSTART(r13) b kvm_start_guest 1: @@ -774,8 +774,8 @@ alignment_common: program_check_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS bl .program_check_exception b .ret_from_except diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b725dab0f88a..7dd2981bcc50 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -64,6 +64,35 @@ _ENTRY(_start); mr r31,r3 /* save device tree ptr */ li r24,0 /* CPU number */ +#ifdef CONFIG_RELOCATABLE +/* + * Relocate ourselves to the current runtime address. + * This is called only by the Boot CPU. + * "relocate" is called with our current runtime virutal + * address. + * r21 will be loaded with the physical runtime address of _stext + */ + bl 0f /* Get our runtime address */ +0: mflr r21 /* Make it accessible */ + addis r21,r21,(_stext - 0b)@ha + addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 256M page. + * We could map the 256M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */ + rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virutal Address */ + + bl relocate +#endif + bl init_cpu_state /* @@ -88,6 +117,65 @@ _ENTRY(_start); #ifdef CONFIG_RELOCATABLE /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. + * + * r25 will contain RPN/ERPN for the start address of memory + * r21 will contain the current offset of _stext + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + /* + * Compute the kernstart_addr. + * kernstart_addr => (r6,r8) + * kernstart_addr & ~0xfffffff => (r6,r7) + */ + rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */ + or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */ + + /* Store kernstart_addr */ + stw r6,0(r3) /* higher 32bit */ + stw r8,4(r3) /* lower 32bit */ + + /* + * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff) + * + */ + + /* KERNELBASE&~0xfffffff => (r4,r5) */ + li r4, 0 /* higer 32bit */ + lis r5,KERNELBASE@h + rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */ + + /* + * 64bit subtraction. + */ + subfc r5,r7,r5 + subfe r4,r6,r4 + + /* Store virt_phys_offset */ + lis r3,virt_phys_offset@ha + la r3,virt_phys_offset@l(r3) + + stw r4,0(r3) + stw r5,4(r3) + +#elif defined(CONFIG_DYNAMIC_MEMSTART) + /* + * Mapping based, page aligned dynamic kernel loading. + * * r25 will contain RPN/ERPN for the start address of memory * * Add the difference between KERNELBASE and PAGE_OFFSET to the @@ -732,6 +820,8 @@ _GLOBAL(init_cpu_state) /* We use the PVR to differenciate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq head_start_47x cmplwi cr0,r3,PVR_476@h beq head_start_47x cmplwi cr0,r3,PVR_476_ISS@h @@ -800,12 +890,29 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ +#ifdef CONFIG_NONSTATIC_KERNEL + /* + * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT + * entries of the initial mapping set by the boot loader. + * The XLAT entry is stored in r25 + */ + + /* Read the XLAT entry for our current mapping */ + tlbre r25,r23,PPC44x_TLB_XLAT + + lis r3,KERNELBASE@h + ori r3,r3,KERNELBASE@l + + /* Use our current RPN entry */ + mr r4,r25 +#else lis r3,PAGE_OFFSET@h ori r3,r3,PAGE_OFFSET@l /* Kernel is at the base of RAM */ li r4, 0 /* Load the kernel physical address */ +#endif /* Load the kernel PID = 0 */ li r0,0 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 9f5d210ddf3f..d5d78c4ceef6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -197,7 +197,7 @@ _ENTRY(__early_start) bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) #ifdef CONFIG_PHYS_64BIT diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..0a48bf5db6c8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -56,7 +60,9 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +99,8 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); @@ -102,6 +109,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. + * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 3a70845a51c7..fcdff198da4b 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -54,6 +54,7 @@ _GLOBAL(power7_idle) li r0,0 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ stb r0,PACAHARDIRQEN(r13) + stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ SAVE_GPR(2, r1) @@ -86,6 +87,9 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne .power7_wakeup_loss ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 262791807397..97a3715ac8bd 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -119,24 +119,6 @@ EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len) - return NULL; - if (max && len > max) - len = max; - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) - return ioremap(start, len); - /* What? */ - return NULL; -} - void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { if (isa_vaddr_is_ioport(addr)) @@ -146,6 +128,5 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *addr) iounmap(addr); } -EXPORT_SYMBOL(pci_iomap); EXPORT_SYMBOL(pci_iounmap); #endif /* CONFIG_PCI */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c3c46948d94..01e2877e8e04 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -115,6 +115,19 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + u64 *next_tb; + + preempt_disable(); + next_tb = &__get_cpu_var(decrementers_next_tb); + + if (now >= *next_tb) + set_dec(1); + preempt_enable(); +} + notrace void arch_local_irq_restore(unsigned long en) { /* @@ -164,24 +177,21 @@ notrace void arch_local_irq_restore(unsigned long en) */ local_paca->hard_enabled = en; -#ifndef CONFIG_BOOKE - /* On server, re-trigger the decrementer if it went negative since - * some processors only trigger on edge transitions of the sign bit. - * - * BookE has a level sensitive decrementer (latches in TSR) so we - * don't need that + /* + * Trigger the decrementer if we have a pending event. Some processors + * only trigger on edge transitions of the sign bit. We might also + * have disabled interrupts long enough that the decrementer wrapped + * to positive. */ - if ((int)mfspr(SPRN_DEC) < 0) - mtspr(SPRN_DEC, 1); -#endif /* CONFIG_BOOKE */ + decrementer_check_overflow(); /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. */ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp; - lv1_get_version_info(&tmp); + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } __hard_irq_enable(); diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c7b5afeecaf2..bedd12e1cfbc 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -441,6 +441,11 @@ static void __init fixup_port_irq(int index, return; port->irq = virq; + +#ifdef CONFIG_SERIAL_8250_FSL + if (of_device_is_compatible(np, "fsl,ns16550")) + port->handle_irq = fsl8250_handle_irq; +#endif } static void __init fixup_port_pio(int index, diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 84daabe2fcba..578f35f18723 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -783,7 +783,7 @@ static const struct file_operations lparcfg_fops = { static int __init lparcfg_init(void) { struct proc_dir_entry *ent; - mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb59..c957b1202bdc 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -107,9 +107,6 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of memblock_phys_mem_size() */ - memblock_analyze(); - /* use common parsing */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); @@ -128,7 +125,7 @@ void __init reserve_crashkernel(void) crash_size = resource_size(&crashk_res); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL if (crashk_res.start != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index e63f2e7d2efb..affe5dcce7f4 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -16,10 +16,10 @@ #include <asm/hw_irq.h> #include <asm/io.h> -typedef NORET_TYPE void (*relocate_new_kernel_t)( +typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, unsigned long reboot_code_buffer, - unsigned long start_address) ATTRIB_NORET; + unsigned long start_address) __noreturn; /* * This is a generic machine_kexec function suitable at least for diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 26ccbf77dd41..d7f609086a99 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -307,9 +307,9 @@ static union thread_union kexec_stack __init_task_data = struct paca_struct kexec_paca; /* Our assembly helper, in kexec_stub.S */ -extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, - void *image, void *control, - void (*clear_all)(void)) ATTRIB_NORET; +extern void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) __noreturn; /* too late to fail here */ void default_machine_kexec(struct kimage *image) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee663..cce98d76e905 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -214,7 +214,7 @@ char __devinit *pcibios_setup(char *str) * If the interrupt is used, then gets the interrupt line from the * openfirmware and sets it in the pci_dev and pci_config line. */ -int pci_read_irq_line(struct pci_dev *pci_dev) +static int pci_read_irq_line(struct pci_dev *pci_dev) { struct of_irq oirq; unsigned int virq; @@ -283,7 +283,6 @@ int pci_read_irq_line(struct pci_dev *pci_dev) return 0; } -EXPORT_SYMBOL(pci_read_irq_line); /* * Platform support for /proc/bus/pci/X/Y mmap()s, @@ -921,18 +920,22 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) struct resource *res = dev->resource + i; if (!res->flags) continue; - /* On platforms that have PCI_PROBE_ONLY set, we don't - * consider 0 as an unassigned BAR value. It's technically - * a valid value, but linux doesn't like it... so when we can - * re-assign things, we do so, but if we can't, we keep it - * around and hope for the best... + + /* If we're going to re-assign everything, we mark all resources + * as unset (and 0-base them). In addition, we mark BARs starting + * at 0 as unset as well, except if PCI_PROBE_ONLY is also set + * since in that case, we don't want to re-assign anything */ - if (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY)) { - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] is unassigned\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || + (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + /* Only print message if not re-assigning */ + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " + "is unassigned\n", + pci_name(dev), i, + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned int)res->flags); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; @@ -1042,6 +1045,16 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) if (i >= 3 && bus->self->transparent) continue; + /* If we are going to re-assign everything, mark the resource + * as unset and move it down to 0 + */ + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + res->flags |= IORESOURCE_UNSET; + res->end -= res->start; + res->start = 0; + continue; + } + pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ @@ -1118,6 +1131,11 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) } } +void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + void __devinit pcibios_fixup_bus(struct pci_bus *bus) { /* When called from the generic PCI probe, read PCI<->PCI bridge @@ -1262,18 +1280,15 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pci_bus_for_each_resource(bus, res, i) { if (!res || !res->flags || res->start > res->end || res->parent) continue; + + /* If the resource was left unset at this point, we clear it */ + if (res->flags & IORESOURCE_UNSET) + goto clear_resource; + if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? &ioport_resource : &iomem_resource; else { - /* Don't bother with non-root busses when - * re-assigning all resources. We clear the - * resource flags as if they were colliding - * and as such ensure proper re-allocation - * later. - */ - if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) - goto clear_resource; pr = pci_find_parent_resource(bus->self, res); if (pr == res) { /* this happens when the generic PCI @@ -1304,9 +1319,9 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) if (reparent_resources(pr, res) == 0) continue; } - printk(KERN_WARNING "PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); -clear_resource: + pr_warning("PCI: Cannot allocate resource region " + "%d of PCI bridge %d, will remap\n", i, bus->number); + clear_resource: res->start = res->end = 0; res->flags = 0; } @@ -1451,16 +1466,11 @@ void __init pcibios_resource_survey(void) { struct pci_bus *b; - /* Allocate and assign resources. If we re-assign everything, then - * we skip the allocate phase - */ + /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - - if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - } + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the @@ -1555,14 +1565,13 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void __devinit pcibios_setup_phb_resources(struct pci_controller *hose) +static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) { - struct pci_bus *bus = hose->bus; struct resource *res; int i; /* Hookup PHB IO resource */ - bus->resource[0] = res = &hose->io_resource; + res = &hose->io_resource; if (!res->flags) { printk(KERN_WARNING "PCI: I/O resource not set for host" @@ -1580,6 +1589,7 @@ void __devinit pcibios_setup_phb_resources(struct pci_controller *hose) (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long)res->flags); + pci_add_resource(resources, res); /* Hookup PHB Memory resources */ for (i = 0; i < 3; ++i) { @@ -1597,12 +1607,12 @@ void __devinit pcibios_setup_phb_resources(struct pci_controller *hose) res->flags = IORESOURCE_MEM; #endif /* CONFIG_PPC32 */ } - bus->resource[i+1] = res; pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i, (unsigned long long)res->start, (unsigned long long)res->end, (unsigned long)res->flags); + pci_add_resource(resources, res); } pr_debug("PCI: PHB MEM offset = %016llx\n", @@ -1696,6 +1706,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) */ void __devinit pcibios_scan_phb(struct pci_controller *hose) { + LIST_HEAD(resources); struct pci_bus *bus; struct device_node *node = hose->dn; int mode; @@ -1703,22 +1714,24 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) pr_debug("PCI: Scanning PHB %s\n", node ? node->full_name : "<NO NAME>"); + /* Get some IO space for the new PHB */ + pcibios_setup_phb_io_space(hose); + + /* Wire up PHB bus resources */ + pcibios_setup_phb_resources(hose, &resources); + /* Create an empty bus for the toplevel */ - bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, hose); + bus = pci_create_root_bus(hose->parent, hose->first_busno, + hose->ops, hose, &resources); if (bus == NULL) { pr_err("Failed to create bus for PCI domain %04x\n", hose->global_number); + pci_free_resource_list(&resources); return; } bus->secondary = hose->first_busno; hose->bus = bus; - /* Get some IO space for the new PHB */ - pcibios_setup_phb_io_space(hose); - - /* Wire up PHB bus resources */ - pcibios_setup_phb_resources(hose); - /* Get probe mode and perform scan */ mode = PCI_PROBE_NORMAL; if (node && ppc_md.pci_probe_mode) @@ -1732,6 +1745,12 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (mode == PCI_PROBE_NORMAL) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + /* Platform gets a chance to do some global fixups before + * we proceed to resource allocation + */ + if (ppc_md.pcibios_fixup_phb) + ppc_md.pcibios_fixup_phb(hose); + /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; @@ -1747,10 +1766,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { int i, class = dev->class >> 8; + /* When configured as agent, programing interface = 1 */ + int prog_if = dev->class & 0xf; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && + (prog_if == 0) && (dev->bus->parent == NULL)) { for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index bcf4bf9e72d9..3318d39b7d4c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -131,30 +131,13 @@ EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); #endif /* CONFIG_HOTPLUG */ -int __devinit pcibios_map_io_space(struct pci_bus *bus) +static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose) { struct vm_struct *area; unsigned long phys_page; unsigned long size_page; unsigned long io_virt_offset; - struct pci_controller *hose; - - WARN_ON(bus == NULL); - - /* If this not a PHB, nothing to do, page tables still exist and - * thus HPTEs will be faulted in when needed - */ - if (bus->self) { - pr_debug("IO mapping for PCI-PCI bridge %s\n", - pci_name(bus->self)); - pr_debug(" virt=0x%016llx...0x%016llx\n", - bus->resource[0]->start + _IO_BASE, - bus->resource[0]->end + _IO_BASE); - return 0; - } - /* Get the host bridge */ - hose = pci_bus_to_host(bus); phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); @@ -198,11 +181,30 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) return 0; } + +int __devinit pcibios_map_io_space(struct pci_bus *bus) +{ + WARN_ON(bus == NULL); + + /* If this not a PHB, nothing to do, page tables still exist and + * thus HPTEs will be faulted in when needed + */ + if (bus->self) { + pr_debug("IO mapping for PCI-PCI bridge %s\n", + pci_name(bus->self)); + pr_debug(" virt=0x%016llx...0x%016llx\n", + bus->resource[0]->start + _IO_BASE, + bus->resource[0]->end + _IO_BASE); + return 0; + } + + return pcibios_map_phb_io_space(pci_bus_to_host(bus)); +} EXPORT_SYMBOL_GPL(pcibios_map_io_space); void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) { - pcibios_map_io_space(hose->bus); + pcibios_map_phb_io_space(hose); } #define IOBASE_BRIDGE_NUMBER 0 diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e69deb89b37..dd9e4a04bf79 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) dn->data = pdn; pdn->node = dn; pdn->phb = phb; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 10a140f82cb8..64483fde95c6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -865,6 +865,7 @@ static void power_pmu_start(struct perf_event *event, int ef_flags) { unsigned long flags; s64 left; + unsigned long val; if (!event->hw.idx || !event->hw.sample_period) return; @@ -880,7 +881,12 @@ static void power_pmu_start(struct perf_event *event, int ef_flags) event->hw.state = 0; left = local64_read(&event->hw.period_left); - write_pmc(event->hw.idx, left); + + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + + write_pmc(event->hw.idx, val); perf_event_update_userpage(event); perf_pmu_enable(event->pmu); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6457574c0b2f..d817ab018486 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -566,12 +566,12 @@ static void show_instructions(struct pt_regs *regs) */ if (!__kernel_text_address(pc) || __get_user(instr, (unsigned int __user *)pc)) { - printk("XXXXXXXX "); + printk(KERN_CONT "XXXXXXXX "); } else { if (regs->nip == pc) - printk("<%08x> ", instr); + printk(KERN_CONT "<%08x> ", instr); else - printk("%08x ", instr); + printk(KERN_CONT "%08x ", instr); } pc += sizeof(int); @@ -584,16 +584,32 @@ static struct regbit { unsigned long bit; const char *name; } msr_bits[] = { +#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) + {MSR_SF, "SF"}, + {MSR_HV, "HV"}, +#endif + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, +#ifdef CONFIG_BOOKE + {MSR_CE, "CE"}, +#endif {MSR_EE, "EE"}, {MSR_PR, "PR"}, {MSR_FP, "FP"}, - {MSR_VEC, "VEC"}, - {MSR_VSX, "VSX"}, {MSR_ME, "ME"}, - {MSR_CE, "CE"}, +#ifdef CONFIG_BOOKE {MSR_DE, "DE"}, +#else + {MSR_SE, "SE"}, + {MSR_BE, "BE"}, +#endif {MSR_IR, "IR"}, {MSR_DR, "DR"}, + {MSR_PMM, "PMM"}, +#ifndef CONFIG_BOOKE + {MSR_RI, "RI"}, + {MSR_LE, "LE"}, +#endif {0, NULL} }; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa1235b0503b..abe405dab34d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -733,8 +733,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); - of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -756,20 +754,14 @@ void __init early_init_devtree(void *params) early_reserve_mem(); phyp_dump_reserve_mem(); - limit = memory_limit; - if (! limit) { - phys_addr_t memsize; - - /* Ensure that total memory size is page-aligned, because - * otherwise mark_bootmem() gets upset. */ - memblock_analyze(); - memsize = memblock_phys_mem_size(); - if ((memsize & PAGE_MASK) != memsize) - limit = memsize & PAGE_MASK; - } + /* + * Ensure that total memory size is page-aligned, because otherwise + * mark_bootmem() gets upset. + */ + limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index cc584865b3df..eca626ea3f23 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -742,7 +742,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), /* virt_base */ W(0xffffffff), /* virt_size */ W(0xffffffff), /* load_base */ - W(64), /* 64MB min RMA */ + W(256), /* 256MB min RMA */ W(0xffffffff), /* full client load */ 0, /* min RMA percentage of total RAM */ 48, /* max log_2(hash table size) */ @@ -1224,14 +1224,6 @@ static void __init prom_init_mem(void) RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); - /* Check if we have an initrd after the kernel, if we do move our bottom - * point to after it - */ - if (RELOC(prom_initrd_start)) { - if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); - } - /* * If prom_memory_limit is set we reduce the upper limits *except* for * alloc_top_high. This must be the real top of RAM so we can put @@ -1269,6 +1261,15 @@ static void __init prom_init_mem(void) RELOC(alloc_top) = RELOC(rmo_top); RELOC(alloc_top_high) = RELOC(ram_top); + /* + * Check if we have an initrd after the kernel but still inside + * the RMO. If we do move our bottom point to after it. + */ + if (RELOC(prom_initrd_start) && + RELOC(prom_initrd_start) < RELOC(rmo_top) && + RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + prom_printf("memory layout at init:\n"); prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); @@ -2079,7 +2080,7 @@ static void __init prom_check_displays(void) /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ clut = RELOC(default_colors); - for (i = 0; i < 32; i++, clut += 3) + for (i = 0; i < 16; i++, clut += 3) if (prom_set_color(ih, i, clut[0], clut[1], clut[2]) != 0) break; @@ -2844,7 +2845,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, RELOC(of_platform) = prom_find_machine_type(); prom_printf("Detected machine type: %x\n", RELOC(of_platform)); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); @@ -2969,9 +2970,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * in case stdin is USB and still active on IBM machines... * Unfortunately quiesce crashes on some powermacs if we have - * closed stdin already (in particular the powerbook 101). + * closed stdin already (in particular the powerbook 101). It + * appears that the OPAL version of OFW doesn't like it either. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_close_stdin(); /* @@ -2987,8 +2990,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * is common to us and kexec */ hdr = RELOC(dt_header_start); - prom_printf("returning from prom_init\n"); - prom_debug("->dt_header_start=0x%x\n", hdr); + + /* Don't print anything after quiesce under OPAL, it crashes OFW */ + if (RELOC(of_platform) != PLATFORM_OPAL) { + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + } #ifdef CONFIG_PPC32 reloc_got2(-offset); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 5de73dbd15c7..5b43325402bc 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); - if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!is_32bit_task()) - audit_syscall_entry(AUDIT_ARCH_PPC64, - regs->gpr[0], - regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else + if (!is_32bit_task()) + audit_syscall_entry(AUDIT_ARCH_PPC64, + regs->gpr[0], + regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + else #endif - audit_syscall_entry(AUDIT_ARCH_PPC, - regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - } + audit_syscall_entry(AUDIT_ARCH_PPC, + regs->gpr[0], + regs->gpr[3] & 0xffffffff, + regs->gpr[4] & 0xffffffff, + regs->gpr[5] & 0xffffffff, + regs->gpr[6] & 0xffffffff); return ret ?: regs->gpr[0]; } @@ -1748,9 +1746,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) { int step; - if (unlikely(current->audit_context)) - audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, - regs->result); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->result); diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S new file mode 100644 index 000000000000..ef46ba6e094f --- /dev/null +++ b/arch/powerpc/kernel/reloc_32.S @@ -0,0 +1,208 @@ +/* + * Code to process dynamic relocations for PPC32. + * + * Copyrights (C) IBM Corporation, 2011. + * Author: Suzuki Poulose <suzuki@in.ibm.com> + * + * - Based on ppc64 code - reloc_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> + +/* Dynamic section table entry tags */ +DT_RELA = 7 /* Tag for Elf32_Rela section */ +DT_RELASZ = 8 /* Size of the Rela relocs */ +DT_RELAENT = 9 /* Size of one Rela reloc entry */ + +STN_UNDEF = 0 /* Undefined symbol index */ +STB_LOCAL = 0 /* Local binding for the symbol */ + +R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */ +R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */ +R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */ +R_PPC_RELATIVE = 22 + +/* + * r3 = desired final address + */ + +_GLOBAL(relocate) + + mflr r0 /* Save our LR */ + bl 0f /* Find our current runtime address */ +0: mflr r12 /* Make it accessible */ + mtlr r0 + + lwz r11, (p_dyn - 0b)(r12) + add r11, r11, r12 /* runtime address of .dynamic section */ + lwz r9, (p_rela - 0b)(r12) + add r9, r9, r12 /* runtime address of .rela.dyn section */ + lwz r10, (p_st - 0b)(r12) + add r10, r10, r12 /* runtime address of _stext section */ + lwz r13, (p_sym - 0b)(r12) + add r13, r13, r12 /* runtime address of .dynsym section */ + + /* + * Scan the dynamic section for RELA, RELASZ entries + */ + li r6, 0 + li r7, 0 + li r8, 0 +1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */ + cmpwi r5, 0 /* End of ELF_Dyn[] */ + beq eodyn + cmpwi r5, DT_RELA + bne relasz + lwz r7, 4(r11) /* r7 = rela.link */ + b skip +relasz: + cmpwi r5, DT_RELASZ + bne relaent + lwz r8, 4(r11) /* r8 = Total Rela relocs size */ + b skip +relaent: + cmpwi r5, DT_RELAENT + bne skip + lwz r6, 4(r11) /* r6 = Size of one Rela reloc */ +skip: + addi r11, r11, 8 + b 1b +eodyn: /* End of Dyn Table scan */ + + /* Check if we have found all the entries */ + cmpwi r7, 0 + beq done + cmpwi r8, 0 + beq done + cmpwi r6, 0 + beq done + + + /* + * Work out the current offset from the link time address of .rela + * section. + * cur_offset[r7] = rela.run[r9] - rela.link [r7] + * _stext.link[r12] = _stext.run[r10] - cur_offset[r7] + * final_offset[r3] = _stext.final[r3] - _stext.link[r12] + */ + subf r7, r7, r9 /* cur_offset */ + subf r12, r7, r10 + subf r3, r12, r3 /* final_offset */ + + subf r8, r6, r8 /* relaz -= relaent */ + /* + * Scan through the .rela table and process each entry + * r9 - points to the current .rela table entry + * r13 - points to the symbol table + */ + + /* + * Check if we have a relocation based on symbol + * r5 will hold the value of the symbol. + */ +applyrela: + lwz r4, 4(r9) /* r4 = rela.r_info */ + srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */ + cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */ + beq get_type /* value = 0 */ + /* Find the value of the symbol at index(r5) */ + slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */ + add r12, r13, r5 /* r12 = &__dyn_sym[Index] */ + + /* + * GNU ld has a bug, where dynamic relocs based on + * STB_LOCAL symbols, the value should be assumed + * to be zero. - Alan Modra + */ + /* XXX: Do we need to check if we are using GNU ld ? */ + lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */ + extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */ + cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */ + beq get_type /* We have r5 = 0 */ + lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */ + +get_type: + /* Load the relocation type to r4 */ + extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */ + + /* R_PPC_RELATIVE */ + cmpwi r4, R_PPC_RELATIVE + bne hi16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 /* final addend */ + stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + b nxtrela /* continue */ + + /* R_PPC_ADDR16_HI */ +hi16: + cmpwi r4, R_PPC_ADDR16_HI + bne ha16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + b store_half + + /* R_PPC_ADDR16_HA */ +ha16: + cmpwi r4, R_PPC_ADDR16_HA + bne lo16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r5, r0, 1, 16 /* Extract bit 16 */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + add r0, r0, r5 /* Add it to r0 */ + b store_half + + /* R_PPC_ADDR16_LO */ +lo16: + cmpwi r4, R_PPC_ADDR16_LO + bne nxtrela + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 16 /* r0 &= 0xffff */ + /* Fall through to */ + + /* Store half word */ +store_half: + sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */ + +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the sync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + sync /* Ensure the data is flushed before icbi */ + icbi r4,r7 + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + sync /* Wait for the flush to finish */ + isync /* Discard prefetched instructions */ + blr + +p_dyn: .long __dynamic_start - 0b +p_rela: .long __rela_dyn_start - 0b +p_sym: .long __dynamic_symtab - 0b +p_st: .long _stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 517b1d8f455b..9f843cdfee9e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -716,7 +716,6 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w int cpu; slb_set_size(SLB_MIN_SIZE); - stop_topology_update(); printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && @@ -732,7 +731,6 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w rc = atomic_read(&data->error); atomic_set(&data->error, rc); - start_topology_update(); pSeries_coalesce_init(); if (wake_when_done) { @@ -846,6 +844,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args) atomic_set(&data.error, 0); data.token = rtas_token("ibm,suspend-me"); data.complete = &done; + stop_topology_update(); /* Call function on all CPUs. One of us will make the * rtas call @@ -858,6 +857,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); + start_topology_update(); + return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index e037c7494fd8..4174b4b23246 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -568,6 +568,12 @@ static void rtas_flash_firmware(int reboot_type) } /* + * Just before starting the firmware flash, cancel the event scan work + * to avoid any soft lockup issues. + */ + rtas_cancel_event_scan(); + + /* * NOTE: the "first" block must be under 4GB, so we create * an entry with no data blocks in the reserved buffer in * the kernel data segment. diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 481ef064c8f1..1045ff49cc6d 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -472,6 +472,13 @@ static void start_event_scan(void) &event_scan_work, event_scan_delay); } +/* Cancel the rtas event scan work */ +void rtas_cancel_event_scan(void) +{ + cancel_delayed_work_sync(&event_scan_work); +} +EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); + static int __init rtas_init(void) { struct proc_dir_entry *entry; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fb9bb46e7e88..4cb8f1e9d044 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,6 +35,8 @@ #include <linux/pci.h> #include <linux/lockdep.h> #include <linux/memblock.h> +#include <linux/hugetlb.h> + #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -64,6 +66,7 @@ #include <asm/mmu_context.h> #include <asm/code-patching.h> #include <asm/kvm_ppc.h> +#include <asm/hugetlb.h> #include "setup.h" @@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + DBG(" <- early_setup()\n"); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 2300426e531a..ac6e437b1021 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -11,6 +11,7 @@ #include <linux/tracehook.h> #include <linux/signal.h> +#include <linux/key.h> #include <asm/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -113,8 +114,9 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) +static int do_signal(struct pt_regs *regs) { + sigset_t *oldset; siginfo_t info; int signr; struct k_sigaction ka; @@ -123,7 +125,7 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; - else if (!oldset) + else oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); @@ -191,14 +193,16 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) return ret; } -void do_signal(struct pt_regs *regs, unsigned long thread_info_flags) +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { if (thread_info_flags & _TIF_SIGPENDING) - do_signal_pending(NULL, regs); + do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 6c0ddfc0603e..8dde973aaaf5 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,7 +12,7 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -extern void do_signal(struct pt_regs *regs, unsigned long thread_info_flags); +extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, int is_32); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6df70907d60a..46695febc09f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -27,7 +27,7 @@ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/err.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/topology.h> @@ -187,7 +187,8 @@ int smp_request_message_ipi(int virq, int msg) return 1; } #endif - err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU, + err = request_irq(virq, smp_ipi_action[msg], + IRQF_PERCPU | IRQF_NO_THREAD, smp_ipi_name[msg], 0); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f0..883e74c0d1b3 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1,4 +1,4 @@ -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/cpu.h> #include <linux/smp.h> #include <linux/percpu.h> @@ -18,6 +18,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/system.h> #include "cacheinfo.h" @@ -37,12 +38,12 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); ssize_t ret; long snooze; @@ -50,21 +51,22 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, if (ret != 1) return -EINVAL; - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + update_smt_snooze_delay(snooze); return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t show_smt_snooze_delay(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); } -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, +static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); static int __init setup_smt_snooze_delay(char *str) @@ -117,25 +119,25 @@ static void write_##NAME(void *val) \ ppc_enable_pmcs(); \ mtspr(ADDRESS, *(unsigned long *)val); \ } \ -static ssize_t show_##NAME(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ - smp_call_function_single(cpu->sysdev.id, read_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ unsigned long val; \ int ret = sscanf(buf, "%lx", &val); \ if (ret != 1) \ return -EINVAL; \ - smp_call_function_single(cpu->sysdev.id, write_##NAME, &val, 1); \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ return count; \ } @@ -177,23 +179,25 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); +SYSFS_PMCSETUP(pir, SPRN_PIR); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); -static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); -static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(spurr, 0600, show_spurr, NULL); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); +static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); -static ssize_t show_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } -static ssize_t __used store_dscr_default(struct sysdev_class *class, - struct sysdev_class_attribute *attr, const char *buf, +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -207,15 +211,14 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, return count; } -static SYSDEV_CLASS_ATTR(dscr_default, 0600, +static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); static void sysfs_create_dscr_default(void) { int err = 0; if (cpu_has_feature(CPU_FTR_DSCR)) - err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_dscr_default.attr); + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } #endif /* CONFIG_PPC64 */ @@ -259,72 +262,72 @@ SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM -static struct sysdev_attribute ibm_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), +static struct device_attribute ibm_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; #endif /* HAS_PPC_PMC_G4 */ #ifdef HAS_PPC_PMC_G4 -static struct sysdev_attribute g4_common_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), +static struct device_attribute g4_common_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2), }; #endif /* HAS_PPC_PMC_G4 */ -static struct sysdev_attribute classic_pmc_attrs[] = { - _SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5), - _SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6), +static struct device_attribute classic_pmc_attrs[] = { + __ATTR(pmc1, 0600, show_pmc1, store_pmc1), + __ATTR(pmc2, 0600, show_pmc2, store_pmc2), + __ATTR(pmc3, 0600, show_pmc3, store_pmc3), + __ATTR(pmc4, 0600, show_pmc4, store_pmc4), + __ATTR(pmc5, 0600, show_pmc5, store_pmc5), + __ATTR(pmc6, 0600, show_pmc6, store_pmc6), #ifdef CONFIG_PPC64 - _SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7), - _SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8), + __ATTR(pmc7, 0600, show_pmc7, store_pmc7), + __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; #ifdef HAS_PPC_PMC_PA6T -static struct sysdev_attribute pa6t_attrs[] = { - _SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), - _SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), - _SYSDEV_ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), - _SYSDEV_ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), - _SYSDEV_ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), - _SYSDEV_ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), - _SYSDEV_ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), - _SYSDEV_ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), +static struct device_attribute pa6t_attrs[] = { + __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), + __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), + __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), + __ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1), + __ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2), + __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), + __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), + __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), #ifdef CONFIG_DEBUG_KERNEL - _SYSDEV_ATTR(hid0, 0600, show_hid0, store_hid0), - _SYSDEV_ATTR(hid1, 0600, show_hid1, store_hid1), - _SYSDEV_ATTR(hid4, 0600, show_hid4, store_hid4), - _SYSDEV_ATTR(hid5, 0600, show_hid5, store_hid5), - _SYSDEV_ATTR(ima0, 0600, show_ima0, store_ima0), - _SYSDEV_ATTR(ima1, 0600, show_ima1, store_ima1), - _SYSDEV_ATTR(ima2, 0600, show_ima2, store_ima2), - _SYSDEV_ATTR(ima3, 0600, show_ima3, store_ima3), - _SYSDEV_ATTR(ima4, 0600, show_ima4, store_ima4), - _SYSDEV_ATTR(ima5, 0600, show_ima5, store_ima5), - _SYSDEV_ATTR(ima6, 0600, show_ima6, store_ima6), - _SYSDEV_ATTR(ima7, 0600, show_ima7, store_ima7), - _SYSDEV_ATTR(ima8, 0600, show_ima8, store_ima8), - _SYSDEV_ATTR(ima9, 0600, show_ima9, store_ima9), - _SYSDEV_ATTR(imaat, 0600, show_imaat, store_imaat), - _SYSDEV_ATTR(btcr, 0600, show_btcr, store_btcr), - _SYSDEV_ATTR(pccr, 0600, show_pccr, store_pccr), - _SYSDEV_ATTR(rpccr, 0600, show_rpccr, store_rpccr), - _SYSDEV_ATTR(der, 0600, show_der, store_der), - _SYSDEV_ATTR(mer, 0600, show_mer, store_mer), - _SYSDEV_ATTR(ber, 0600, show_ber, store_ber), - _SYSDEV_ATTR(ier, 0600, show_ier, store_ier), - _SYSDEV_ATTR(sier, 0600, show_sier, store_sier), - _SYSDEV_ATTR(siar, 0600, show_siar, store_siar), - _SYSDEV_ATTR(tsr0, 0600, show_tsr0, store_tsr0), - _SYSDEV_ATTR(tsr1, 0600, show_tsr1, store_tsr1), - _SYSDEV_ATTR(tsr2, 0600, show_tsr2, store_tsr2), - _SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3), + __ATTR(hid0, 0600, show_hid0, store_hid0), + __ATTR(hid1, 0600, show_hid1, store_hid1), + __ATTR(hid4, 0600, show_hid4, store_hid4), + __ATTR(hid5, 0600, show_hid5, store_hid5), + __ATTR(ima0, 0600, show_ima0, store_ima0), + __ATTR(ima1, 0600, show_ima1, store_ima1), + __ATTR(ima2, 0600, show_ima2, store_ima2), + __ATTR(ima3, 0600, show_ima3, store_ima3), + __ATTR(ima4, 0600, show_ima4, store_ima4), + __ATTR(ima5, 0600, show_ima5, store_ima5), + __ATTR(ima6, 0600, show_ima6, store_ima6), + __ATTR(ima7, 0600, show_ima7, store_ima7), + __ATTR(ima8, 0600, show_ima8, store_ima8), + __ATTR(ima9, 0600, show_ima9, store_ima9), + __ATTR(imaat, 0600, show_imaat, store_imaat), + __ATTR(btcr, 0600, show_btcr, store_btcr), + __ATTR(pccr, 0600, show_pccr, store_pccr), + __ATTR(rpccr, 0600, show_rpccr, store_rpccr), + __ATTR(der, 0600, show_der, store_der), + __ATTR(mer, 0600, show_mer, store_mer), + __ATTR(ber, 0600, show_ber, store_ber), + __ATTR(ier, 0600, show_ier, store_ier), + __ATTR(sier, 0600, show_sier, store_sier), + __ATTR(siar, 0600, show_siar, store_siar), + __ATTR(tsr0, 0600, show_tsr0, store_tsr0), + __ATTR(tsr1, 0600, show_tsr1, store_tsr1), + __ATTR(tsr2, 0600, show_tsr2, store_tsr2), + __ATTR(tsr3, 0600, show_tsr3, store_tsr3), #endif /* CONFIG_DEBUG_KERNEL */ }; #endif /* HAS_PPC_PMC_PA6T */ @@ -333,14 +336,14 @@ static struct sysdev_attribute pa6t_attrs[] = { static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); + device_create_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -348,14 +351,14 @@ static void __cpuinit register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -363,7 +366,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -374,24 +377,27 @@ static void __cpuinit register_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_create_file(s, &attrs[i]); + device_create_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_create_file(s, &pmc_attrs[i]); + device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); + device_create_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_create_file(s, &attr_purr); + device_create_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_create_file(s, &attr_spurr); + device_create_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_create_file(s, &attr_dscr); + device_create_file(s, &dev_attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + device_create_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -401,8 +407,8 @@ static void __cpuinit register_cpu_online(unsigned int cpu) static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct sysdev_attribute *attrs, *pmc_attrs; + struct device *s = &c->dev; + struct device_attribute *attrs, *pmc_attrs; int i, nattrs; BUG_ON(!c->hotpluggable); @@ -410,7 +416,7 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PPC64 if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); + device_remove_file(s, &dev_attr_smt_snooze_delay); #endif /* PMC stuff */ @@ -418,14 +424,14 @@ static void unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -433,7 +439,7 @@ static void unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute); + nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; #endif /* HAS_PPC_PMC_PA6T */ @@ -444,24 +450,27 @@ static void unregister_cpu_online(unsigned int cpu) } for (i = 0; i < nattrs; i++) - sysdev_remove_file(s, &attrs[i]); + device_remove_file(s, &attrs[i]); if (pmc_attrs) for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - sysdev_remove_file(s, &pmc_attrs[i]); + device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); + device_remove_file(s, &dev_attr_mmcra); if (cpu_has_feature(CPU_FTR_PURR)) - sysdev_remove_file(s, &attr_purr); + device_remove_file(s, &dev_attr_purr); if (cpu_has_feature(CPU_FTR_SPURR)) - sysdev_remove_file(s, &attr_spurr); + device_remove_file(s, &dev_attr_spurr); if (cpu_has_feature(CPU_FTR_DSCR)) - sysdev_remove_file(s, &attr_dscr); + device_remove_file(s, &dev_attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + device_remove_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); @@ -513,70 +522,70 @@ static struct notifier_block __cpuinitdata sysfs_cpu_nb = { static DEFINE_MUTEX(cpu_mutex); -int cpu_add_sysdev_attr(struct sysdev_attribute *attr) +int cpu_add_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_create_file(get_cpu_sysdev(cpu), attr); + device_create_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr); -int cpu_add_sysdev_attr_group(struct attribute_group *attrs) +int cpu_add_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; int ret; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - ret = sysfs_create_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + ret = sysfs_create_group(&dev->kobj, attrs); WARN_ON(ret != 0); } mutex_unlock(&cpu_mutex); return 0; } -EXPORT_SYMBOL_GPL(cpu_add_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group); -void cpu_remove_sysdev_attr(struct sysdev_attribute *attr) +void cpu_remove_dev_attr(struct device_attribute *attr) { int cpu; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev_remove_file(get_cpu_sysdev(cpu), attr); + device_remove_file(get_cpu_device(cpu), attr); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr); -void cpu_remove_sysdev_attr_group(struct attribute_group *attrs) +void cpu_remove_dev_attr_group(struct attribute_group *attrs) { int cpu; - struct sys_device *sysdev; + struct device *dev; mutex_lock(&cpu_mutex); for_each_possible_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - sysfs_remove_group(&sysdev->kobj, attrs); + dev = get_cpu_device(cpu); + sysfs_remove_group(&dev->kobj, attrs); } mutex_unlock(&cpu_mutex); } -EXPORT_SYMBOL_GPL(cpu_remove_sysdev_attr_group); +EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ @@ -590,18 +599,18 @@ static void register_nodes(void) register_one_node(i); } -int sysfs_add_device_to_node(struct sys_device *dev, int nid) +int sysfs_add_device_to_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - return sysfs_create_link(&node->sysdev.kobj, &dev->kobj, + return sysfs_create_link(&node->dev.kobj, &dev->kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_add_device_to_node); -void sysfs_remove_device_from_node(struct sys_device *dev, int nid) +void sysfs_remove_device_from_node(struct device *dev, int nid) { struct node *node = &node_devices[nid]; - sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj)); + sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj)); } EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); @@ -614,14 +623,14 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_physical_id(struct device *dev, + struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu *cpu = container_of(dev, struct cpu, dev); - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); + return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->dev.id)); } -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); +static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { @@ -646,7 +655,7 @@ static int __init topology_init(void) if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); - sysdev_create_file(&c->sysdev, &attr_physical_id); + device_create_file(&c->dev, &dev_attr_physical_id); } if (cpu_online(cpu)) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 522bb1dfc353..567dd7c3ac2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -86,8 +86,6 @@ static struct clocksource clocksource_rtc = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = rtc_read, }; @@ -97,8 +95,6 @@ static struct clocksource clocksource_timebase = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = timebase_read, }; @@ -110,22 +106,16 @@ static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); static struct clock_event_device decrementer_clockevent = { - .name = "decrementer", - .rating = 200, - .shift = 0, /* To be filled in */ - .mult = 0, /* To be filled in */ - .irq = 0, - .set_next_event = decrementer_set_next_event, - .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .name = "decrementer", + .rating = 200, + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, }; -struct decrementer_clock { - struct clock_event_device event; - u64 next_tb; -}; - -static DEFINE_PER_CPU(struct decrementer_clock, decrementers); +DEFINE_PER_CPU(u64, decrementers_next_tb); +static DEFINE_PER_CPU(struct clock_event_device, decrementers); #ifdef CONFIG_PPC_ISERIES static unsigned long __initdata iSeries_recal_titan; @@ -168,13 +158,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -192,8 +182,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); @@ -441,7 +431,7 @@ EXPORT_SYMBOL(profile_pc); /* * This function recalibrates the timebase based on the 49-bit time-of-day * value in the Titan chip. The Titan is much more accurate than the value - * returned by the service processor for the timebase frequency. + * returned by the service processor for the timebase frequency. */ static int __init iSeries_tb_recal(void) @@ -576,9 +566,8 @@ void arch_irq_work_raise(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - struct clock_event_device *evt = &decrementer->event; - u64 now; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -613,16 +602,9 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - now = get_tb_or_rtc(); - if (now >= decrementer->next_tb) { - decrementer->next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - } else { - now = decrementer->next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - } + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) @@ -650,9 +632,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); local_irq_disable(); - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); } static void generic_suspend_enable_irqs(void) @@ -824,9 +806,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, ++vdso_data->tb_update_count; smp_mb(); - /* XXX this assumes clock->shift == 22 */ - /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - new_tb_to_xs = (u64) mult * 4611686018ULL; + /* 19342813113834067 ~= 2^(20+64) / 1e9 */ + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(new_stamp_xsec, 1000000000); new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; @@ -877,9 +858,7 @@ static void __init clocksource_init(void) else clock = &clocksource_timebase; - clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); - - if (clocksource_register(clock)) { + if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", clock->name); return; @@ -892,7 +871,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt; + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); return 0; } @@ -904,34 +883,9 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } -static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, - int shift) -{ - uint64_t tmp = ((uint64_t)ticks) << shift; - - do_div(tmp, nsec); - return tmp; -} - -static void __init setup_clockevent_multiplier(unsigned long hz) -{ - u64 mult, shift = 32; - - while (1) { - mult = div_sc64(hz, NSEC_PER_SEC, shift); - if (mult && (mult >> 32UL) == 0UL) - break; - - shift--; - } - - decrementer_clockevent.shift = shift; - decrementer_clockevent.mult = mult; -} - static void register_decrementer_clockevent(int cpu) { - struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; + struct clock_event_device *dec = &per_cpu(decrementers, cpu); *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); @@ -946,7 +900,8 @@ static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); - setup_clockevent_multiplier(ppc_tb_freq); + clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); + decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = @@ -1014,10 +969,10 @@ void __init time_init(void) boot_tb = get_tb_or_rtc(); /* If platform provided a timezone (pmac), we correct the time */ - if (timezone_offset) { + if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; - } + } vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5459d148a0f6..c091527efd89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,66 +113,109 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + oops_exit(); + printk("\n"); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { + crash_kexec(regs); - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. + */ + crash_kexec_secondary(regs); } - bust_spinlocks(0); - die.lock_owner = -1; - add_taint(TAINT_DIE); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!signr) + return; - if (kexec_should_crash(current) || - kexec_sr_activated(smp_processor_id())) - crash_kexec(regs); - crash_kexec_secondary(regs); + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. + */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - oops_exit(); - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -195,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); @@ -220,25 +260,8 @@ void system_reset_exception(struct pt_regs *regs) return; } -#ifdef CONFIG_KEXEC - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); -#endif - die("System Reset", regs, SIGABRT); - /* - * Some CPUs when released from the debugger will execute this path. - * These CPUs entered the debugger via a soft-reset. If the CPU was - * hung before entering the debugger it will return to the hung - * state when exiting this function. This causes a problem in - * kdump since the hung CPU(s) will not respond to the IPI sent - * from kdump. To prevent the problem we call crash_kexec_secondary() - * here. If a kdump had not been initiated or we exit the debugger - * with the "exit and recover" command (x) crash_kexec_secondary() - * will return after 5ms and the CPU returns to its previous state. - */ - crash_kexec_secondary(regs); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index f65af61996bd..8b086299ba25 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = GENERIC_SUBSYS_PM_OPS, }; /** diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 920276c0f6a1..710a54005dfb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -170,7 +170,13 @@ SECTIONS } #ifdef CONFIG_RELOCATABLE . = ALIGN(8); - .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) } + .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) + { +#ifdef CONFIG_RELOCATABLE_PPC32 + __dynamic_symtab = .; +#endif + *(.dynsym) + } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET) { |