From 228b1a473037c89d524e03a569c688a22241b4ea Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 8 Aug 2013 15:56:09 +0300 Subject: powerpc/booke64: Add LRAT error exception handler LRAT (Logical to Real Address Translation) present in MMU v2 provides hardware translation from a logical page number (LPN) to a real page number (RPN) when tlbwe is executed by a guest or when a page table translation occurs from a guest virtual address. Add LRAT error exception handler to Booke3E 64-bit kernel and the basic KVM handler to avoid build breakage. This is a prerequisite for KVM LRAT support that will follow. Signed-off-by: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12 ++++++++++++ arch/powerpc/kernel/exceptions-64e.S | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7290b7..fa6862db8a02 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -57,6 +57,12 @@ _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: #endif bl __setup_cpu_e5500 mtlr r6 @@ -119,6 +125,12 @@ _GLOBAL(__setup_cpu_e5500) _GLOBAL(__restore_cpu_e6500) mflr r5 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. 
r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: bl __restore_cpu_e5500 mtlr r5 blr diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e7751561fd1d..4d5a0b1034e8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -308,6 +308,7 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x2e0, guest_doorbell_crit) EXCEPTION_STUB(0x300, hypercall) EXCEPTION_STUB(0x320, ehpriv) + EXCEPTION_STUB(0x340, lrat_error) .globl interrupt_end_book3e interrupt_end_book3e: @@ -677,6 +678,17 @@ kernel_dbg_exc: bl .unknown_exception b .ret_from_except +/* LRAT Error interrupt */ + START_EXCEPTION(lrat_error); + NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, + PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x340, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -859,6 +871,7 @@ BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) BAD_STACK_TRAMPOLINE(0x310) BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x340) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -1414,3 +1427,7 @@ _GLOBAL(setup_ehv_ivors) SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr + +_GLOBAL(setup_lrat_ivor) + SET_IVOR(42, 0x340) /* LRAT Error */ + blr -- cgit v1.2.3 From 640e922501103aaf2e0abb4cf4de5d49fa8342f7 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 10 Dec 2013 23:07:45 +0000 Subject: powerpc: fix exception clearing in e500 SPE float emulation The e500 SPE floating-point emulation code clears existing exceptions (__FPU_FPSCR &= ~FP_EX_MASK;) before ORing in the exceptions from the emulated operation. 
However, these exception bits are the "sticky", cumulative exception bits, and should only be cleared by the user program setting SPEFSCR, not implicitly by any floating-point instruction (whether executed purely by the hardware or emulated). The spurious clearing of these bits shows up as missing exceptions in glibc testing. Fixing this, however, is not as simple as just not clearing the bits, because while the bits may be from previous floating-point operations (in which case they should not be cleared), the processor can also set the sticky bits itself before the interrupt for an exception occurs, and this can happen in cases when IEEE 754 semantics are that the sticky bit should not be set. Specifically, the "invalid" sticky bit is set in various cases with non-finite operands, where IEEE 754 semantics do not involve raising such an exception, and the "underflow" sticky bit is set in cases of exact underflow, whereas IEEE 754 semantics are that this flag is set only for inexact underflow. Thus, for correct emulation the kernel needs to know the setting of these two sticky bits before the instruction being emulated. When a floating-point operation raises an exception, the kernel can note the state of the sticky bits immediately afterwards. Some functions that affect the state of these bits, such as fesetenv and feholdexcept, need to use prctl with PR_GET_FPEXC and PR_SET_FPEXC anyway, and so it is natural to record the state of those bits during that call into the kernel and so avoid any need for a separate call into the kernel to inform it of a change to those bits. Thus, the interface I chose to use (in this patch and the glibc port) is that one of those prctl calls must be made after any userspace change to those sticky bits, other than through a floating-point operation that traps into the kernel anyway. feclearexcept and fesetexceptflag duly make those calls, which would not be required were it not for this issue. 
The previous EGLIBC port, and the uClibc code copied from it, is fundamentally broken as regards any use of prctl for floating-point exceptions because it didn't use the PR_FP_EXC_SW_ENABLE bit in its prctl calls (and did various worse things, such as passing a pointer when prctl expected an integer). If you avoid anything where prctl is used, the clearing of sticky bits still means it will never give anything approximating correct exception semantics with existing kernels. I don't believe the patch makes things any worse for existing code that doesn't try to inform the kernel of changes to sticky bits - such code may get incorrect exceptions in some cases, but it would have done so anyway in other cases. Signed-off-by: Joseph Myers Signed-off-by: Scott Wood --- arch/powerpc/kernel/process.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb0..b08c0d03530f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1175,6 +1175,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val & PR_FP_EXC_SW_ENABLE) { #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). <fenv.h> functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment.
+ */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); return 0; @@ -1206,9 +1219,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) #ifdef CONFIG_SPE - if (cpu_has_feature(CPU_FTR_SPE)) + if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). <fenv.h> functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; - else + } else return -EINVAL; #else return -EINVAL; -- cgit v1.2.3 From b58a7bd6df7b61446b833a7c72f8a1f11066e0b0 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 2 Jan 2014 16:37:50 -0600 Subject: powerpc/fsl-booke: Use SPRN_SPRGn rather than mfsprg/mtsprg This fixes a build break that was probably introduced with the removal of -Wa,-me500 (commit f49596a4cf4753d13951608f24f939a59fdcc653), where the assembler refuses to recognize SPRG4-7 with a generic PPC target.
Signed-off-by: Scott Wood Cc: Dongsheng Wang Cc: Anton Vorontsov Reviewed-by: Wang Dongsheng Tested-by: Wang Dongsheng --- arch/powerpc/kernel/swsusp_booke.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S index 0f204053e5b5..553c1405ee05 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend) bne 1b /* Save SPRGs */ - mfsprg r4,0 + mfspr r4,SPRN_SPRG0 stw r4,SL_SPRG0(r11) - mfsprg r4,1 + mfspr r4,SPRN_SPRG1 stw r4,SL_SPRG1(r11) - mfsprg r4,2 + mfspr r4,SPRN_SPRG2 stw r4,SL_SPRG2(r11) - mfsprg r4,3 + mfspr r4,SPRN_SPRG3 stw r4,SL_SPRG3(r11) - mfsprg r4,4 + mfspr r4,SPRN_SPRG4 stw r4,SL_SPRG4(r11) - mfsprg r4,5 + mfspr r4,SPRN_SPRG5 stw r4,SL_SPRG5(r11) - mfsprg r4,6 + mfspr r4,SPRN_SPRG6 stw r4,SL_SPRG6(r11) - mfsprg r4,7 + mfspr r4,SPRN_SPRG7 stw r4,SL_SPRG7(r11) /* Call the low level suspend stuff (we should probably have made @@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume) bl _tlbil_all lwz r4,SL_SPRG0(r11) - mtsprg 0,r4 + mtspr SPRN_SPRG0,r4 lwz r4,SL_SPRG1(r11) - mtsprg 1,r4 + mtspr SPRN_SPRG1,r4 lwz r4,SL_SPRG2(r11) - mtsprg 2,r4 + mtspr SPRN_SPRG2,r4 lwz r4,SL_SPRG3(r11) - mtsprg 3,r4 + mtspr SPRN_SPRG3,r4 lwz r4,SL_SPRG4(r11) - mtsprg 4,r4 + mtspr SPRN_SPRG4,r4 lwz r4,SL_SPRG5(r11) - mtsprg 5,r4 + mtspr SPRN_SPRG5,r4 lwz r4,SL_SPRG6(r11) - mtsprg 6,r4 + mtspr SPRN_SPRG6,r4 lwz r4,SL_SPRG7(r11) - mtsprg 7,r4 + mtspr SPRN_SPRG7,r4 /* restore the MSR */ lwz r3,SL_MSR(r11) -- cgit v1.2.3 From 202e059ce34d5c5e3ff8a542866c280d575ccb17 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:00 +0800 Subject: powerpc/85xx: add hardware automatically enter altivec idle state Each core's AltiVec unit may be placed into a power savings mode by turning off power to the unit. 
Core hardware will automatically power down the AltiVec unit after no AltiVec instructions have executed in N cycles. The AltiVec power-control is triggered by hardware. Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index fa6862db8a02..26c09db2ec20 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for AV_WAIT_IDLE_BIT. + */ +#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_altivec_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Enable Altivec Idle */ + oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h + li r11, AV_WAIT_IDLE_BIT + + /* Set Automatic AltiVec Idle Count */ + rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -64,6 +83,7 @@ _GLOBAL(__setup_cpu_e6500) bl .setup_lrat_ivor 1: #endif + bl setup_altivec_idle bl __setup_cpu_e5500 mtlr r6 blr @@ -131,6 +151,7 @@ _GLOBAL(__restore_cpu_e6500) beq 1f bl .setup_lrat_ivor 1: + bl .setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 blr -- cgit v1.2.3 From 1d47ddf7c3725e889763b1fffa70a04e1061940b Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:01 +0800 Subject: powerpc/85xx: add hardware automatically enter pw20 state Using hardware features make core automatically enter PW20 state. Set a TB count to hardware, the effective count begins when PW10 is entered. When the effective period has expired, the core will proceed from PW10 to PW20 if no exit conditions have occurred during the period. 
Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 26c09db2ec20..cc2d8962e090 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for PW20_WAIT_IDLE_BIT. + */ +#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_pw20_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Set PW20_WAIT bit, enable pw20 state*/ + ori r3, r3, PWRMGTCR0_PW20_WAIT + li r11, PW20_WAIT_IDLE_BIT + + /* Set Automatic PW20 Core Idle Count */ + rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + /* * FIXME - we haven't yet done testing to determine a reasonable default * value for AV_WAIT_IDLE_BIT. @@ -83,6 +102,7 @@ _GLOBAL(__setup_cpu_e6500) bl .setup_lrat_ivor 1: #endif + bl setup_pw20_idle bl setup_altivec_idle bl __setup_cpu_e5500 mtlr r6 @@ -151,6 +171,7 @@ _GLOBAL(__restore_cpu_e6500) beq 1f bl .setup_lrat_ivor 1: + bl .setup_pw20_idle bl .setup_altivec_idle bl __restore_cpu_e5500 mtlr r5 -- cgit v1.2.3 From a7189483f03d4c4b93219ff27a2e0a01716abd21 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:17:02 +0800 Subject: powerpc/85xx: add sysfs for pw20 state and altivec idle Add a sysfs interface to enable/disable pw20 state or altivec idle, and control the wait entry time. Enable/Disable interface: 0, disable. 1, enable. /sys/devices/system/cpu/cpuX/pw20_state /sys/devices/system/cpu/cpuX/altivec_idle Set wait time interface:(Nanosecond) /sys/devices/system/cpu/cpuX/pw20_wait_time /sys/devices/system/cpu/cpuX/altivec_idle_wait_time Example: Based on a TB frequency of 41 MHz.
1~48(ns): TB[63] 49~97(ns): TB[62] 98~195(ns): TB[61] 196~390(ns): TB[60] 391~780(ns): TB[59] 781~1560(ns): TB[58] ... Signed-off-by: Wang Dongsheng [scottwood@freescale.com: change ifdef] Signed-off-by: Scott Wood --- arch/powerpc/kernel/sysfs.c | 316 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index cad777eb613a..d4a43e64a6a9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -86,6 +86,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +#define MAX_BIT 63 + +static u64 pw20_wt; +static u64 altivec_idle_wt; + +static unsigned int get_idle_ticks_bit(u64 ns) +{ + u64 cycle; + + if (ns >= 10000) + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec; + else + cycle = div_u64(ns * tb_ticks_per_usec, 1000); + + if (!cycle) + return 0; + + return ilog2(cycle); +} + +static void do_show_pwrmgtcr0(void *val) +{ + u32 *value = val; + + *value = mfspr(SPRN_PWRMGTCR0); +} + +static ssize_t show_pw20_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_PW20_WAIT; + + return sprintf(buf, "%u\n", value ? 
1 : 0); +} + +static void do_store_pw20_state(void *val) +{ + u32 *value = val; + u32 pw20_state; + + pw20_state = mfspr(SPRN_PWRMGTCR0); + + if (*value) + pw20_state |= PWRMGTCR0_PW20_WAIT; + else + pw20_state &= ~PWRMGTCR0_PW20_WAIT; + + mtspr(SPRN_PWRMGTCR0, pw20_state); +} + +static ssize_t store_pw20_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_pw20_state, &value, 1); + + return count; +} + +static ssize_t show_pw20_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!pw20_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_PW20_ENT) >> + PWRMGTCR0_PW20_ENT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = pw20_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? 
time : 0); +} + +static void set_pw20_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 pw20_idle; + + pw20_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic PW20 Core Idle Count */ + /* clear count */ + pw20_idle &= ~PWRMGTCR0_PW20_ENT; + + /* set count */ + pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, pw20_idle); +} + +static ssize_t store_pw20_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + pw20_wt = value; + + smp_call_function_single(cpu, set_pw20_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +static ssize_t show_altivec_idle(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev->id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + + value &= PWRMGTCR0_AV_IDLE_PD_EN; + + return sprintf(buf, "%u\n", value ? 
1 : 0); +} + +static void do_store_altivec_idle(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + if (*value) + altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN; + else + altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN; + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev->id; + + if (kstrtou32(buf, 0, &value)) + return -EINVAL; + + if (value > 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_altivec_idle, &value, 1); + + return count; +} + +static ssize_t show_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev->id; + + if (!altivec_idle_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1); + value = (value & PWRMGTCR0_AV_IDLE_CNT) >> + PWRMGTCR0_AV_IDLE_CNT_SHIFT; + + tb_cycle = (tb_cycle << (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec > 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + &rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = altivec_idle_wt; + } + + return sprintf(buf, "%llu\n", time > 0 ? 
time : 0); +} + +static void set_altivec_idle_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 altivec_idle; + + altivec_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic AltiVec Idle Count */ + /* clear count */ + altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT; + + /* set count */ + altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, altivec_idle); +} + +static ssize_t store_altivec_idle_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev->id; + + if (kstrtou64(buf, 0, &value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit > MAX_BIT) + return -EINVAL; + + altivec_idle_wt = value; + + smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit, + &entry_bit, 1); + + return count; +} + +/* + * Enable/Disable interface: + * 0, disable. 1, enable. + */ +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state); +static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle); + +/* + * Set wait time interface:(Nanosecond) + * Example: Base on TBfreq is 41MHZ. + * 1~48(ns): TB[63] + * 49~97(ns): TB[62] + * 98~195(ns): TB[61] + * 196~390(ns): TB[60] + * 391~780(ns): TB[59] + * 781~1560(ns): TB[58] + * ... + */ +static DEVICE_ATTR(pw20_wait_time, 0600, + show_pw20_wait_time, + store_pw20_wait_time); +static DEVICE_ATTR(altivec_idle_wait_time, 0600, + show_altivec_idle_wait_time, + store_altivec_idle_wait_time); +#endif + /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. 
@@ -425,6 +723,15 @@ static void register_cpu_online(unsigned int cpu) device_create_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_create_file(s, &dev_attr_pw20_state); + device_create_file(s, &dev_attr_pw20_wait_time); + + device_create_file(s, &dev_attr_altivec_idle); + device_create_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_online(cpu); } @@ -497,6 +804,15 @@ static void unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_pir); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { + device_remove_file(s, &dev_attr_pw20_state); + device_remove_file(s, &dev_attr_pw20_wait_time); + + device_remove_file(s, &dev_attr_altivec_idle); + device_remove_file(s, &dev_attr_altivec_idle_wait_time); + } +#endif cacheinfo_cpu_offline(cpu); } -- cgit v1.2.3 From 7c732cba3d9312882e82d91d5948261dfd5c8fe6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:03 +0800 Subject: powerpc/fsl_booke: protect the access to MAS7 The e500v1 doesn't implement MAS7, so we should avoid accessing this register on that implementation. In the current kernel, accesses to MAS7 are protected by either CONFIG_PHYS_64BIT or MMU_FTR_BIG_PHYS. Since some code is executed before the code patching, we have to use CONFIG_PHYS_64BIT in these cases.
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f45726a1d963..09921a5197c6 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -82,7 +82,9 @@ _ENTRY(_start); and r19,r3,r18 /* r19 = page offset */ andc r31,r20,r18 /* r31 = page base */ or r31,r31,r19 /* r31 = devtree phys addr */ +#ifdef CONFIG_PHYS_64BIT mfspr r30,SPRN_MAS7 +#endif li r25,0 /* phys kernel start (low) */ li r24,0 /* CPU number */ -- cgit v1.2.3 From 99739611e816716d912ae89a4354237fc39745a6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:04 +0800 Subject: powerpc/fsl_booke: introduce get_phys_addr function Move the codes which translate a effective address to physical address to a separate function. So it can be reused by other code. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 50 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 09921a5197c6..196950f29c00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -65,26 +65,9 @@ _ENTRY(_start); nop /* Translate device tree address to physical, save in r30/r31 */ - mfmsr r16 - mfspr r17,SPRN_PID - rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */ - rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ - mtspr SPRN_MAS6,r17 - - tlbsx 0,r3 /* must succeed */ - - mfspr r16,SPRN_MAS1 - mfspr r20,SPRN_MAS3 - rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */ - li r18,1024 - slw r18,r18,r17 /* r18 = page size */ - addi r18,r18,-1 - and r19,r3,r18 /* r19 = page offset */ - andc r31,r20,r18 /* r31 = page base */ - or 
r31,r31,r19 /* r31 = devtree phys addr */ -#ifdef CONFIG_PHYS_64BIT - mfspr r30,SPRN_MAS7 -#endif + bl get_phys_addr + mr r30,r3 + mr r31,r4 li r25,0 /* phys kernel start (low) */ li r24,0 /* CPU number */ @@ -857,6 +840,33 @@ KernelSPE: #endif /* CONFIG_SPE */ +/* + * Translate the effec addr in r3 to phys addr. The phys addr will be put + * into r3(higher 32bit) and r4(lower 32bit) + */ +get_phys_addr: + mfmsr r8 + mfspr r9,SPRN_PID + rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ + mtspr SPRN_MAS6,r9 + + tlbsx 0,r3 /* must succeed */ + + mfspr r8,SPRN_MAS1 + mfspr r12,SPRN_MAS3 + rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */ + li r10,1024 + slw r10,r10,r9 /* r10 = page size */ + addi r10,r10,-1 + and r11,r3,r10 /* r11 = page offset */ + andc r4,r12,r10 /* r4 = page base */ + or r4,r4,r11 /* r4 = devtree phys addr */ +#ifdef CONFIG_PHYS_64BIT + mfspr r3,SPRN_MAS7 +#endif + blr + /* * Global functions */ -- cgit v1.2.3 From dd189692d40948d6445bbaeb8cb9bf9d15f54dc6 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:06 +0800 Subject: powerpc: enable the relocatable support for the fsl booke 32bit kernel This is based on the codes in the head_44x.S. The difference is that the init tlb size we used is 64M. With this patch we can only load the kernel at address between memstart_addr ~ memstart_addr + 64M. We will fix this restriction in the following patches. 
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 2 ++ arch/powerpc/kernel/head_fsl_booke.S | 34 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index a92c79be2728..f22e7e44fbf3 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */ /* 7. Jump to KERNELBASE mapping */ lis r6,(KERNELBASE & ~0xfff)@h ori r6,r6,(KERNELBASE & ~0xfff)@l + rlwinm r7,r25,0,0x03ffffff + add r6,r7,r6 #elif defined(ENTRY_MAPPING_KEXEC_SETUP) /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 196950f29c00..19bd574bda9d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -73,6 +73,30 @@ _ENTRY(_start); li r24,0 /* CPU number */ li r23,0 /* phys kernel start (high) */ +#ifdef CONFIG_RELOCATABLE + LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */ + + /* Translate _stext address to physical, save in r23/r25 */ + bl get_phys_addr + mr r23,r3 + mr r25,r4 + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 64M page. + * We could map the 64M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */ + rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virtual Address */ + + bl relocate +#endif + /* We try to not make any assumptions about how the boot loader * setup or used the TLBs. 
We invalidate all mappings from the * boot loader and load a single entry in TLB1[0] to map the @@ -182,6 +206,16 @@ _ENTRY(__early_start) bl early_init +#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_PHYS_64BIT + mr r3,r23 + mr r4,r25 +#else + mr r3,r25 +#endif + bl relocate_init +#endif + #ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) -- cgit v1.2.3 From 78a235efdc42ff363de81fdbc171385e8b86b69b Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:07 +0800 Subject: powerpc/fsl_booke: set the tlb entry for the kernel address in AS1 We use the tlb1 entries to map low mem to the kernel space. In the current code, it assumes that the first tlb entry would cover the kernel image. But this is not true for some special cases, such as when we run a relocatable kernel above the 64M or set CONFIG_KERNEL_START above 64M. So we choose to switch to address space 1 before setting these tlb entries. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 81 ++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 19bd574bda9d..75f0223e6d0d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1156,6 +1156,87 @@ __secondary_hold_acknowledge: .long -1 #endif +/* + * Create a tlb entry with the same effective and physical address as + * the tlb entry used by the current running code. But set the TS to 1. + * Then switch to the address space 1. It will return with the r3 set to + * the ESEL of the new created tlb. + */ +_GLOBAL(switch_to_as1) + mflr r5 + + /* Find a entry not used */ + mfspr r3,SPRN_TLB1CFG + andi. 
r3,r3,0xfff + mfspr r4,SPRN_PID + rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + mtspr SPRN_MAS6,r4 +1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */ + addi r3,r3,-1 + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbre + mfspr r4,SPRN_MAS1 + andis. r4,r4,MAS1_VALID@h + bne 1b + + /* Get the tlb entry used by the current running code */ + bl 0f +0: mflr r4 + tlbsx 0,r4 + + mfspr r4,SPRN_MAS1 + ori r4,r4,MAS1_TS /* Set the TS = 1 */ + mtspr SPRN_MAS1,r4 + + mfspr r4,SPRN_MAS0 + rlwinm r4,r4,0,~MAS0_ESEL_MASK + rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r4 + tlbwe + isync + sync + + mfmsr r4 + ori r4,r4,MSR_IS | MSR_DS + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r4 + sync + rfi + +/* + * Restore to the address space 0 and also invalidate the tlb entry created + * by switch_to_as1. +*/ +_GLOBAL(restore_to_as0) + mflr r0 + + bl 0f +0: mflr r9 + addi r9,r9,1f - 0b + + mfmsr r7 + li r8,(MSR_IS | MSR_DS) + andc r7,r7,r8 + + mtspr SPRN_SRR0,r9 + mtspr SPRN_SRR1,r7 + sync + rfi + + /* Invalidate the temporary tlb entry for AS1 */ +1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r9 + tlbre + mfspr r9,SPRN_MAS1 + rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */ + mtspr SPRN_MAS1,r9 + tlbwe + isync + mtlr r0 + blr + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. -- cgit v1.2.3 From b27652dd2174df1a7e0a7c5f00d1c8e3ed9287a7 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:08 +0800 Subject: powerpc: introduce early_get_first_memblock_info For a relocatable kernel since it can be loaded at any place, there is no any relation between the kernel start addr and the memstart_addr. So we can't calculate the memstart_addr from kernel start addr. 
And also we can't wait to do the relocation after we get the real memstart_addr from device tree because it is so late. So introduce a new function we can use to get the first memblock address and size in a very early stage (before machine_init). Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/prom.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa0ad8aafbcc..f58c0d3aaeb4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, return early_init_dt_scan_memory(node, uname, depth, data); } +/* + * For a relocatable kernel, we need to get the memstart_addr first, + * then use it to calculate the virtual kernel start address. This has + * to happen at a very early stage (before machine_init). In this case, + * we just want to get the memstart_address and would not like to mess the + * memblock at this stage. So introduce a variable to skip the memblock_add() + * for this reason. + */ +#ifdef CONFIG_RELOCATABLE +static int add_mem_to_memblock = 1; +#else +#define add_mem_to_memblock 1 +#endif + void __init early_init_dt_add_memory_arch(u64 base, u64 size) { #ifdef CONFIG_PPC64 @@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } /* Add the chunk to the MEMBLOCK list */ - memblock_add(base, size); + if (add_mem_to_memblock) + memblock_add(base, size); } static void __init early_reserve_mem_dt(void) @@ -740,6 +755,30 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } +#ifdef CONFIG_RELOCATABLE +/* + * This function run before early_init_devtree, so we have to init + * initial_boot_params. 
+ */ +void __init early_get_first_memblock_info(void *params, phys_addr_t *size) +{ + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* + * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid + * mess the memblock. + */ + add_mem_to_memblock = 0; + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + add_mem_to_memblock = 1; + + if (size) + *size = first_memblock_size; +} +#endif + /******* * * New implementation of the OF "find" APIs, return a refcounted -- cgit v1.2.3 From 7d2471f9fa85089beb1cb9436ffc28f9e11e518d Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:10 +0800 Subject: powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel This is always true for a non-relocatable kernel. Otherwise the kernel would get stuck. But for a relocatable kernel, it seems a little complicated. When booting a relocatable kernel, we just align the kernel start addr to 64M and map the PAGE_OFFSET from there. The relocation will base on this virtual address. But if this address is not the same as the memstart_addr, we will have to change the map of PAGE_OFFSET to the real memstart_addr and do another relocation again. Signed-off-by: Kevin Hao [scottwood@freescale.com: make offset long and non-negative in simple case] Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 74 +++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 75f0223e6d0d..b1f7edc3c360 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -81,6 +81,39 @@ _ENTRY(_start); mr r23,r3 mr r25,r4 + bl 0f +0: mflr r8 + addis r3,r8,(is_second_reloc - 0b)@ha + lwz r19,(is_second_reloc - 0b)@l(r3) + + /* Check if this is the second relocation. 
*/ + cmpwi r19,1 + bne 1f + + /* + * For the second relocation, we already get the real memstart_addr + * from device tree. So we will map PAGE_OFFSET to memstart_addr, + * then the virtual address of start kernel should be: + * PAGE_OFFSET + (kernstart_addr - memstart_addr) + * Since the offset between kernstart_addr and memstart_addr should + * never be beyond 1G, so we can just use the lower 32bit of them + * for the calculation. + */ + lis r3,PAGE_OFFSET@h + + addis r4,r8,(kernstart_addr - 0b)@ha + addi r4,r4,(kernstart_addr - 0b)@l + lwz r5,4(r4) + + addis r6,r8,(memstart_addr - 0b)@ha + addi r6,r6,(memstart_addr - 0b)@l + lwz r7,4(r6) + + subf r5,r7,r5 + add r3,r3,r5 + b 2f + +1: /* * We have the runtime (virutal) address of our base. * We calculate our shift of offset from a 64M page. @@ -94,7 +127,14 @@ _ENTRY(_start); subf r3,r5,r6 /* r3 = r6 - r5 */ add r3,r4,r3 /* Required Virtual Address */ - bl relocate +2: bl relocate + + /* + * For the second relocation, we already set the right tlb entries + * for the kernel space, so skip the code in fsl_booke_entry_mapping.S + */ + cmpwi r19,1 + beq set_ivor #endif /* We try to not make any assumptions about how the boot loader @@ -122,6 +162,7 @@ _ENTRY(__early_start) #include "fsl_booke_entry_mapping.S" #undef ENTRY_MAPPING_BOOT_SETUP +set_ivor: /* Establish the interrupt vector offsets */ SET_IVOR(0, CriticalInput); SET_IVOR(1, MachineCheck); @@ -207,11 +248,13 @@ _ENTRY(__early_start) bl early_init #ifdef CONFIG_RELOCATABLE + mr r3,r30 + mr r4,r31 #ifdef CONFIG_PHYS_64BIT - mr r3,r23 - mr r4,r25 + mr r5,r23 + mr r6,r25 #else - mr r3,r25 + mr r5,r25 #endif bl relocate_init #endif @@ -1207,6 +1250,9 @@ _GLOBAL(switch_to_as1) /* * Restore to the address space 0 and also invalidate the tlb entry created * by switch_to_as1. + * r3 - the tlb entry which should be invalidated + * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0) + * r5 - device tree virtual address. If r4 is 0, r5 is ignored. 
*/ _GLOBAL(restore_to_as0) mflr r0 @@ -1215,7 +1261,15 @@ _GLOBAL(restore_to_as0) 0: mflr r9 addi r9,r9,1f - 0b - mfmsr r7 + /* + * We may map the PAGE_OFFSET in AS0 to a different physical address, + * so we need calculate the right jump and device tree address based + * on the offset passed by r4. + */ + add r9,r9,r4 + add r5,r5,r4 + +2: mfmsr r7 li r8,(MSR_IS | MSR_DS) andc r7,r7,r8 @@ -1234,9 +1288,19 @@ _GLOBAL(restore_to_as0) mtspr SPRN_MAS1,r9 tlbwe isync + + cmpwi r4,0 + bne 3f mtlr r0 blr + /* + * The PAGE_OFFSET will map to a different physical address, + * jump to _start to do another relocation again. + */ +3: mr r3,r5 + bl _start + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. -- cgit v1.2.3 From 0be7d969b0efef085ed6497d462ba16a875ca737 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:11 +0800 Subject: powerpc/fsl_booke: smp support for booting a relocatable kernel above 64M When booting a secondary cpu above 64M, we face the same issue as on the boot cpu: PAGE_OFFSET maps to two different physical addresses for the init tlb and the final map. So we have to use switch_to_as1/restore_to_as0 during the conversion between these two maps. When restoring to as0 for a secondary cpu, we only need to return to the caller. So add a new parameter to the function restore_to_as0 for this purpose. Use LOAD_REG_ADDR_PIC to get the address of variables which may be used before we set the final map in cams for the secondary cpu. Move the setting of cams a bit earlier in order to avoid unnecessary use of LOAD_REG_ADDR_PIC.
Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_fsl_booke.S | 41 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b1f7edc3c360..b497188a94a1 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -216,8 +216,7 @@ set_ivor: /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - lis r24, boot_cpuid@h - ori r24, r24, boot_cpuid@l + LOAD_REG_ADDR_PIC(r24, boot_cpuid) lwz r24, 0(r24) cmpwi r24, -1 mfspr r24,SPRN_PIR @@ -1146,24 +1145,36 @@ _GLOBAL(__flush_disable_L1) /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start __secondary_start: - lis r3,__secondary_hold_acknowledge@h - ori r3,r3,__secondary_hold_acknowledge@l - stw r24,0(r3) - - li r3,0 - mr r4,r24 /* Why? */ - bl call_setup_cpu - - lis r3,tlbcam_index@ha - lwz r3,tlbcam_index@l(r3) + LOAD_REG_ADDR_PIC(r3, tlbcam_index) + lwz r3,0(r3) mtctr r3 li r26,0 /* r26 safe? */ + bl switch_to_as1 + mr r27,r3 /* tlb entry */ /* Load each CAM entry */ 1: mr r3,r26 bl loadcam_entry addi r26,r26,1 bdnz 1b + mr r3,r27 /* tlb entry */ + LOAD_REG_ADDR_PIC(r4, memstart_addr) + lwz r4,0(r4) + mr r5,r25 /* phys kernel start */ + rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */ + subf r4,r5,r4 /* memstart_addr - phys kernel start */ + li r5,0 /* no device tree */ + li r6,0 /* not boot cpu */ + bl restore_to_as0 + + + lis r3,__secondary_hold_acknowledge@h + ori r3,r3,__secondary_hold_acknowledge@l + stw r24,0(r3) + + li r3,0 + mr r4,r24 /* Why? */ + bl call_setup_cpu /* get current_thread_info and current */ lis r1,secondary_ti@ha @@ -1253,6 +1264,7 @@ _GLOBAL(switch_to_as1) * r3 - the tlb entry which should be invalidated * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0) * r5 - device tree virtual address. 
If r4 is 0, r5 is ignored. + * r6 - boot cpu */ _GLOBAL(restore_to_as0) mflr r0 @@ -1268,6 +1280,7 @@ _GLOBAL(restore_to_as0) */ add r9,r9,r4 add r5,r5,r4 + add r0,r0,r4 2: mfmsr r7 li r8,(MSR_IS | MSR_DS) @@ -1290,7 +1303,9 @@ _GLOBAL(restore_to_as0) isync cmpwi r4,0 - bne 3f + cmpwi cr1,r6,0 + cror eq,4*cr1+eq,eq + bne 3f /* offset != 0 && is_boot_cpu */ mtlr r0 blr -- cgit v1.2.3 From 28efc35fe68dacbddc4b12c2fa8f2df1593a4ad3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 11 Oct 2013 19:22:38 -0500 Subject: powerpc/e6500: TLB miss handler with hardware tablewalk support There are a few things that make the existing hw tablewalk handlers unsuitable for e6500: - Indirect entries go in TLB1 (though the resulting direct entries go in TLB0). - It has threads, but no "tlbsrx." -- so we need a spinlock and a normal "tlbsx". Because we need this lock, hardware tablewalk is mandatory on e6500 unless we want to add spinlock+tlbsx to the normal bolted TLB miss handler. - TLB1 has no HES (nor next-victim hint) so we need software round robin (TODO: integrate this round robin data with hugetlb/KVM) - The existing tablewalk handlers map half of a page table at a time, because IBM hardware has a fixed 1MiB indirect page size. e6500 has variable size indirect entries, with a minimum of 2MiB. So we can't do the half-page indirect mapping, and even if we could it would be less efficient than mapping the full page. - Like on e5500, the linear mapping is bolted, so we don't need the overhead of supporting nested tlb misses. Note that hardware tablewalk does not work in rev1 of e6500. We do not expect to support e6500 rev1 in mainline Linux. 
Signed-off-by: Scott Wood Cc: Mihai Caraman --- arch/powerpc/kernel/asm-offsets.c | 9 +++++++++ arch/powerpc/kernel/paca.c | 5 +++++ arch/powerpc/kernel/setup_64.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 41a283956a29..ed8d68ce71f3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -203,6 +203,15 @@ int main(void) DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); + DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); + + DEFINE(TCD_ESEL_NEXT, + offsetof(struct tlb_core_data, esel_next)); + DEFINE(TCD_ESEL_MAX, + offsetof(struct tlb_core_data, esel_max)); + DEFINE(TCD_ESEL_FIRST, + offsetof(struct tlb_core_data, esel_first)); + DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock)); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 623c356fe34f..bf0aada02fe4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -160,6 +160,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = init_slb_shadow(cpu); #endif /* CONFIG_PPC_STD_MMU_64 */ + +#ifdef CONFIG_PPC_BOOK3E + /* For now -- if we have threads this will be adjusted later */ + new_paca->tcd_ptr = &new_paca->tcd; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2232aff66059..1ce9b87d7df8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -97,6 +97,36 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void 
setup_tlb_core_data(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + int first = cpu_first_thread_sibling(cpu); + + paca[cpu].tcd_ptr = &paca[first].tcd; + + /* + * If we have threads, we need either tlbsrx. + * or e6500 tablewalk mode, or else TLB handlers + * will be racy and could produce duplicate entries. + */ + if (smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500) { + /* Should we panic instead? */ + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", + __func__); + } + } +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif + #ifdef CONFIG_SMP static char *smt_enabled_cmdline; @@ -445,6 +475,7 @@ void __init setup_system(void) smp_setup_cpu_maps(); check_smt_enabled(); + setup_tlb_core_data(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that -- cgit v1.2.3 From ed2ddc56e758d516c5699260ada4d68434dfe1dc Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Thu, 14 Mar 2013 16:55:11 +0200 Subject: powerpc: Replaced tlbilx with tlbwe in the initialization code On Freescale e6500 cores EPCR[DGTMI] controls whether guest supervisor state can execute TLB management instructions. If EPCR[DGTMI]=0, tlbwe and tlbilx are allowed to execute normally in the guest state. A hypervisor may choose to virtualize TLB1 and for this purpose it may use IPROT to protect the entries from being invalidated by the guest. However, because tlbwe and tlbilx execution in the guest state share the same bit, it is not possible to have a scenario where tlbwe is allowed to be executed in guest state and tlbilx traps. When guest TLB management instructions are allowed to be executed in guest state, the guest cannot use tlbilx to invalidate TLB1 guest entries. Linux uses tlbilx in the boot code to invalidate the temporary entries it creates when initializing the MMU. This patch replaces the use of tlbilx in the initialization code with tlbwe with the VALID bit cleared.
Linux is also using tlbilx in other contexts (like huge pages or indirect entries) but removing the tlbilx from the initialization code offers the possibility to have scenarios under hypervisor which are not using huge pages or indirect entries. Signed-off-by: Diana Craciun Signed-off-by: Scott Wood --- arch/powerpc/kernel/exceptions-64e.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4d5a0b1034e8..063b65dd4f27 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1068,12 +1068,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r3 tlbre mfspr r6,SPRN_MAS1 - rlwinm r6,r6,0,2,0 /* clear IPROT */ + rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r6 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync @@ -1127,12 +1124,9 @@ skpinv: addi r6,r6,1 /* Increment */ mtspr SPRN_MAS0,r4 tlbre mfspr r5,SPRN_MAS1 - rlwinm r5,r5,0,2,0 /* clear IPROT */ + rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */ mtspr SPRN_MAS1,r5 tlbwe - - /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,R0) sync isync -- cgit v1.2.3