From 228b1a473037c89d524e03a569c688a22241b4ea Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 8 Aug 2013 15:56:09 +0300
Subject: powerpc/booke64: Add LRAT error exception handler

LRAT (Logical to Real Address Translation) present in MMU v2 provides hardware
translation from a logical page number (LPN) to a real page number (RPN) when
tlbwe is executed by a guest or when a page table translation occurs from a
guest virtual address.

Add LRAT error exception handler to Booke3E 64-bit kernel and the basic KVM
handler to avoid build breakage. This is a prerequisite for KVM LRAT support
that will follow.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12 ++++++++++++
 arch/powerpc/kernel/exceptions-64e.S      | 17 +++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index bfb18c7290b7..fa6862db8a02 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -57,6 +57,12 @@ _GLOBAL(__setup_cpu_e6500)
 	mflr	r6
 #ifdef CONFIG_PPC64
 	bl	.setup_altivec_ivors
+	/* Touch IVOR42 only if the CPU supports E.HV category */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f
+	bl	.setup_lrat_ivor
+1:
 #endif
 	bl	__setup_cpu_e5500
 	mtlr	r6
@@ -119,6 +125,12 @@ _GLOBAL(__setup_cpu_e5500)
 _GLOBAL(__restore_cpu_e6500)
 	mflr	r5
 	bl	.setup_altivec_ivors
+	/* Touch IVOR42 only if the CPU supports E.HV category */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f
+	bl	.setup_lrat_ivor
+1:
 	bl	__restore_cpu_e5500
 	mtlr	r5
 	blr
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e7751561fd1d..4d5a0b1034e8 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -308,6 +308,7 @@ interrupt_base_book3e:					/* fake trap */
 	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
 	EXCEPTION_STUB(0x300, hypercall)
 	EXCEPTION_STUB(0x320, ehpriv)
+	EXCEPTION_STUB(0x340, lrat_error)
 
 	.globl interrupt_end_book3e
 interrupt_end_book3e:
@@ -677,6 +678,17 @@ kernel_dbg_exc:
 	bl	.unknown_exception
 	b	.ret_from_except
 
+/* LRAT Error interrupt: placeholder that reports an unknown exception */
+	START_EXCEPTION(lrat_error);
+	NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
+			        PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x340, PACA_EXGEN, INTS_KEEP)
+	addi	r3,r1,STACK_FRAME_OVERHEAD	/* r3: presumably the pt_regs arg */
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.unknown_exception		/* no LRAT-specific handling */
+	b	.ret_from_except
+
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
@@ -859,6 +871,7 @@ BAD_STACK_TRAMPOLINE(0x2e0)
 BAD_STACK_TRAMPOLINE(0x300)
 BAD_STACK_TRAMPOLINE(0x310)
 BAD_STACK_TRAMPOLINE(0x320)
+BAD_STACK_TRAMPOLINE(0x340)
 BAD_STACK_TRAMPOLINE(0x400)
 BAD_STACK_TRAMPOLINE(0x500)
 BAD_STACK_TRAMPOLINE(0x600)
@@ -1414,3 +1427,7 @@ _GLOBAL(setup_ehv_ivors)
 	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
 	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
 	blr
+
+_GLOBAL(setup_lrat_ivor)
+	SET_IVOR(42, 0x340) /* LRAT Error */
+	blr
-- 
cgit v1.2.3


From 640e922501103aaf2e0abb4cf4de5d49fa8342f7 Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Tue, 10 Dec 2013 23:07:45 +0000
Subject: powerpc: fix exception clearing in e500 SPE float emulation

The e500 SPE floating-point emulation code clears existing exceptions
(__FPU_FPSCR &= ~FP_EX_MASK;) before ORing in the exceptions from the
emulated operation.  However, these exception bits are the "sticky",
cumulative exception bits, and should only be cleared by the user
program setting SPEFSCR, not implicitly by any floating-point
instruction (whether executed purely by the hardware or emulated).
The spurious clearing of these bits shows up as missing exceptions in
glibc testing.

Fixing this, however, is not as simple as just not clearing the bits,
because while the bits may be from previous floating-point operations
(in which case they should not be cleared), the processor can also set
the sticky bits itself before the interrupt for an exception occurs,
and this can happen in cases when IEEE 754 semantics are that the
sticky bit should not be set.  Specifically, the "invalid" sticky bit
is set in various cases with non-finite operands, where IEEE 754
semantics do not involve raising such an exception, and the
"underflow" sticky bit is set in cases of exact underflow, whereas
IEEE 754 semantics are that this flag is set only for inexact
underflow.  Thus, for correct emulation the kernel needs to know the
setting of these two sticky bits before the instruction being
emulated.

When a floating-point operation raises an exception, the kernel can
note the state of the sticky bits immediately afterwards.  Some
<fenv.h> functions that affect the state of these bits, such as
fesetenv and feholdexcept, need to use prctl with PR_GET_FPEXC and
PR_SET_FPEXC anyway, and so it is natural to record the state of those
bits during that call into the kernel and so avoid any need for a
separate call into the kernel to inform it of a change to those bits.
Thus, the interface I chose to use (in this patch and the glibc port)
is that one of those prctl calls must be made after any userspace
change to those sticky bits, other than through a floating-point
operation that traps into the kernel anyway.  feclearexcept and
fesetexceptflag duly make those calls, which would not be required
were it not for this issue.

The previous EGLIBC port, and the uClibc code copied from it, is
fundamentally broken as regards any use of prctl for floating-point
exceptions because it didn't use the PR_FP_EXC_SW_ENABLE bit in its
prctl calls (and did various worse things, such as passing a pointer
when prctl expected an integer).  If you avoid anything where prctl is
used, the clearing of sticky bits still means it will never give
anything approximating correct exception semantics with existing
kernels.  I don't believe the patch makes things any worse for
existing code that doesn't try to inform the kernel of changes to
sticky bits - such code may get incorrect exceptions in some cases,
but it would have done so anyway in other cases.

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/process.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3386d8ab7eb0..b08c0d03530f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1175,6 +1175,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
 	if (val & PR_FP_EXC_SW_ENABLE) {
 #ifdef CONFIG_SPE
 		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
 			tsk->thread.fpexc_mode = val &
 				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
 			return 0;
@@ -1206,9 +1219,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
 
 	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
 #ifdef CONFIG_SPE
-		if (cpu_has_feature(CPU_FTR_SPE))
+		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
 			val = tsk->thread.fpexc_mode;
-		else
+		} else
 			return -EINVAL;
 #else
 		return -EINVAL;
-- 
cgit v1.2.3


From b58a7bd6df7b61446b833a7c72f8a1f11066e0b0 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 2 Jan 2014 16:37:50 -0600
Subject: powerpc/fsl-booke: Use SPRN_SPRGn rather than mfsprg/mtsprg

This fixes a build break that was probably introduced with the removal
of -Wa,-me500 (commit f49596a4cf4753d13951608f24f939a59fdcc653), where
the assembler refuses to recognize SPRG4-7 with a generic PPC target.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Dongsheng Wang <dongsheng.wang@freescale.com>
Cc: Anton Vorontsov <avorontsov@mvista.com>
Reviewed-by: Wang Dongsheng <dongsheng.wang@freescale.com>
Tested-by: Wang Dongsheng <dongsheng.wang@freescale.com>
---
 arch/powerpc/kernel/swsusp_booke.S | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
index 0f204053e5b5..553c1405ee05 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend)
 	bne	1b
 
 	/* Save SPRGs */
-	mfsprg	r4,0
+	mfspr	r4,SPRN_SPRG0
 	stw	r4,SL_SPRG0(r11)
-	mfsprg	r4,1
+	mfspr	r4,SPRN_SPRG1
 	stw	r4,SL_SPRG1(r11)
-	mfsprg	r4,2
+	mfspr	r4,SPRN_SPRG2
 	stw	r4,SL_SPRG2(r11)
-	mfsprg	r4,3
+	mfspr	r4,SPRN_SPRG3
 	stw	r4,SL_SPRG3(r11)
-	mfsprg	r4,4
+	mfspr	r4,SPRN_SPRG4
 	stw	r4,SL_SPRG4(r11)
-	mfsprg	r4,5
+	mfspr	r4,SPRN_SPRG5
 	stw	r4,SL_SPRG5(r11)
-	mfsprg	r4,6
+	mfspr	r4,SPRN_SPRG6
 	stw	r4,SL_SPRG6(r11)
-	mfsprg	r4,7
+	mfspr	r4,SPRN_SPRG7
 	stw	r4,SL_SPRG7(r11)
 
 	/* Call the low level suspend stuff (we should probably have made
@@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume)
 	bl	_tlbil_all
 
 	lwz	r4,SL_SPRG0(r11)
-	mtsprg	0,r4
+	mtspr	SPRN_SPRG0,r4
 	lwz	r4,SL_SPRG1(r11)
-	mtsprg	1,r4
+	mtspr	SPRN_SPRG1,r4
 	lwz	r4,SL_SPRG2(r11)
-	mtsprg	2,r4
+	mtspr	SPRN_SPRG2,r4
 	lwz	r4,SL_SPRG3(r11)
-	mtsprg	3,r4
+	mtspr	SPRN_SPRG3,r4
 	lwz	r4,SL_SPRG4(r11)
-	mtsprg	4,r4
+	mtspr	SPRN_SPRG4,r4
 	lwz	r4,SL_SPRG5(r11)
-	mtsprg	5,r4
+	mtspr	SPRN_SPRG5,r4
 	lwz	r4,SL_SPRG6(r11)
-	mtsprg	6,r4
+	mtspr	SPRN_SPRG6,r4
 	lwz	r4,SL_SPRG7(r11)
-	mtsprg	7,r4
+	mtspr	SPRN_SPRG7,r4
 
 	/* restore the MSR */
 	lwz	r3,SL_MSR(r11)
-- 
cgit v1.2.3


From 202e059ce34d5c5e3ff8a542866c280d575ccb17 Mon Sep 17 00:00:00 2001
From: Wang Dongsheng <dongsheng.wang@freescale.com>
Date: Tue, 17 Dec 2013 16:17:00 +0800
Subject: powerpc/85xx: add hardware automatically enter altivec idle state

Each core's AltiVec unit may be placed into a power savings mode
by turning off power to the unit. Core hardware will automatically
power down the AltiVec unit after no AltiVec instructions have
executed in N cycles. The AltiVec power-control is triggered by hardware.

Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index fa6862db8a02..26c09db2ec20 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup)
 	isync
 	blr
 
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for AV_WAIT_IDLE_BIT.
+ */
+#define AV_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_altivec_idle)
+	mfspr	r3, SPRN_PWRMGTCR0	/* r3 = current PWRMGTCR0 */
+
+	/* Enable AltiVec idle power-down */
+	oris	r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
+	li	r11, AV_WAIT_IDLE_BIT	/* r11 = default idle count */
+
+	/* Insert r11 into the Automatic AltiVec Idle Count field */
+	rlwimi	r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
+
+	mtspr	SPRN_PWRMGTCR0, r3	/* write the updated value back */
+
+	blr				/* r3 and r11 are clobbered */
+
 _GLOBAL(__setup_cpu_e6500)
 	mflr	r6
 #ifdef CONFIG_PPC64
@@ -64,6 +83,7 @@ _GLOBAL(__setup_cpu_e6500)
 	bl	.setup_lrat_ivor
 1:
 #endif
+	bl	setup_altivec_idle
 	bl	__setup_cpu_e5500
 	mtlr	r6
 	blr
@@ -131,6 +151,7 @@ _GLOBAL(__restore_cpu_e6500)
 	beq	1f
 	bl	.setup_lrat_ivor
 1:
+	bl	.setup_altivec_idle
 	bl	__restore_cpu_e5500
 	mtlr	r5
 	blr
-- 
cgit v1.2.3


From 1d47ddf7c3725e889763b1fffa70a04e1061940b Mon Sep 17 00:00:00 2001
From: Wang Dongsheng <dongsheng.wang@freescale.com>
Date: Tue, 17 Dec 2013 16:17:01 +0800
Subject: powerpc/85xx: add hardware automatically enter pw20 state

Use a hardware feature to make the core enter the PW20 state
automatically. A TB count is programmed into the hardware; the count
begins when PW10 is entered. When that period has expired, the core
will proceed from PW10 to PW20 if no exit conditions have occurred
during the period.

Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 26c09db2ec20..cc2d8962e090 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -53,6 +53,25 @@ _GLOBAL(__e500_dcache_setup)
 	isync
 	blr
 
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for PW20_WAIT_IDLE_BIT.
+ */
+#define PW20_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_pw20_idle)
+	mfspr	r3, SPRN_PWRMGTCR0	/* r3 = current PWRMGTCR0 */
+
+	/* Set the PW20_WAIT bit to enable the pw20 state */
+	ori	r3, r3, PWRMGTCR0_PW20_WAIT
+	li	r11, PW20_WAIT_IDLE_BIT	/* r11 = default idle count */
+
+	/* Insert r11 into the Automatic PW20 Core Idle Count field */
+	rlwimi	r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
+
+	mtspr	SPRN_PWRMGTCR0, r3	/* write the updated value back */
+
+	blr				/* r3 and r11 are clobbered */
+
 /*
  * FIXME - we haven't yet done testing to determine a reasonable default
  * value for AV_WAIT_IDLE_BIT.
@@ -83,6 +102,7 @@ _GLOBAL(__setup_cpu_e6500)
 	bl	.setup_lrat_ivor
 1:
 #endif
+	bl	setup_pw20_idle
 	bl	setup_altivec_idle
 	bl	__setup_cpu_e5500
 	mtlr	r6
@@ -151,6 +171,7 @@ _GLOBAL(__restore_cpu_e6500)
 	beq	1f
 	bl	.setup_lrat_ivor
 1:
+	bl	.setup_pw20_idle
 	bl	.setup_altivec_idle
 	bl	__restore_cpu_e5500
 	mtlr	r5
-- 
cgit v1.2.3


From a7189483f03d4c4b93219ff27a2e0a01716abd21 Mon Sep 17 00:00:00 2001
From: Wang Dongsheng <dongsheng.wang@freescale.com>
Date: Tue, 17 Dec 2013 16:17:02 +0800
Subject: powerpc/85xx: add sysfs for pw20 state and altivec idle

Add a sysfs interface to enable/disable the pw20 state or altivec idle,
and to control the wait entry time.

Enable/Disable interface:
    0, disable. 1, enable.
    /sys/devices/system/cpu/cpuX/pw20_state
    /sys/devices/system/cpu/cpuX/altivec_idle

Set wait time interface:(Nanosecond)
    /sys/devices/system/cpu/cpuX/pw20_wait_time
    /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
Example: Base on TBfreq is 41MHZ.
    1~48(ns): TB[63]
    49~97(ns): TB[62]
    98~195(ns): TB[61]
    196~390(ns): TB[60]
    391~780(ns): TB[59]
    781~1560(ns): TB[58]
    ...

Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
[scottwood@freescale.com: change ifdef]
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/sysfs.c | 316 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 316 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index cad777eb613a..d4a43e64a6a9 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -86,6 +86,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define MAX_BIT				63
+
+static u64 pw20_wt;
+static u64 altivec_idle_wt;
+
+static unsigned int get_idle_ticks_bit(u64 ns)
+{
+	u64 cycle;	/* the delay expressed in timebase ticks */
+
+	if (ns >= 10000)	/* round to whole usecs, then scale to ticks */
+		cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
+	else			/* short delay: scale to ticks before dividing */
+		cycle = div_u64(ns * tb_ticks_per_usec, 1000);
+
+	if (!cycle)		/* less than one tick: report bit 0 */
+		return 0;
+
+	return ilog2(cycle);	/* index of the highest set bit in cycle */
+}
+
+static void do_show_pwrmgtcr0(void *val)
+{
+	u32 *value = val;
+
+	*value = mfspr(SPRN_PWRMGTCR0);
+}
+
+static ssize_t show_pw20_state(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+	value &= PWRMGTCR0_PW20_WAIT;
+
+	return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_pw20_state(void *val)
+{
+	u32 *value = val;
+	u32 pw20_state;
+
+	pw20_state = mfspr(SPRN_PWRMGTCR0);
+
+	if (*value)
+		pw20_state |= PWRMGTCR0_PW20_WAIT;
+	else
+		pw20_state &= ~PWRMGTCR0_PW20_WAIT;
+
+	mtspr(SPRN_PWRMGTCR0, pw20_state);
+}
+
+static ssize_t store_pw20_state(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
+
+	return count;
+}
+
+static ssize_t show_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	u64 tb_cycle = 1;
+	u64 time;
+	/* Show the PW20 wait time of CPU dev->id, in nanoseconds. */
+	unsigned int cpu = dev->id;
+
+	if (!pw20_wt) {	/* nothing stored yet: decode the register */
+		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+		value = (value & PWRMGTCR0_PW20_ENT) >>
+					PWRMGTCR0_PW20_ENT_SHIFT;
+		/* NOTE(review): shift count is 64 (undefined) if value == 0 */
+		tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
+		/* convert timebase ticks to ns */
+		if (tb_ticks_per_usec > 1000) {
+			time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+		} else {
+			u32 rem_us;	/* remainder, in ticks */
+
+			time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
+						&rem_us);
+			time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+		}
+	} else {
+		time = pw20_wt;	/* last stored value (one global) */
+	}
+
+	return sprintf(buf, "%llu\n", time > 0 ? time : 0);
+}
+
+static void set_pw20_wait_entry_bit(void *val)
+{
+	u32 *value = val;	/* bit index from get_idle_ticks_bit() */
+	u32 pw20_idle;
+
+	pw20_idle = mfspr(SPRN_PWRMGTCR0);
+
+	/* Set Automatic PW20 Core Idle Count */
+	/* clear count */
+	pw20_idle &= ~PWRMGTCR0_PW20_ENT;
+
+	/* set count: TB bit number, where TB[63] is the lowest bit */
+	pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, pw20_idle);
+}
+
+static ssize_t store_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 entry_bit;
+	u64 value;
+
+	unsigned int cpu = dev->id;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	pw20_wt = value;
+
+	smp_call_function_single(cpu, set_pw20_wait_entry_bit,
+				&entry_bit, 1);
+
+	return count;
+}
+
+static ssize_t show_altivec_idle(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+	value &= PWRMGTCR0_AV_IDLE_PD_EN;
+
+	return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_altivec_idle(void *val)
+{
+	u32 *value = val;
+	u32 altivec_idle;
+
+	altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+	if (*value)
+		altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN;
+	else
+		altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN;
+
+	mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 value;
+	unsigned int cpu = dev->id;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
+
+	return count;
+}
+
+static ssize_t show_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u32 value;
+	u64 tb_cycle = 1;
+	u64 time;
+	/* Show the AltiVec idle wait time of CPU dev->id, in nanoseconds. */
+	unsigned int cpu = dev->id;
+
+	if (!altivec_idle_wt) {	/* nothing stored yet: decode the register */
+		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+		value = (value & PWRMGTCR0_AV_IDLE_CNT) >>
+					PWRMGTCR0_AV_IDLE_CNT_SHIFT;
+		/* NOTE(review): shift count is 64 (undefined) if value == 0 */
+		tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
+		/* convert timebase ticks to ns */
+		if (tb_ticks_per_usec > 1000) {
+			time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+		} else {
+			u32 rem_us;	/* remainder, in ticks */
+
+			time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
+						&rem_us);
+			time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+		}
+	} else {
+		time = altivec_idle_wt;	/* last stored value (one global) */
+	}
+
+	return sprintf(buf, "%llu\n", time > 0 ? time : 0);
+}
+
+static void set_altivec_idle_wait_entry_bit(void *val)
+{
+	u32 *value = val;	/* bit index from get_idle_ticks_bit() */
+	u32 altivec_idle;
+
+	altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+	/* Set Automatic AltiVec Idle Count */
+	/* clear count */
+	altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT;
+
+	/* set count: TB bit number, where TB[63] is the lowest bit */
+	altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u32 entry_bit;
+	u64 value;
+
+	unsigned int cpu = dev->id;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	altivec_idle_wt = value;
+
+	smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
+				&entry_bit, 1);
+
+	return count;
+}
+
+/*
+ * Enable/Disable interface:
+ * 0, disable. 1, enable.
+ */
+static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
+static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
+
+/*
+ * Set wait time interface:(Nanosecond)
+ * Example: Base on TBfreq is 41MHZ.
+ * 1~48(ns): TB[63]
+ * 49~97(ns): TB[62]
+ * 98~195(ns): TB[61]
+ * 196~390(ns): TB[60]
+ * 391~780(ns): TB[59]
+ * 781~1560(ns): TB[58]
+ * ...
+ */
+static DEVICE_ATTR(pw20_wait_time, 0600,
+			show_pw20_wait_time,
+			store_pw20_wait_time);
+static DEVICE_ATTR(altivec_idle_wait_time, 0600,
+			show_altivec_idle_wait_time,
+			store_altivec_idle_wait_time);
+#endif
+
 /*
  * Enabling PMCs will slow partition context switch times so we only do
  * it the first time we write to the PMCs.
@@ -425,6 +723,15 @@ static void register_cpu_online(unsigned int cpu)
 		device_create_file(s, &dev_attr_pir);
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_create_file(s, &dev_attr_pw20_state);
+		device_create_file(s, &dev_attr_pw20_wait_time);
+
+		device_create_file(s, &dev_attr_altivec_idle);
+		device_create_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
 	cacheinfo_cpu_online(cpu);
 }
 
@@ -497,6 +804,15 @@ static void unregister_cpu_online(unsigned int cpu)
 		device_remove_file(s, &dev_attr_pir);
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_remove_file(s, &dev_attr_pw20_state);
+		device_remove_file(s, &dev_attr_pw20_wait_time);
+
+		device_remove_file(s, &dev_attr_altivec_idle);
+		device_remove_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
 	cacheinfo_cpu_offline(cpu);
 }
 
-- 
cgit v1.2.3


From 7c732cba3d9312882e82d91d5948261dfd5c8fe6 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:03 +0800
Subject: powerpc/fsl_booke: protect the access to MAS7

The e500v1 doesn't implement MAS7, so we should avoid accessing this
register on that implementation. In the current kernel, accesses to
MAS7 are protected by either CONFIG_PHYS_64BIT or MMU_FTR_BIG_PHYS.
Since some code is executed before the code patching is done, we have
to use CONFIG_PHYS_64BIT in those cases.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index f45726a1d963..09921a5197c6 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -82,7 +82,9 @@ _ENTRY(_start);
 	and	r19,r3,r18		/* r19 = page offset */
 	andc	r31,r20,r18		/* r31 = page base */
 	or	r31,r31,r19		/* r31 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
 	mfspr	r30,SPRN_MAS7
+#endif
 
 	li	r25,0			/* phys kernel start (low) */
 	li	r24,0			/* CPU number */
-- 
cgit v1.2.3


From 99739611e816716d912ae89a4354237fc39745a6 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:04 +0800
Subject: powerpc/fsl_booke: introduce get_phys_addr function

Move the code which translates an effective address to a physical
address into a separate function, so it can be reused by other code.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 50 +++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 20 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 09921a5197c6..196950f29c00 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,26 +65,9 @@ _ENTRY(_start);
 	nop
 
 	/* Translate device tree address to physical, save in r30/r31 */
-	mfmsr	r16
-	mfspr	r17,SPRN_PID
-	rlwinm	r17,r17,16,0x3fff0000	/* turn PID into MAS6[SPID] */
-	rlwimi	r17,r16,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
-	mtspr	SPRN_MAS6,r17
-
-	tlbsx	0,r3			/* must succeed */
-
-	mfspr	r16,SPRN_MAS1
-	mfspr	r20,SPRN_MAS3
-	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */
-	li	r18,1024
-	slw	r18,r18,r17		/* r18 = page size */
-	addi	r18,r18,-1
-	and	r19,r3,r18		/* r19 = page offset */
-	andc	r31,r20,r18		/* r31 = page base */
-	or	r31,r31,r19		/* r31 = devtree phys addr */
-#ifdef CONFIG_PHYS_64BIT
-	mfspr	r30,SPRN_MAS7
-#endif
+	bl	get_phys_addr
+	mr	r30,r3
+	mr	r31,r4
 
 	li	r25,0			/* phys kernel start (low) */
 	li	r24,0			/* CPU number */
@@ -857,6 +840,33 @@ KernelSPE:
 
 #endif /* CONFIG_SPE */
 
+/*
+ * Translate the effective addr in r3 to a phys addr: r4 = lower 32 bits,
+ * r3 = upper 32 bits (from MAS7; r3 is unchanged without CONFIG_PHYS_64BIT).
+ */
+get_phys_addr:
+	mfmsr	r8
+	mfspr	r9,SPRN_PID
+	rlwinm	r9,r9,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	rlwimi	r9,r8,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
+	mtspr	SPRN_MAS6,r9		/* search key for tlbsx */
+
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r8,SPRN_MAS1
+	mfspr	r12,SPRN_MAS3
+	rlwinm	r9,r8,25,0x1f		/* r9 = log2(page size) */
+	li	r10,1024
+	slw	r10,r10,r9		/* r10 = page size */
+	addi	r10,r10,-1		/* r10 = page offset mask */
+	and	r11,r3,r10		/* r11 = page offset */
+	andc	r4,r12,r10		/* r4 = page base */
+	or	r4,r4,r11		/* r4 = phys addr (lower 32 bits) */
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r3,SPRN_MAS7		/* r3 = phys addr (upper 32 bits) */
+#endif
+	blr				/* r8-r12 and MAS6 are clobbered */
+
 /*
  * Global functions
  */
-- 
cgit v1.2.3


From dd189692d40948d6445bbaeb8cb9bf9d15f54dc6 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:06 +0800
Subject: powerpc: enable the relocatable support for the fsl booke 32bit
 kernel

This is based on the code in head_44x.S. The difference is that the
initial tlb size we use is 64M. With this patch we can only load the
kernel at an address between memstart_addr and memstart_addr + 64M. We
will fix this restriction in the following patches.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  2 ++
 arch/powerpc/kernel/head_fsl_booke.S          | 34 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79be2728..f22e7e44fbf3 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -176,6 +176,8 @@ skpinv:	addi	r6,r6,1				/* Increment */
 /* 7. Jump to KERNELBASE mapping */
 	lis	r6,(KERNELBASE & ~0xfff)@h
 	ori	r6,r6,(KERNELBASE & ~0xfff)@l
+	rlwinm	r7,r25,0,0x03ffffff
+	add	r6,r7,r6
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 196950f29c00..19bd574bda9d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -73,6 +73,30 @@ _ENTRY(_start);
 	li	r24,0			/* CPU number */
 	li	r23,0			/* phys kernel start (high) */
 
+#ifdef CONFIG_RELOCATABLE
+	LOAD_REG_ADDR_PIC(r3, _stext)	/* Get our current runtime base */
+
+	/* Translate _stext address to physical, save in r23/r25 */
+	bl	get_phys_addr
+	mr	r23,r3
+	mr	r25,r4
+
+	/*
+	 * We have the runtime (virutal) address of our base.
+	 * We calculate our shift of offset from a 64M page.
+	 * We could map the 64M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r25,0,0x3ffffff		/* r6 = PHYS_START % 64M */
+	rlwinm	r5,r4,0,0x3ffffff		/* r5 = KERNELBASE % 64M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virtual Address */
+
+	bl	relocate
+#endif
+
 /* We try to not make any assumptions about how the boot loader
  * setup or used the TLBs.  We invalidate all mappings from the
  * boot loader and load a single entry in TLB1[0] to map the
@@ -182,6 +206,16 @@ _ENTRY(__early_start)
 
 	bl	early_init
 
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_PHYS_64BIT
+	mr	r3,r23
+	mr	r4,r25
+#else
+	mr	r3,r25
+#endif
+	bl	relocate_init
+#endif
+
 #ifdef CONFIG_DYNAMIC_MEMSTART
 	lis	r3,kernstart_addr@ha
 	la	r3,kernstart_addr@l(r3)
-- 
cgit v1.2.3


From 78a235efdc42ff363de81fdbc171385e8b86b69b Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:07 +0800
Subject: powerpc/fsl_booke: set the tlb entry for the kernel address in AS1

We use the tlb1 entries to map low mem to the kernel space. In the
current code, it assumes that the first tlb entry would cover the
kernel image. But this is not true for some special cases, such as
when we run a relocatable kernel above the 64M or set
CONFIG_KERNEL_START above 64M. So we choose to switch to address
space 1 before setting these tlb entries.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 81 ++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 19bd574bda9d..75f0223e6d0d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1156,6 +1156,87 @@ __secondary_hold_acknowledge:
 	.long	-1
 #endif
 
+/*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+	mflr	r5			/* save return address; "bl 0f" below clobbers LR */
+
+	/* Find an entry that is not in use (MAS1[V] clear), counting ESEL down */
+	mfspr	r3,SPRN_TLB1CFG
+	andi.	r3,r3,0xfff		/* low 12 bits; presumably TLB1CFG[NENTRY] - confirm */
+	mfspr	r4,SPRN_PID
+	rlwinm	r4,r4,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	mtspr	SPRN_MAS6,r4
+1:	lis	r4,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	addi	r3,r3,-1		/* next candidate ESEL */
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+	mfspr	r4,SPRN_MAS1
+	andis.	r4,r4,MAS1_VALID@h
+	bne	1b			/* entry is valid, try the next one */
+
+	/* Get the tlb entry used by the current running code */
+	bl	0f
+0:	mflr	r4			/* r4 = address of label 0, i.e. of this code */
+	tlbsx	0,r4
+
+	mfspr	r4,SPRN_MAS1
+	ori	r4,r4,MAS1_TS		/* Set the TS = 1 */
+	mtspr	SPRN_MAS1,r4
+
+	mfspr	r4,SPRN_MAS0
+	rlwinm	r4,r4,0,~MAS0_ESEL_MASK
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbwe
+	isync
+	sync
+
+	mfmsr	r4
+	ori	r4,r4,MSR_IS | MSR_DS	/* new MSR: IS and DS set */
+	mtspr	SPRN_SRR0,r5		/* rfi resumes at the saved return address */
+	mtspr	SPRN_SRR1,r4
+	sync
+	rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+*/
+_GLOBAL(restore_to_as0)
+	mflr	r0
+
+	bl	0f			/* LR = runtime address of label 0 */
+0:	mflr	r9
+	addi	r9,r9,1f - 0b
+
+	mfmsr	r7
+	li	r8,(MSR_IS | MSR_DS)
+	andc	r7,r7,r8
+
+	mtspr	SPRN_SRR0,r9		/* rfi continues at label 1 below */
+	mtspr	SPRN_SRR1,r7		/* ... with MSR[IS] and MSR[DS] cleared */
+	sync
+	rfi
+
+	/* Invalidate the temporary tlb entry for AS1 (its ESEL is in r3) */
+1:	lis	r9,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r9,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r9
+	tlbre
+	mfspr	r9,SPRN_MAS1
+	rlwinm	r9,r9,0,2,31		/* Clear MAS1 Valid and IPROT */
+	mtspr	SPRN_MAS1,r9
+	tlbwe
+	isync
+	mtlr	r0
+	blr
+
 /*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
-- 
cgit v1.2.3


From b27652dd2174df1a7e0a7c5f00d1c8e3ed9287a7 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:08 +0800
Subject: powerpc: introduce early_get_first_memblock_info

Since a relocatable kernel can be loaded at any place, there is no
relation between the kernel start addr and the memstart_addr, so we
can't calculate the memstart_addr from the kernel start addr. We also
can't postpone the relocation until we get the real memstart_addr
from the device tree, because that would be too late. So introduce
a new function we can use to get the first memblock address and size
at a very early stage (before machine_init).

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/prom.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa0ad8aafbcc..f58c0d3aaeb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
 	return early_init_dt_scan_memory(node, uname, depth, data);
 }
 
+/*
+ * For a relocatable kernel, we need to get the memstart_addr first,
+ * then use it to calculate the virtual kernel start address. This has
+ * to happen at a very early stage (before machine_init). At that point
+ * we only want to read memstart_addr and must not touch memblock yet,
+ * so this variable lets early_init_dt_add_memory_arch() skip the
+ * memblock_add() call.
+ */
+#ifdef CONFIG_RELOCATABLE
+static int add_mem_to_memblock = 1;
+#else
+#define add_mem_to_memblock 1
+#endif
+
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 #ifdef CONFIG_PPC64
@@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	}
 
 	/* Add the chunk to the MEMBLOCK list */
-	memblock_add(base, size);
+	if (add_mem_to_memblock)
+		memblock_add(base, size);
 }
 
 static void __init early_reserve_mem_dt(void)
@@ -740,6 +755,30 @@ void __init early_init_devtree(void *params)
 	DBG(" <- early_init_devtree()\n");
 }
 
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Runs before early_init_devtree(), so initial_boot_params is set up here.
+ * When @size is non-NULL, first_memblock_size is stored through it.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/*
+	 * Scan the memory nodes with add_mem_to_memblock cleared, so that
+	 * early_init_dt_add_memory_arch() skips its memblock_add() call.
+	 */
+	add_mem_to_memblock = 0;
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+	add_mem_to_memblock = 1;
+
+	if (size)
+		*size = first_memblock_size;
+}
+#endif
+
 /*******
  *
  * New implementation of the OF "find" APIs, return a refcounted
-- 
cgit v1.2.3


From 7d2471f9fa85089beb1cb9436ffc28f9e11e518d Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:10 +0800
Subject: powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for
 relocatable kernel

This is always true for a non-relocatable kernel; otherwise the kernel
would get stuck. For a relocatable kernel it is a little more
complicated. When booting a relocatable kernel, we just align the
kernel start addr to 64M and map PAGE_OFFSET from there. The
relocation is based on this virtual address. But if this address
is not the same as the memstart_addr, we have to change the
mapping of PAGE_OFFSET to the real memstart_addr and do a second
relocation.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
[scottwood@freescale.com: make offset long and non-negative in simple case]
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 74 +++++++++++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 75f0223e6d0d..b1f7edc3c360 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -81,6 +81,39 @@ _ENTRY(_start);
 	mr	r23,r3
 	mr	r25,r4
 
+	bl	0f
+0:	mflr	r8
+	addis	r3,r8,(is_second_reloc - 0b)@ha
+	lwz	r19,(is_second_reloc - 0b)@l(r3)
+
+	/* Check if this is the second relocation. */
+	cmpwi	r19,1
+	bne	1f
+
+	/*
+	 * For the second relocation, we already get the real memstart_addr
+	 * from device tree. So we will map PAGE_OFFSET to memstart_addr,
+	 * then the virtual address of start kernel should be:
+	 *          PAGE_OFFSET + (kernstart_addr - memstart_addr)
+	 * Since the offset between kernstart_addr and memstart_addr should
+	 * never be beyond 1G, so we can just use the lower 32bit of them
+	 * for the calculation.
+	 */
+	lis	r3,PAGE_OFFSET@h
+
+	addis	r4,r8,(kernstart_addr - 0b)@ha
+	addi	r4,r4,(kernstart_addr - 0b)@l
+	lwz	r5,4(r4)
+
+	addis	r6,r8,(memstart_addr - 0b)@ha
+	addi	r6,r6,(memstart_addr - 0b)@l
+	lwz	r7,4(r6)
+
+	subf	r5,r7,r5
+	add	r3,r3,r5
+	b	2f
+
+1:
 	/*
 	 * We have the runtime (virutal) address of our base.
 	 * We calculate our shift of offset from a 64M page.
@@ -94,7 +127,14 @@ _ENTRY(_start);
 	subf	r3,r5,r6			/* r3 = r6 - r5 */
 	add	r3,r4,r3			/* Required Virtual Address */
 
-	bl	relocate
+2:	bl	relocate
+
+	/*
+	 * For the second relocation, we already set the right tlb entries
+	 * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+	*/
+	cmpwi	r19,1
+	beq	set_ivor
 #endif
 
 /* We try to not make any assumptions about how the boot loader
@@ -122,6 +162,7 @@ _ENTRY(__early_start)
 #include "fsl_booke_entry_mapping.S"
 #undef ENTRY_MAPPING_BOOT_SETUP
 
+set_ivor:
 	/* Establish the interrupt vector offsets */
 	SET_IVOR(0,  CriticalInput);
 	SET_IVOR(1,  MachineCheck);
@@ -207,11 +248,13 @@ _ENTRY(__early_start)
 	bl	early_init
 
 #ifdef CONFIG_RELOCATABLE
+	mr	r3,r30
+	mr	r4,r31
 #ifdef CONFIG_PHYS_64BIT
-	mr	r3,r23
-	mr	r4,r25
+	mr	r5,r23
+	mr	r6,r25
 #else
-	mr	r3,r25
+	mr	r5,r25
 #endif
 	bl	relocate_init
 #endif
@@ -1207,6 +1250,9 @@ _GLOBAL(switch_to_as1)
 /*
  * Restore to the address space 0 and also invalidate the tlb entry created
  * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
 */
 _GLOBAL(restore_to_as0)
 	mflr	r0
@@ -1215,7 +1261,15 @@ _GLOBAL(restore_to_as0)
 0:	mflr	r9
 	addi	r9,r9,1f - 0b
 
-	mfmsr	r7
+	/*
+	 * We may map the PAGE_OFFSET in AS0 to a different physical address,
+	 * so we need calculate the right jump and device tree address based
+	 * on the offset passed by r4.
+	 */
+	add	r9,r9,r4
+	add	r5,r5,r4
+
+2:	mfmsr	r7
 	li	r8,(MSR_IS | MSR_DS)
 	andc	r7,r7,r8
 
@@ -1234,9 +1288,19 @@ _GLOBAL(restore_to_as0)
 	mtspr	SPRN_MAS1,r9
 	tlbwe
 	isync
+
+	cmpwi	r4,0
+	bne	3f
 	mtlr	r0
 	blr
 
+	/*
+	 * The PAGE_OFFSET will map to a different physical address,
+	 * jump to _start to do another relocation again.
+	*/
+3:	mr	r3,r5
+	bl	_start
+
 /*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
-- 
cgit v1.2.3


From 0be7d969b0efef085ed6497d462ba16a875ca737 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 24 Dec 2013 15:12:11 +0800
Subject: powerpc/fsl_booke: smp support for booting a relocatable kernel above
 64M

When booting a secondary cpu above 64M, we face the same issue as
on the boot cpu: PAGE_OFFSET maps to two different physical
addresses in the init tlb and in the final map. So we have to use
switch_to_as1/restore_to_as0 around the conversion between these two
maps. When restoring to as0 on a secondary cpu, we only need to
return to the caller, so add a new parameter to restore_to_as0
for this purpose.

Use LOAD_REG_ADDR_PIC to get the address of variables which may
be used before we set the final map in cams for the secondary cpu.
Move the setting of cams a bit earlier in order to avoid the
unnecessary use of LOAD_REG_ADDR_PIC.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/head_fsl_booke.S | 41 ++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index b1f7edc3c360..b497188a94a1 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -216,8 +216,7 @@ set_ivor:
 	/* Check to see if we're the second processor, and jump
 	 * to the secondary_start code if so
 	 */
-	lis	r24, boot_cpuid@h
-	ori	r24, r24, boot_cpuid@l
+	LOAD_REG_ADDR_PIC(r24, boot_cpuid)
 	lwz	r24, 0(r24)
 	cmpwi	r24, -1
 	mfspr   r24,SPRN_PIR
@@ -1146,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
 /* When we get here, r24 needs to hold the CPU # */
 	.globl __secondary_start
 __secondary_start:
-	lis	r3,__secondary_hold_acknowledge@h
-	ori	r3,r3,__secondary_hold_acknowledge@l
-	stw	r24,0(r3)
-
-	li	r3,0
-	mr	r4,r24		/* Why? */
-	bl	call_setup_cpu
-
-	lis	r3,tlbcam_index@ha
-	lwz	r3,tlbcam_index@l(r3)
+	LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+	lwz	r3,0(r3)
 	mtctr	r3
 	li	r26,0		/* r26 safe? */
 
+	bl	switch_to_as1
+	mr	r27,r3		/* tlb entry */
 	/* Load each CAM entry */
 1:	mr	r3,r26
 	bl	loadcam_entry
 	addi	r26,r26,1
 	bdnz	1b
+	mr	r3,r27		/* tlb entry */
+	LOAD_REG_ADDR_PIC(r4, memstart_addr)
+	lwz	r4,0(r4)
+	mr	r5,r25		/* phys kernel start */
+	rlwinm	r5,r5,0,~0x3ffffff	/* aligned 64M */
+	subf	r4,r5,r4	/* memstart_addr - phys kernel start */
+	li	r5,0		/* no device tree */
+	li	r6,0		/* not boot cpu */
+	bl	restore_to_as0
+
+
+	lis	r3,__secondary_hold_acknowledge@h
+	ori	r3,r3,__secondary_hold_acknowledge@l
+	stw	r24,0(r3)
+
+	li	r3,0
+	mr	r4,r24		/* Why? */
+	bl	call_setup_cpu
 
 	/* get current_thread_info and current */
 	lis	r1,secondary_ti@ha
@@ -1253,6 +1264,7 @@ _GLOBAL(switch_to_as1)
  * r3 - the tlb entry which should be invalidated
  * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
  * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
 */
 _GLOBAL(restore_to_as0)
 	mflr	r0
@@ -1268,6 +1280,7 @@ _GLOBAL(restore_to_as0)
 	 */
 	add	r9,r9,r4
 	add	r5,r5,r4
+	add	r0,r0,r4
 
 2:	mfmsr	r7
 	li	r8,(MSR_IS | MSR_DS)
@@ -1290,7 +1303,9 @@ _GLOBAL(restore_to_as0)
 	isync
 
 	cmpwi	r4,0
-	bne	3f
+	cmpwi	cr1,r6,0
+	cror	eq,4*cr1+eq,eq
+	bne	3f			/* offset != 0 && is_boot_cpu */
 	mtlr	r0
 	blr
 
-- 
cgit v1.2.3


From 28efc35fe68dacbddc4b12c2fa8f2df1593a4ad3 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 11 Oct 2013 19:22:38 -0500
Subject: powerpc/e6500: TLB miss handler with hardware tablewalk support

There are a few things that make the existing hw tablewalk handlers
unsuitable for e6500:

 - Indirect entries go in TLB1 (though the resulting direct entries go in
   TLB0).

 - It has threads, but no "tlbsrx." -- so we need a spinlock and
   a normal "tlbsx".  Because we need this lock, hardware tablewalk
   is mandatory on e6500 unless we want to add spinlock+tlbsx to
   the normal bolted TLB miss handler.

 - TLB1 has no HES (nor next-victim hint) so we need software round robin
   (TODO: integrate this round robin data with hugetlb/KVM)

 - The existing tablewalk handlers map half of a page table at a time,
   because IBM hardware has a fixed 1MiB indirect page size.  e6500
   has variable size indirect entries, with a minimum of 2MiB.
   So we can't do the half-page indirect mapping, and even if we
   could it would be less efficient than mapping the full page.

 - Like on e5500, the linear mapping is bolted, so we don't need the
   overhead of supporting nested tlb misses.

Note that hardware tablewalk does not work in rev1 of e6500.
We do not expect to support e6500 rev1 in mainline Linux.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Mihai Caraman <mihai.caraman@freescale.com>
---
 arch/powerpc/kernel/asm-offsets.c |  9 +++++++++
 arch/powerpc/kernel/paca.c        |  5 +++++
 arch/powerpc/kernel/setup_64.c    | 31 +++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 41a283956a29..ed8d68ce71f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -203,6 +203,15 @@ int main(void)
 	DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
 	DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
 	DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+	DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr));
+
+	DEFINE(TCD_ESEL_NEXT,
+		offsetof(struct tlb_core_data, esel_next));
+	DEFINE(TCD_ESEL_MAX,
+		offsetof(struct tlb_core_data, esel_max));
+	DEFINE(TCD_ESEL_FIRST,
+		offsetof(struct tlb_core_data, esel_first));
+	DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_STD_MMU_64
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 623c356fe34f..bf0aada02fe4 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -160,6 +160,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 #ifdef CONFIG_PPC_STD_MMU_64
 	new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
 #endif /* CONFIG_PPC_STD_MMU_64 */
+
+#ifdef CONFIG_PPC_BOOK3E
+	/* For now -- if we have threads this will be adjusted later */
+	new_paca->tcd_ptr = &new_paca->tcd;
+#endif
 }
 
 /* Put the paca pointer into r13 and SPRG_PACA */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2232aff66059..1ce9b87d7df8 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -97,6 +97,36 @@ int dcache_bsize;
 int icache_bsize;
 int ucache_bsize;
 
+#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+static void setup_tlb_core_data(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int first = cpu_first_thread_sibling(cpu);
+		/* Point every cpu at the tcd of its first thread sibling. */
+		paca[cpu].tcd_ptr = &paca[first].tcd;
+
+		/*
+		 * If we have threads, we need either tlbsrx.
+		 * or e6500 tablewalk mode, or else TLB handlers
+		 * will be racy and could produce duplicate entries.
+		 */
+		if (smt_enabled_at_boot >= 2 &&
+		    !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
+		    book3e_htw_mode != PPC_HTW_E6500) {
+			/* WARN_ONCE() takes a condition first. Should we panic instead? */
+			WARN_ONCE(1, "%s: unsupported MMU configuration -- expect problems\n",
+				  __func__);
+		}
+	}
+}
+#else
+static void setup_tlb_core_data(void)
+{
+}
+#endif
+
 #ifdef CONFIG_SMP
 
 static char *smt_enabled_cmdline;
@@ -445,6 +475,7 @@ void __init setup_system(void)
 
 	smp_setup_cpu_maps();
 	check_smt_enabled();
+	setup_tlb_core_data();
 
 #ifdef CONFIG_SMP
 	/* Release secondary cpus out of their spinloops at 0x60 now that
-- 
cgit v1.2.3


From ed2ddc56e758d516c5699260ada4d68434dfe1dc Mon Sep 17 00:00:00 2001
From: Diana Craciun <Diana.Craciun@freescale.com>
Date: Thu, 14 Mar 2013 16:55:11 +0200
Subject: powerpc: Replaced tlbilx with tlbwe in the initialization code

On Freescale e6500 cores EPCR[DGTMI] controls whether guest supervisor
state can execute TLB management instructions. If EPCR[DGTMI]=0
tlbwe and tlbilx are allowed to execute normally in the guest state.

A hypervisor may choose to virtualize TLB1 and for this purpose it
may use IPROT to protect the entries for being invalidated by the
guest. However, because tlbwe and tlbilx execution in the guest state
are sharing the same bit, it is not possible to have a scenario where
tlbwe is allowed to be executed in guest state and tlbilx traps. When
guest TLB management instructions are allowed to be executed in guest
state the guest cannot use tlbilx to invalidate TLB1 guest entries.

Linux is using tlbilx in the boot code to invalidate the temporary
entries it creates when initializing the MMU. The patch is replacing
the usage of tlbilx in initialization code with tlbwe with VALID bit
cleared.

Linux is also using tlbilx in other contexts (like huge pages or
indirect entries) but removing the tlbilx from the initialization code
offers the possibility to have scenarios under hypervisor which are
not using huge pages or indirect entries.

Signed-off-by: Diana Craciun <Diana.Craciun@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/kernel/exceptions-64e.S | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 4d5a0b1034e8..063b65dd4f27 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1068,12 +1068,9 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	mtspr	SPRN_MAS0,r3
 	tlbre
 	mfspr	r6,SPRN_MAS1
-	rlwinm	r6,r6,0,2,0	/* clear IPROT */
+	rlwinm	r6,r6,0,2,31	/* clear IPROT and VALID */
 	mtspr	SPRN_MAS1,r6
 	tlbwe
-
-	/* Invalidate TLB1 */
-	PPC_TLBILX_ALL(0,R0)
 	sync
 	isync
 
@@ -1127,12 +1124,9 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	mtspr	SPRN_MAS0,r4
 	tlbre
 	mfspr	r5,SPRN_MAS1
-	rlwinm	r5,r5,0,2,0	/* clear IPROT */
+	rlwinm	r5,r5,0,2,31	/* clear IPROT and VALID */
 	mtspr	SPRN_MAS1,r5
 	tlbwe
-
-	/* Invalidate TLB1 */
-	PPC_TLBILX_ALL(0,R0)
 	sync
 	isync
 
-- 
cgit v1.2.3