From a10d3632acbd1135648f07c2a998cba8c5c77cfd Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
Date: Fri, 6 Jan 2017 14:58:11 +0000
Subject: PSCI: Introduce cache and barrier wrappers

The PSCI implementation performs cache maintenance operations on its
data structures to ensure their visibility to both cache-coherent and
non-cache-coherent participants. These cache maintenance operations
can be skipped if all PSCI participants are cache-coherent. When
HW_ASSISTED_COHERENCY build option is enabled, we assume PSCI
participants are cache-coherent.

For usage abstraction, this patch introduces wrappers for PSCI cache
maintenance and barrier operations used for state coordination: they are
effectively NOPs when HW_ASSISTED_COHERENCY is enabled, but are
applied otherwise.

Also refactor local state usage and associated cache operations to make
it clearer.

Change-Id: I77f17a90cba41085b7188c1345fe5731c99fad87
Signed-off-by: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
---
 lib/psci/psci_common.c | 79 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 28 deletions(-)

(limited to 'lib/psci/psci_common.c')

diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 9fdce498..026690d2 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -247,6 +247,50 @@ static plat_local_state_t *psci_get_req_local_pwr_states(unsigned int pwrlvl,
 	return &psci_req_local_pwr_states[pwrlvl - 1][cpu_idx];
 }
 
+/*
+ * psci_non_cpu_pd_nodes can be placed either in normal memory or coherent
+ * memory.
+ *
+ * With !USE_COHERENT_MEM, psci_non_cpu_pd_nodes is placed in normal memory and
+ * is accessed by both cached and non-cached participants. To serve the common
+ * minimum, perform a cache flush before read and after write so that non-cached
+ * participants operate on latest data in main memory.
+ *
+ * When USE_COHERENT_MEM is used, psci_non_cpu_pd_nodes is placed in coherent
+ * memory. With HW_ASSISTED_COHERENCY, all PSCI participants are cache-coherent.
+ * In both cases, no cache operations are required.
+ */
+
+/*
+ * Retrieve the local state of a non-CPU power domain node. The read may come
+ * from a CPU whose data cache is disabled, so flush the node first if needed.
+ */
+static plat_local_state_t get_non_cpu_pd_node_local_state(
+		unsigned int parent_idx)
+{
+	/* Flush only if neither coherent memory nor HW coherency is used. */
+#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
+	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+	return psci_non_cpu_pd_nodes[parent_idx].local_state;
+}
+
+/*
+ * Update the local state of a non-CPU power domain node, then perform any
+ * cache maintenance required so non-cached participants see the new value.
+ */
+static void set_non_cpu_pd_node_local_state(unsigned int parent_idx,
+		plat_local_state_t state)
+{
+	psci_non_cpu_pd_nodes[parent_idx].local_state = state;
+	/* Flush only if neither coherent memory nor HW coherency is used. */
+#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
+	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+}
+
 /******************************************************************************
  * Helper function to return the current local power state of each power domain
  * from the current cpu power domain to its ancestor at the 'end_pwrlvl'. This
@@ -264,18 +308,7 @@ void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
 
 	/* Copy the local power state from node to state_info */
 	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
-#if !USE_COHERENT_MEM
-		/*
-		 * If using normal memory for psci_non_cpu_pd_nodes, we need
-		 * to flush before reading the local power state as another
-		 * cpu in the same power domain could have updated it and this
-		 * code runs before caches are enabled.
-		 */
-		flush_dcache_range(
-				(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
-		pd_state[lvl] =	psci_non_cpu_pd_nodes[parent_idx].local_state;
+		pd_state[lvl] = get_non_cpu_pd_node_local_state(parent_idx);
 		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
 	}
 
@@ -299,21 +332,16 @@ static void psci_set_target_local_pwr_states(unsigned int end_pwrlvl,
 	psci_set_cpu_local_state(pd_state[PSCI_CPU_PWR_LVL]);
 
 	/*
-	 * Need to flush as local_state will be accessed with Data Cache
+	 * Need to flush as local_state might be accessed with Data Cache
 	 * disabled during power on
 	 */
-	flush_cpu_data(psci_svc_cpu_data.local_state);
+	psci_flush_cpu_data(psci_svc_cpu_data.local_state);
 
 	parent_idx = psci_cpu_pd_nodes[plat_my_core_pos()].parent_node;
 
 	/* Copy the local_state from state_info */
 	for (lvl = 1; lvl <= end_pwrlvl; lvl++) {
-		psci_non_cpu_pd_nodes[parent_idx].local_state =	pd_state[lvl];
-#if !USE_COHERENT_MEM
-		flush_dcache_range(
-				(uintptr_t)&psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
+		set_non_cpu_pd_node_local_state(parent_idx, pd_state[lvl]);
 		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
 	}
 }
@@ -347,13 +375,8 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
 
 	/* Reset the local_state to RUN for the non cpu power domains. */
 	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
-		psci_non_cpu_pd_nodes[parent_idx].local_state =
-				PSCI_LOCAL_STATE_RUN;
-#if !USE_COHERENT_MEM
-		flush_dcache_range(
-				(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
+		set_non_cpu_pd_node_local_state(parent_idx,
+				PSCI_LOCAL_STATE_RUN);
 		psci_set_req_local_pwr_state(lvl,
 					     cpu_idx,
 					     PSCI_LOCAL_STATE_RUN);
@@ -364,7 +387,7 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
 	psci_set_aff_info_state(AFF_STATE_ON);
 
 	psci_set_cpu_local_state(PSCI_LOCAL_STATE_RUN);
-	flush_cpu_data(psci_svc_cpu_data);
+	psci_flush_cpu_data(psci_svc_cpu_data);
 }
 
 /******************************************************************************
-- 
cgit v1.2.3


From b0408e87f7dfbdfe3e00cd3c1421b2939dd209ca Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
Date: Thu, 5 Jan 2017 11:01:02 +0000
Subject: PSCI: Optimize call paths if all participants are cache-coherent

The current PSCI implementation can apply certain optimizations upon the
assumption that all PSCI participants are cache-coherent.

  - Skip performing cache maintenance during power-up.

  - Skip performing cache maintenance during power-down:

    At present, on the power-down path, CPU driver disables caches and
    MMU, and performs cache maintenance in preparation for powering down
    the CPU. This means that PSCI must perform additional cache
    maintenance on the extant stack for correct functioning.

    If all participating CPUs are cache-coherent, CPU driver would
    neither disable MMU nor perform cache maintenance. The CPU being
    powered down, therefore, remains cache-coherent throughout all PSCI
    call paths. This in turn means that PSCI cache maintenance
    operations are not required during power down.

  - Choose spin locks instead of bakery locks:

    The current PSCI implementation must synchronize both cache-coherent
    and non-cache-coherent participants. Mutual exclusion primitives are
    not guaranteed to function on non-coherent memory. For this reason,
    the current PSCI implementation had to resort to bakery locks.

    If all participants are cache-coherent, the implementation can
    enable MMU and data caches early, and replace bakery locks with
    spin locks. Spin locks make use of architectural mutual exclusion
    primitives, and are lighter and faster.

The optimizations are applied when HW_ASSISTED_COHERENCY build option is
enabled, as it's expected that all PSCI participants are cache-coherent
in those systems.

Change-Id: Iac51c3ed318ea7e2120f6b6a46fd2db2eae46ede
Signed-off-by: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
---
 lib/psci/psci_common.c | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'lib/psci/psci_common.c')

diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 026690d2..1be37c09 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -79,7 +79,8 @@ __section("tzfw_coherent_mem")
 #endif
 ;
 
-DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+/* Lock for PSCI state coordination */
+DEFINE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
 
 cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 
@@ -992,3 +993,33 @@ int psci_get_suspend_afflvl(void)
 }
 
 #endif
+
+/*******************************************************************************
+ * Start the power down sequence for the calling CPU by invoking the power down
+ * operations registered for it.
+ ******************************************************************************/
+void psci_do_pwrdown_sequence(unsigned int power_level)
+{
+#if !HW_ASSISTED_COHERENCY
+	/*
+	 * No hardware-assisted coherency: the CPU drivers turn off data caches
+	 * and the MMU, and then do cache maintenance in software.
+	 *
+	 * prepare_cpu_pwr_dwn() is what initiates the power down sequence. Data
+	 * caches and MMU are enabled right now, but will be disabled by the
+	 * time that function returns. The stack contents must therefore be
+	 * flushed out to memory before anything is popped from the stack
+	 * again.
+	 */
+	psci_do_pwrdown_cache_maintenance(power_level);
+#else
+	/*
+	 * Hardware-assisted coherency: the CPU drivers merely kick off the
+	 * power down sequence and perform no cache maintenance in software.
+	 *
+	 * Data caches and MMU are enabled on entry to this call and are still
+	 * enabled when it returns.
+	 */
+	prepare_cpu_pwr_dwn(power_level);
+#endif
+}
-- 
cgit v1.2.3