summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSoby Mathew <soby.mathew@arm.com>2015-01-08 18:02:44 +0000
committerDan Handley <dan.handley@arm.com>2015-01-22 10:57:44 +0000
commitab8707e6875a9fe447ff04fad9053d7d719f89e6 (patch)
tree376a47144a8349f7ce3cdf21a1a12694e7f6bba6
parent8c5fe0b5b9f1666b4ddd8f5849de80249cdebe40 (diff)
Remove coherent memory from the BL memory maps
This patch extends the build option `USE_COHERENT_MEMORY` to conditionally remove coherent memory from the memory maps of all boot loader stages. The patch also adds necessary documentation for coherent memory removal in firmware-design, porting and user guides. Fixes ARM-Software/tf-issues#106 Change-Id: I260e8768c6a5c2efc402f5804a80657d8ce38773
-rw-r--r--bl1/aarch64/bl1_entrypoint.S2
-rw-r--r--bl1/bl1.ld.S4
-rw-r--r--bl2/aarch64/bl2_entrypoint.S2
-rw-r--r--bl2/bl2.ld.S5
-rw-r--r--bl31/aarch64/bl31_entrypoint.S2
-rw-r--r--bl31/bl31.ld.S4
-rw-r--r--bl32/tsp/aarch64/tsp_entrypoint.S2
-rw-r--r--bl32/tsp/tsp.ld.S4
-rw-r--r--bl32/tsp/tsp_main.c6
-rw-r--r--docs/firmware-design.md227
-rw-r--r--docs/porting-guide.md21
-rw-r--r--docs/user-guide.md6
-rw-r--r--plat/fvp/aarch64/fvp_common.c22
-rw-r--r--plat/fvp/bl1_fvp_setup.c11
-rw-r--r--plat/fvp/bl2_fvp_setup.c16
-rw-r--r--plat/fvp/bl31_fvp_setup.c27
-rw-r--r--plat/fvp/fvp_private.h18
-rw-r--r--plat/fvp/tsp/tsp_fvp_setup.c26
-rw-r--r--plat/juno/aarch64/juno_common.c21
-rw-r--r--plat/juno/bl1_plat_setup.c11
-rw-r--r--plat/juno/bl2_plat_setup.c16
-rw-r--r--plat/juno/bl31_plat_setup.c25
-rw-r--r--plat/juno/juno_private.h18
-rw-r--r--plat/juno/tsp/tsp_plat_setup.c26
-rw-r--r--services/std_svc/psci/psci_common.c13
-rw-r--r--services/std_svc/psci/psci_setup.c15
26 files changed, 463 insertions, 87 deletions
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 82330c11..cfc62921 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -131,9 +131,11 @@ func bl1_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
+#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
+#endif
ldr x0, =__DATA_RAM_START__
ldr x1, =__DATA_ROM_START__
diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index 007149b5..d682384a 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -107,6 +107,7 @@ SECTIONS
*(xlat_table)
} >RAM
+#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@@ -125,6 +126,7 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
+#endif
__BL1_RAM_START__ = ADDR(.data);
__BL1_RAM_END__ = .;
@@ -140,8 +142,10 @@ SECTIONS
__BSS_SIZE__ = SIZEOF(.bss);
+#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
ASSERT(. <= BL1_RW_LIMIT, "BL1's RW section has exceeded its limit.")
}
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 2f058da9..499dc373 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -91,9 +91,11 @@ func bl2_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
+#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
+#endif
/* --------------------------------------------
* Allocate a stack whose memory will be marked
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 65304de7..99333391 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -93,6 +93,7 @@ SECTIONS
*(xlat_table)
} >RAM
+#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@@ -111,12 +112,16 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
+#endif
__BL2_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
+
+#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
ASSERT(. <= BL2_LIMIT, "BL2 image has exceeded its limit.")
}
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 04063e1c..b786b29d 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -149,9 +149,11 @@ func bl31_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
+#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
+#endif
/* ---------------------------------------------
* Initialize the cpu_ops pointer.
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 124be85c..3327f316 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -117,6 +117,7 @@ SECTIONS
*(xlat_table)
} >RAM
+#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@@ -135,12 +136,15 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
+#endif
__BL31_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
+#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
ASSERT(. <= BL31_LIMIT, "BL3-1 image has exceeded its limit.")
}
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 1cda1653..2714282b 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -108,9 +108,11 @@ func tsp_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
+#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
+#endif
/* --------------------------------------------
* Allocate a stack whose memory will be marked
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index 5d7ffa11..d411ad02 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -98,6 +98,7 @@ SECTIONS
*(xlat_table)
} >RAM
+#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@@ -116,12 +117,15 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
+#endif
__BL32_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
+#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
ASSERT(. <= BL32_LIMIT, "BL3-2 image has exceeded its limit.")
}
diff --git a/bl32/tsp/tsp_main.c b/bl32/tsp/tsp_main.c
index 193ba29b..2eaca7c9 100644
--- a/bl32/tsp/tsp_main.c
+++ b/bl32/tsp/tsp_main.c
@@ -43,7 +43,7 @@
* of trusted SRAM
******************************************************************************/
extern unsigned long __RO_START__;
-extern unsigned long __COHERENT_RAM_END__;
+extern unsigned long __BL32_END__;
/*******************************************************************************
* Lock to control access to the console
@@ -63,11 +63,11 @@ work_statistics_t tsp_stats[PLATFORM_CORE_COUNT];
/*******************************************************************************
* The BL32 memory footprint starts with an RO sections and ends
- * with a section for coherent RAM. Use it to find the memory size
+ * with the linker symbol __BL32_END__. Use it to find the memory size
******************************************************************************/
#define BL32_TOTAL_BASE (unsigned long)(&__RO_START__)
-#define BL32_TOTAL_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#define BL32_TOTAL_LIMIT (unsigned long)(&__BL32_END__)
static tsp_args_t *set_smc_args(uint64_t arg0,
uint64_t arg1,
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index 41aaf7f2..774ea436 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -12,8 +12,9 @@ Contents :
7. [CPU specific operations framework](#7--cpu-specific-operations-framework)
8. [Memory layout of BL images](#8-memory-layout-of-bl-images)
9. [Firmware Image Package (FIP)](#9--firmware-image-package-fip)
-10. [Code Structure](#10--code-structure)
-11. [References](#11--references)
+10. [Use of coherent memory in Trusted Firmware](#10--use-of-coherent-memory-in-trusted-firmware)
+11. [Code Structure](#11--code-structure)
+12. [References](#12--references)
1. Introduction
@@ -368,10 +369,10 @@ level implementation of the generic timer through the memory mapped interface.
`ON`; any other cluster is `OFF`. BL3-1 initializes the data structures that
implement the state machine, including the locks that protect them. BL3-1
accesses the state of a CPU or cluster immediately after reset and before
- the MMU is enabled in the warm boot path. It is not currently possible to
- use 'exclusive' based spinlocks, therefore BL3-1 uses locks based on
- Lamport's Bakery algorithm instead. BL3-1 allocates these locks in device
- memory. They are accessible irrespective of MMU state.
+ the data cache is enabled in the warm boot path. It is not currently
+ possible to use 'exclusive' based spinlocks, therefore BL3-1 uses locks
+ based on Lamport's Bakery algorithm instead. BL3-1 allocates these locks in
+ device memory by default.
* Runtime services initialization:
@@ -1127,9 +1128,10 @@ this purpose:
* `__BSS_START__` This address must be aligned on a 16-byte boundary.
* `__BSS_SIZE__`
-Similarly, the coherent memory section must be zero-initialised. Also, the MMU
-setup code needs to know the extents of this section to set the right memory
-attributes for it. The following linker symbols are defined for this purpose:
+Similarly, the coherent memory section (if enabled) must be zero-initialised.
+Also, the MMU setup code needs to know the extents of this section to set the
+right memory attributes for it. The following linker symbols are defined for
+this purpose:
* `__COHERENT_RAM_START__` This address must be aligned on a page-size boundary.
* `__COHERENT_RAM_END__` This address must be aligned on a page-size boundary.
@@ -1443,7 +1445,208 @@ Currently the FVP's policy only allows loading of a known set of images. The
platform policy can be modified to allow additional images.
-10. Code Structure
+10. Use of coherent memory in Trusted Firmware
+----------------------------------------------
+
+There might be loss of coherency when physical memory with mismatched
+shareability, cacheability and memory attributes is accessed by multiple CPUs
+(refer to section B2.9 of [ARM ARM] for more details). This possibility occurs
+in Trusted Firmware during power up/down sequences when coherency, MMU and
+caches are turned on/off incrementally.
+
+Trusted Firmware defines coherent memory as a region of memory with Device
+nGnRE attributes in the translation tables. The translation granule size in
+Trusted Firmware is 4KB. This is the smallest possible size of the coherent
+memory region.
+
+By default, all data structures which are susceptible to accesses with
+mismatched attributes from various CPUs are allocated in a coherent memory
+region (refer to section 2.1 of [Porting Guide]). The coherent memory region
+accesses are Outer Shareable, non-cacheable and they can be accessed
+with the Device nGnRE attributes when the MMU is turned on. Hence, at the
+expense of at least an extra page of memory, Trusted Firmware is able to work
+around coherency issues due to mismatched memory attributes.
+
+The alternative to the above approach is to allocate the susceptible data
+structures in Normal WriteBack WriteAllocate Inner shareable memory. This
+approach requires the data structures to be designed so that it is possible to
+work around the issue of mismatched memory attributes by performing software
+cache maintenance on them.
+
+### Disabling the use of coherent memory in Trusted Firmware
+
+It might be desirable to avoid the cost of allocating coherent memory on
+platforms which are memory constrained. Trusted Firmware enables inclusion of
+coherent memory in firmware images through the build flag `USE_COHERENT_MEM`.
+This flag is enabled by default. It can be disabled to choose the second
+approach described above.
+
+The below sections analyze the data structures allocated in the coherent memory
+region and the changes required to allocate them in normal memory.
+
+### PSCI Affinity map nodes
+
+The `psci_aff_map` data structure stores the hierarchial node information for
+each affinity level in the system including the PSCI states associated with them.
+By default, this data structure is allocated in the coherent memory region in
+the Trusted Firmware because it can be accessed by multiple CPUs, either with
+their caches enabled or disabled.
+
+ typedef struct aff_map_node {
+ unsigned long mpidr;
+ unsigned char ref_count;
+ unsigned char state;
+ unsigned char level;
+ #if USE_COHERENT_MEM
+ bakery_lock_t lock;
+ #else
+ unsigned char aff_map_index;
+ #endif
+ } aff_map_node_t;
+
+In order to move this data structure to normal memory, the use of each of its
+fields must be analyzed. Fields like `mpidr` and `level` are only written once
+during cold boot. Hence removing them from coherent memory involves only doing
+a clean and invalidate of the cache lines after these fields are written.
+
+The fields `state` and `ref_count` can be concurrently accessed by multiple
+CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual
+exlusion to these fields. As a result, it is possible to move these fields out
+of coherent memory by performing software cache maintenance on them. The field
+`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled.
+The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM`
+is disabled.
+
+### Bakery lock data
+
+The bakery lock data structure `bakery_lock_t` is allocated in coherent memory
+and is accessed by multiple CPUs with mismatched attributes. `bakery_lock_t` is
+defined as follows:
+
+ typedef struct bakery_lock {
+ int owner;
+ volatile char entering[BAKERY_LOCK_MAX_CPUS];
+ volatile unsigned number[BAKERY_LOCK_MAX_CPUS];
+ } bakery_lock_t;
+
+It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU
+fields can be read by all CPUs but only written to by the owning CPU.
+
+Depending upon the data cache line size, the per-CPU fields of the
+`bakery_lock_t` structure for multiple CPUs may exist on a single cache line.
+These per-CPU fields can be read and written during lock contention by multiple
+CPUs with mismatched memory attributes. Since these fields are a part of the
+lock implementation, they do not have access to any other locking primitive to
+safeguard against the resulting coherency issues. As a result, simple software
+cache maintenance is not enough to allocate them in coherent memory. Consider
+the following example.
+
+CPU0 updates its per-CPU field with data cache enabled. This write updates a
+local cache line which contains a copy of the fields for other CPUs as well. Now
+CPU1 updates its per-CPU field of the `bakery_lock_t` structure with data cache
+disabled. CPU1 then issues a DCIVAC operation to invalidate any stale copies of
+its field in any other cache line in the system. This operation will invalidate
+the update made by CPU0 as well.
+
+To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure
+has been redesigned. The changes utilise the characteristic of Lamport's Bakery
+algorithm mentioned earlier. The per-CPU fields of the new lock structure are
+aligned such that they are allocated on separate cache lines. The per-CPU data
+framework in Trusted Firmware is used to achieve this. This enables software to
+perform software cache maintenance on the lock data structure without running
+into coherency issues associated with mismatched attributes.
+
+The per-CPU data framework enables consolidation of data structures on the
+fewest cache lines possible. This saves memory as compared to the scenario where
+each data structure is separately aligned to the cache line boundary to achieve
+the same effect.
+
+The bakery lock data structure `bakery_info_t` is defined for use when
+`USE_COHERENT_MEM` is disabled as follows:
+
+ typedef struct bakery_info {
+ /*
+ * The lock_data is a bit-field of 2 members:
+ * Bit[0] : choosing. This field is set when the CPU is
+ * choosing its bakery number.
+ * Bits[1 - 15] : number. This is the bakery number allocated.
+ */
+ volatile uint16_t lock_data;
+ } bakery_info_t;
+
+The `bakery_info_t` represents a single per-CPU field of one lock and
+the combination of corresponding `bakery_info_t` structures for all CPUs in the
+system represents the complete bakery lock. It is embedded in the per-CPU
+data framework `cpu_data` as shown below:
+
+ CPU0 cpu_data
+ ------------------
+ | .... |
+ |----------------|
+ | `bakery_info_t`| <-- Lock_0 per-CPU field
+ | Lock_0 | for CPU0
+ |----------------|
+ | `bakery_info_t`| <-- Lock_1 per-CPU field
+ | Lock_1 | for CPU0
+ |----------------|
+ | .... |
+ |----------------|
+ | `bakery_info_t`| <-- Lock_N per-CPU field
+ | Lock_N | for CPU0
+ ------------------
+
+
+ CPU1 cpu_data
+ ------------------
+ | .... |
+ |----------------|
+ | `bakery_info_t`| <-- Lock_0 per-CPU field
+ | Lock_0 | for CPU1
+ |----------------|
+ | `bakery_info_t`| <-- Lock_1 per-CPU field
+ | Lock_1 | for CPU1
+ |----------------|
+ | .... |
+ |----------------|
+ | `bakery_info_t`| <-- Lock_N per-CPU field
+ | Lock_N | for CPU1
+ ------------------
+
+Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an
+operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1
+`cpu_data` need to be fetched and appropriate cache operations need to be
+performed for each access.
+
+For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data`
+and each lock is given an `id` to identify it in the array.
+
+### Non Functional Impact of removing coherent memory
+
+Removal of the coherent memory region leads to the additional software overhead
+of performing cache maintenance for the affected data structures. However, since
+the memory where the data structures are allocated is cacheable, the overhead is
+mostly mitigated by an increase in performance.
+
+There is however a performance impact for bakery locks, due to:
+* Additional cache maintenance operations, and
+* Multiple cache line reads for each lock operation, since the bakery locks
+ for each CPU are distributed across different cache lines.
+
+The implementation has been optimized to mimimize this additional overhead.
+Measurements indicate that when bakery locks are allocated in Normal memory, the
+minimum latency of acquiring a lock is on an average 3-4 micro seconds whereas
+in Device memory the same is 2 micro seconds. The measurements were done on the
+Juno ARM development platform.
+
+As mentioned earlier, almost a page of memory can be saved by disabling
+`USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide
+whether coherent memory should be used. If a platform disables
+`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should
+reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see
+the [Porting Guide]). Refer to the reference platform code for examples.
+
+
+11. Code Structure
-------------------
Trusted Firmware code is logically divided between the three boot loader
@@ -1488,7 +1691,7 @@ FDTs provide a description of the hardware platform and are used by the Linux
kernel at boot time. These can be found in the `fdts` directory.
-11. References
+12. References
---------------
1. Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
@@ -1504,7 +1707,7 @@ kernel at boot time. These can be found in the `fdts` directory.
_Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved._
-
+[ARM ARM]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.e/index.html "ARMv8-A Reference Manual (ARM DDI0487A.E)"
[PSCI]: http://infocenter.arm.com/help/topic/com.arm.doc.den0022b/index.html "Power State Coordination Interface PDD (ARM DEN 0022B.b)"
[SMCCC]: http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html "SMC Calling Convention PDD (ARM DEN 0028A)"
[UUID]: https://tools.ietf.org/rfc/rfc4122.txt "A Universally Unique IDentifier (UUID) URN Namespace"
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 3855ca7b..3d5e66fb 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -63,11 +63,11 @@ mapped page tables, and enable both the instruction and data caches for each BL
stage. In the ARM FVP port, each BL stage configures the MMU in its platform-
specific architecture setup function, for example `blX_plat_arch_setup()`.
-Each platform must allocate a block of identity mapped secure memory with
-Device-nGnRE attributes aligned to page boundary (4K) for each BL stage. This
-memory is identified by the section name `tzfw_coherent_mem` so that its
-possible for the firmware to place variables in it using the following C code
-directive:
+If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a
+block of identity mapped secure memory with Device-nGnRE attributes aligned to
+page boundary (4K) for each BL stage. This memory is identified by the section
+name `tzfw_coherent_mem` so that its possible for the firmware to place
+variables in it using the following C code directive:
__attribute__ ((section("tzfw_coherent_mem")))
@@ -246,6 +246,17 @@ must also be defined:
entities than this value using `io_open()` will fail with
IO_RESOURCES_EXHAUSTED.
+If the platform needs to allocate data within the per-cpu data framework in
+BL3-1, it should define the following macro. Currently this is only required if
+the platform decides not to use the coherent memory section by undefining the
+USE_COHERENT_MEM build flag. In this case, the framework allocates the required
+memory within the the per-cpu data to minimize wastage.
+
+* **#define : PLAT_PCPU_DATA_SIZE**
+
+ Defines the memory (in bytes) to be reserved within the per-cpu data
+ structure for use by the platform layer.
+
The following constants are optional. They should be defined when the platform
memory layout implies some image overlaying like on FVP.
diff --git a/docs/user-guide.md b/docs/user-guide.md
index b33c4c0c..5ad44a89 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -245,6 +245,12 @@ performed.
synchronous method) or 1 (BL3-2 is initialized using asynchronous method).
Default is 0.
+* `USE_COHERENT_MEM`: This flag determines whether to include the coherent
+ memory region in the BL memory map or not (see "Use of Coherent memory in
+ Trusted Firmware" section in [Firmware Design]). It can take the value 1
+ (Coherent memory region is included) or 0 (Coherent memory region is
+ excluded). Default is 1.
+
#### FVP specific build options
* `FVP_TSP_RAM_LOCATION`: location of the TSP binary. Options:
diff --git a/plat/fvp/aarch64/fvp_common.c b/plat/fvp/aarch64/fvp_common.c
index 987f48f6..e20fe7d9 100644
--- a/plat/fvp/aarch64/fvp_common.c
+++ b/plat/fvp/aarch64/fvp_common.c
@@ -136,7 +136,8 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
* Macro generating the code for the function setting up the pagetables as per
* the platform memory map & initialize the mmu, for the given exception level
******************************************************************************/
-#define DEFINE_CONFIGURE_MMU_EL(_el) \
+#if USE_COHERENT_MEM
+#define DEFINE_CONFIGURE_MMU_EL(_el) \
void fvp_configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
unsigned long ro_start, \
@@ -158,6 +159,25 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
\
enable_mmu_el##_el(0); \
}
+#else
+#define DEFINE_CONFIGURE_MMU_EL(_el) \
+ void fvp_configure_mmu_el##_el(unsigned long total_base, \
+ unsigned long total_size, \
+ unsigned long ro_start, \
+ unsigned long ro_limit) \
+ { \
+ mmap_add_region(total_base, total_base, \
+ total_size, \
+ MT_MEMORY | MT_RW | MT_SECURE); \
+ mmap_add_region(ro_start, ro_start, \
+ ro_limit - ro_start, \
+ MT_MEMORY | MT_RO | MT_SECURE); \
+ mmap_add(fvp_mmap); \
+ init_xlat_tables(); \
+ \
+ enable_mmu_el##_el(0); \
+ }
+#endif
/* Define EL1 and EL3 variants of the function initialising the MMU */
DEFINE_CONFIGURE_MMU_EL(1)
diff --git a/plat/fvp/bl1_fvp_setup.c b/plat/fvp/bl1_fvp_setup.c
index b1205d43..4b421d71 100644
--- a/plat/fvp/bl1_fvp_setup.c
+++ b/plat/fvp/bl1_fvp_setup.c
@@ -40,6 +40,7 @@
#include "fvp_def.h"
#include "fvp_private.h"
+#if USE_COHERENT_MEM
/*******************************************************************************
* Declarations of linker defined symbols which will help us find the layout
* of trusted SRAM
@@ -56,6 +57,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/* Data structure which holds the extents of the trusted SRAM for BL1*/
static meminfo_t bl1_tzram_layout;
@@ -116,9 +118,12 @@ void bl1_plat_arch_setup(void)
fvp_configure_mmu_el3(bl1_tzram_layout.total_base,
bl1_tzram_layout.total_size,
BL1_RO_BASE,
- BL1_RO_LIMIT,
- BL1_COHERENT_RAM_BASE,
- BL1_COHERENT_RAM_LIMIT);
+ BL1_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL1_COHERENT_RAM_BASE,
+ BL1_COHERENT_RAM_LIMIT
+#endif
+ );
}
diff --git a/plat/fvp/bl2_fvp_setup.c b/plat/fvp/bl2_fvp_setup.c
index 67f89bc4..71bd8c2a 100644
--- a/plat/fvp/bl2_fvp_setup.c
+++ b/plat/fvp/bl2_fvp_setup.c
@@ -45,8 +45,10 @@
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
@@ -57,6 +59,7 @@ extern unsigned long __COHERENT_RAM_END__;
#define BL2_RO_BASE (unsigned long)(&__RO_START__)
#define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -66,11 +69,11 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/* Data structure which holds the extents of the trusted SRAM for BL2 */
static meminfo_t bl2_tzram_layout
-__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
- section("tzfw_coherent_mem")));
+__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
/* Assert that BL3-1 parameters fit in shared memory */
CASSERT((PARAMS_BASE + sizeof(bl2_to_bl31_params_mem_t)) <
@@ -209,9 +212,12 @@ void bl2_plat_arch_setup(void)
fvp_configure_mmu_el1(bl2_tzram_layout.total_base,
bl2_tzram_layout.total_size,
BL2_RO_BASE,
- BL2_RO_LIMIT,
- BL2_COHERENT_RAM_BASE,
- BL2_COHERENT_RAM_LIMIT);
+ BL2_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL2_COHERENT_RAM_BASE,
+ BL2_COHERENT_RAM_LIMIT
+#endif
+ );
}
/*******************************************************************************
diff --git a/plat/fvp/bl31_fvp_setup.c b/plat/fvp/bl31_fvp_setup.c
index 69efc9cf..3874413f 100644
--- a/plat/fvp/bl31_fvp_setup.c
+++ b/plat/fvp/bl31_fvp_setup.c
@@ -48,19 +48,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+extern unsigned long __BL31_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned. It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-1 image. These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned. It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
+ * refer to page-aligned addresses.
*/
#define BL31_RO_BASE (unsigned long)(&__RO_START__)
#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL31_END (unsigned long)(&__BL31_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -70,7 +76,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
-
+#endif
#if RESET_TO_BL31
static entry_point_info_t bl32_image_ep_info;
@@ -235,9 +241,12 @@ void bl31_plat_arch_setup(void)
fvp_cci_enable();
#endif
fvp_configure_mmu_el3(BL31_RO_BASE,
- (BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE),
+ (BL31_END - BL31_RO_BASE),
BL31_RO_BASE,
- BL31_RO_LIMIT,
- BL31_COHERENT_RAM_BASE,
- BL31_COHERENT_RAM_LIMIT);
+ BL31_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL31_COHERENT_RAM_BASE,
+ BL31_COHERENT_RAM_LIMIT
+#endif
+ );
}
diff --git a/plat/fvp/fvp_private.h b/plat/fvp/fvp_private.h
index 6f1a637e..3949754b 100644
--- a/plat/fvp/fvp_private.h
+++ b/plat/fvp/fvp_private.h
@@ -118,15 +118,21 @@ CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(fvp_cpu_data_t), \
void fvp_configure_mmu_el1(unsigned long total_base,
unsigned long total_size,
unsigned long,
- unsigned long,
- unsigned long,
- unsigned long);
+ unsigned long
+#if USE_COHERENT_MEM
+ , unsigned long,
+ unsigned long
+#endif
+ );
void fvp_configure_mmu_el3(unsigned long total_base,
unsigned long total_size,
unsigned long,
- unsigned long,
- unsigned long,
- unsigned long);
+ unsigned long
+#if USE_COHERENT_MEM
+ , unsigned long,
+ unsigned long
+#endif
+ );
int fvp_config_setup(void);
diff --git a/plat/fvp/tsp/tsp_fvp_setup.c b/plat/fvp/tsp/tsp_fvp_setup.c
index 301f6693..d8f46bd2 100644
--- a/plat/fvp/tsp/tsp_fvp_setup.c
+++ b/plat/fvp/tsp/tsp_fvp_setup.c
@@ -40,19 +40,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+extern unsigned long __BL32_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned. It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code & RO data region and
+ * the limit of the BL3-2 image. These addresses are used by the MMU setup code
+ * and therefore they must be page-aligned. It is the responsibility of the
+ * linker script to ensure that __RO_START__, __RO_END__ & & __BL32_END__
+ * linker symbols refer to page-aligned addresses.
*/
#define BL32_RO_BASE (unsigned long)(&__RO_START__)
#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL32_END (unsigned long)(&__BL32_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -62,6 +68,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/*******************************************************************************
* Initialize the UART
@@ -93,9 +100,12 @@ void tsp_platform_setup(void)
void tsp_plat_arch_setup(void)
{
fvp_configure_mmu_el1(BL32_RO_BASE,
- (BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE),
+ (BL32_END - BL32_RO_BASE),
BL32_RO_BASE,
- BL32_RO_LIMIT,
- BL32_COHERENT_RAM_BASE,
- BL32_COHERENT_RAM_LIMIT);
+ BL32_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL32_COHERENT_RAM_BASE,
+ BL32_COHERENT_RAM_LIMIT
+#endif
+ );
}
diff --git a/plat/juno/aarch64/juno_common.c b/plat/juno/aarch64/juno_common.c
index 8129b051..7ad40d0d 100644
--- a/plat/juno/aarch64/juno_common.c
+++ b/plat/juno/aarch64/juno_common.c
@@ -140,6 +140,7 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
* Macro generating the code for the function setting up the pagetables as per
* the platform memory map & initialize the mmu, for the given exception level
******************************************************************************/
+#if USE_COHERENT_MEM
#define DEFINE_CONFIGURE_MMU_EL(_el) \
void configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
@@ -162,7 +163,25 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
\
enable_mmu_el##_el(0); \
}
-
+#else
+#define DEFINE_CONFIGURE_MMU_EL(_el) \
+ void configure_mmu_el##_el(unsigned long total_base, \
+ unsigned long total_size, \
+ unsigned long ro_start, \
+ unsigned long ro_limit) \
+ { \
+ mmap_add_region(total_base, total_base, \
+ total_size, \
+ MT_MEMORY | MT_RW | MT_SECURE); \
+ mmap_add_region(ro_start, ro_start, \
+ ro_limit - ro_start, \
+ MT_MEMORY | MT_RO | MT_SECURE); \
+ mmap_add(juno_mmap); \
+ init_xlat_tables(); \
+ \
+ enable_mmu_el##_el(0); \
+ }
+#endif
/* Define EL1 and EL3 variants of the function initialising the MMU */
DEFINE_CONFIGURE_MMU_EL(1)
DEFINE_CONFIGURE_MMU_EL(3)
diff --git a/plat/juno/bl1_plat_setup.c b/plat/juno/bl1_plat_setup.c
index e27e3948..23e8592b 100644
--- a/plat/juno/bl1_plat_setup.c
+++ b/plat/juno/bl1_plat_setup.c
@@ -41,6 +41,7 @@
#include "juno_def.h"
#include "juno_private.h"
+#if USE_COHERENT_MEM
/*******************************************************************************
* Declarations of linker defined symbols which will help us find the layout
* of trusted RAM
@@ -57,6 +58,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/* Data structure which holds the extents of the trusted RAM for BL1 */
static meminfo_t bl1_tzram_layout;
@@ -189,9 +191,12 @@ void bl1_plat_arch_setup(void)
configure_mmu_el3(bl1_tzram_layout.total_base,
bl1_tzram_layout.total_size,
TZROM_BASE,
- TZROM_BASE + TZROM_SIZE,
- BL1_COHERENT_RAM_BASE,
- BL1_COHERENT_RAM_LIMIT);
+ TZROM_BASE + TZROM_SIZE
+#if USE_COHERENT_MEM
+ , BL1_COHERENT_RAM_BASE,
+ BL1_COHERENT_RAM_LIMIT
+#endif
+ );
}
/*******************************************************************************
diff --git a/plat/juno/bl2_plat_setup.c b/plat/juno/bl2_plat_setup.c
index 900a587f..8e7b2a0a 100644
--- a/plat/juno/bl2_plat_setup.c
+++ b/plat/juno/bl2_plat_setup.c
@@ -47,8 +47,10 @@
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
@@ -59,6 +61,7 @@ extern unsigned long __COHERENT_RAM_END__;
#define BL2_RO_BASE (unsigned long)(&__RO_START__)
#define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -68,11 +71,11 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/* Data structure which holds the extents of the trusted RAM for BL2 */
static meminfo_t bl2_tzram_layout
-__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
- section("tzfw_coherent_mem")));
+__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
/*******************************************************************************
* Structure which holds the arguments which need to be passed to BL3-1
@@ -194,9 +197,12 @@ void bl2_plat_arch_setup(void)
configure_mmu_el1(bl2_tzram_layout.total_base,
bl2_tzram_layout.total_size,
BL2_RO_BASE,
- BL2_RO_LIMIT,
- BL2_COHERENT_RAM_BASE,
- BL2_COHERENT_RAM_LIMIT);
+ BL2_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL2_COHERENT_RAM_BASE,
+ BL2_COHERENT_RAM_LIMIT
+#endif
+ );
}
/*******************************************************************************
diff --git a/plat/juno/bl31_plat_setup.c b/plat/juno/bl31_plat_setup.c
index c4504622..ad8ea435 100644
--- a/plat/juno/bl31_plat_setup.c
+++ b/plat/juno/bl31_plat_setup.c
@@ -48,19 +48,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+extern unsigned long __BL31_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned. It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-1 image. These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned. It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
+ * refer to page-aligned addresses.
*/
#define BL31_RO_BASE (unsigned long)(&__RO_START__)
#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL31_END (unsigned long)(&__BL31_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -70,6 +76,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/******************************************************************************
* Placeholder variables for copying the arguments that have been passed to
@@ -178,9 +185,13 @@ void bl31_platform_setup(void)
void bl31_plat_arch_setup()
{
configure_mmu_el3(BL31_RO_BASE,
- BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE,
+ (BL31_END - BL31_RO_BASE),
BL31_RO_BASE,
- BL31_RO_LIMIT,
+ BL31_RO_LIMIT
+#if USE_COHERENT_MEM
+ ,
BL31_COHERENT_RAM_BASE,
- BL31_COHERENT_RAM_LIMIT);
+ BL31_COHERENT_RAM_LIMIT
+#endif
+ );
}
diff --git a/plat/juno/juno_private.h b/plat/juno/juno_private.h
index b7ef4488..70439e8b 100644
--- a/plat/juno/juno_private.h
+++ b/plat/juno/juno_private.h
@@ -134,15 +134,21 @@ unsigned int platform_get_core_pos(unsigned long mpidr);
void configure_mmu_el1(unsigned long total_base,
unsigned long total_size,
unsigned long ro_start,
- unsigned long ro_limit,
- unsigned long coh_start,
- unsigned long coh_limit);
+ unsigned long ro_limit
+#if USE_COHERENT_MEM
+ , unsigned long coh_start,
+ unsigned long coh_limit
+#endif
+ );
void configure_mmu_el3(unsigned long total_base,
unsigned long total_size,
unsigned long ro_start,
- unsigned long ro_limit,
- unsigned long coh_start,
- unsigned long coh_limit);
+ unsigned long ro_limit
+#if USE_COHERENT_MEM
+ , unsigned long coh_start,
+ unsigned long coh_limit
+#endif
+ );
void plat_report_exception(unsigned long type);
unsigned long plat_get_ns_image_entrypoint(void);
unsigned long platform_get_stack(unsigned long mpidr);
diff --git a/plat/juno/tsp/tsp_plat_setup.c b/plat/juno/tsp/tsp_plat_setup.c
index 0a9d4cbe..8293a132 100644
--- a/plat/juno/tsp/tsp_plat_setup.c
+++ b/plat/juno/tsp/tsp_plat_setup.c
@@ -40,19 +40,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
+extern unsigned long __BL32_END__;
+#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
+#endif
/*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned. It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-2 image. These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned. It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL32_END__ linker symbols
+ * refer to page-aligned addresses.
*/
#define BL32_RO_BASE (unsigned long)(&__RO_START__)
#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL32_END (unsigned long)(&__BL32_END__)
+#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@@ -62,6 +68,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
/*******************************************************************************
* Initialize the UART
@@ -90,9 +97,12 @@ void tsp_platform_setup(void)
void tsp_plat_arch_setup(void)
{
configure_mmu_el1(BL32_RO_BASE,
- BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE,
+ (BL32_END - BL32_RO_BASE),
BL32_RO_BASE,
- BL32_RO_LIMIT,
- BL32_COHERENT_RAM_BASE,
- BL32_COHERENT_RAM_LIMIT);
+ BL32_RO_LIMIT
+#if USE_COHERENT_MEM
+ , BL32_COHERENT_RAM_BASE,
+ BL32_COHERENT_RAM_LIMIT
+#endif
+ );
}
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index c984e9ec..0a1cdf9e 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -51,7 +51,10 @@ const spd_pm_ops_t *psci_spd_pm;
* corresponds to an affinity instance e.g. cluster, cpu within an mpidr
******************************************************************************/
aff_map_node_t psci_aff_map[PSCI_NUM_AFFS]
-__attribute__ ((section("tzfw_coherent_mem")));
+#if USE_COHERENT_MEM
+__attribute__ ((section("tzfw_coherent_mem")))
+#endif
+;
/*******************************************************************************
* Pointer to functions exported by the platform to complete power mgmt. ops
@@ -352,6 +355,10 @@ int psci_save_ns_entry(uint64_t mpidr,
******************************************************************************/
unsigned short psci_get_state(aff_map_node_t *node)
{
+#if !USE_COHERENT_MEM
+ flush_dcache_range((uint64_t) node, sizeof(*node));
+#endif
+
assert(node->level >= MPIDR_AFFLVL0 && node->level <= MPIDR_MAX_AFFLVL);
/* A cpu node just contains the state which can be directly returned */
@@ -409,6 +416,10 @@ void psci_set_state(aff_map_node_t *node, unsigned short state)
node->state &= ~(PSCI_STATE_MASK << PSCI_STATE_SHIFT);
node->state |= (state & PSCI_STATE_MASK) << PSCI_STATE_SHIFT;
}
+
+#if !USE_COHERENT_MEM
+ flush_dcache_range((uint64_t) node, sizeof(*node));
+#endif
}
/*******************************************************************************
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index a750256e..be504e81 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -331,13 +331,20 @@ int32_t psci_setup(void)
afflvl);
}
+#if !USE_COHERENT_MEM
+ /*
+ * The psci_aff_map only needs flushing when it's not allocated in
+ * coherent memory.
+ */
+ flush_dcache_range((uint64_t) &psci_aff_map, sizeof(psci_aff_map));
+#endif
+
/*
* Set the bounds for the affinity counts of each level in the map. Also
* flush out the entire array so that it's visible to subsequent power
- * management operations. The 'psci_aff_map' array is allocated in
- * coherent memory so does not need flushing. The 'psci_aff_limits'
- * array is allocated in normal memory. It will be accessed when the mmu
- * is off e.g. after reset. Hence it needs to be flushed.
+ * management operations. The 'psci_aff_limits' array is allocated in
+ * normal memory. It will be accessed when the mmu is off e.g. after
+ * reset. Hence it needs to be flushed.
*/
for (afflvl = MPIDR_AFFLVL0; afflvl < max_afflvl; afflvl++) {
psci_aff_limits[afflvl].min =