Diffstat (limited to 'arch/arm/mm')
-rw-r--r--  arch/arm/mm/cache-l2x0.c    | 48
-rw-r--r--  arch/arm/mm/cache-tauros3.h | 41
-rw-r--r--  arch/arm/mm/cache-v7.S      | 14
-rw-r--r--  arch/arm/mm/context.c       | 41
-rw-r--r--  arch/arm/mm/dma-mapping.c   | 88
-rw-r--r--  arch/arm/mm/flush.c         |  6
-rw-r--r--  arch/arm/mm/mmap.c          |  2
-rw-r--r--  arch/arm/mm/pgd.c           |  5
8 files changed, 160 insertions(+), 85 deletions(-)
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 447da6ffadd5..7abde2ce8973 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/hardware/cache-l2x0.h>
+#include "cache-tauros3.h"
#include "cache-aurora-l2.h"
#define CACHE_LINE_SIZE 32
@@ -767,6 +768,14 @@ static void aurora_save(void)
l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
}
+static void __init tauros3_save(void)
+{
+ l2x0_saved_regs.aux2_ctrl =
+ readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL);
+ l2x0_saved_regs.prefetch_ctrl =
+ readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL);
+}
+
static void l2x0_resume(void)
{
if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
@@ -821,6 +830,18 @@ static void aurora_resume(void)
}
}
+static void tauros3_resume(void)
+{
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+ l2x0_base + TAUROS3_AUX2_CTRL);
+ writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+ l2x0_base + L2X0_PREFETCH_CTRL);
+ }
+
+ l2x0_resume();
+}
+
static void __init aurora_broadcast_l2_commands(void)
{
__u32 u;
@@ -906,6 +927,15 @@ static const struct l2x0_of_data aurora_no_outer_data = {
},
};
+static const struct l2x0_of_data tauros3_data = {
+ .setup = NULL,
+ .save = tauros3_save,
+ /* Tauros3 broadcasts L1 cache operations to L2 */
+ .outer_cache = {
+ .resume = tauros3_resume,
+ },
+};
+
static const struct l2x0_of_data bcm_l2x0_data = {
.setup = pl310_of_setup,
.save = pl310_save,
@@ -922,17 +952,19 @@ static const struct l2x0_of_data bcm_l2x0_data = {
};
static const struct of_device_id l2x0_ids[] __initconst = {
- { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
- { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
- { .compatible = "marvell,aurora-system-cache",
- .data = (void *)&aurora_no_outer_data},
- { .compatible = "marvell,aurora-outer-cache",
- .data = (void *)&aurora_with_outer_data},
- { .compatible = "brcm,bcm11351-a2-pl310-cache",
- .data = (void *)&bcm_l2x0_data},
+ { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
+ { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
{ .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */
.data = (void *)&bcm_l2x0_data},
+ { .compatible = "brcm,bcm11351-a2-pl310-cache",
+ .data = (void *)&bcm_l2x0_data},
+ { .compatible = "marvell,aurora-outer-cache",
+ .data = (void *)&aurora_with_outer_data},
+ { .compatible = "marvell,aurora-system-cache",
+ .data = (void *)&aurora_no_outer_data},
+ { .compatible = "marvell,tauros3-cache",
+ .data = (void *)&tauros3_data },
{}
};
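
The new Tauros3 save/resume pair follows the pattern of the existing pl310 and aurora handlers: save snapshots the implementation-specific control registers, and resume rewrites them only while the controller is still disabled before the common l2x0_resume() re-enables it. The outer_cache ops only set .resume because, per the comment above, Tauros3 broadcasts L1 maintenance to L2. A minimal userspace model of that gating, using a fake register file and hypothetical mmio helpers (the offsets below are placeholders, not the real register map):

#include <stdint.h>

#define CTRL_OFF       0x100
#define CTRL_EN        0x1
#define AUX2_CTRL_OFF  0x820
#define PREFETCH_OFF   0xf60

static uint32_t regs[0x1000];                        /* fake register file */
static uint32_t mmio_read(uint32_t off)          { return regs[off]; }
static void mmio_write(uint32_t off, uint32_t v) { regs[off] = v; }

static struct { uint32_t aux2_ctrl, prefetch_ctrl; } saved;

static void model_tauros3_save(void)
{
	saved.aux2_ctrl     = mmio_read(AUX2_CTRL_OFF);
	saved.prefetch_ctrl = mmio_read(PREFETCH_OFF);
}

static void model_tauros3_resume(void)
{
	/* Reprogram only while the controller is still disabled ... */
	if (!(mmio_read(CTRL_OFF) & CTRL_EN)) {
		mmio_write(AUX2_CTRL_OFF, saved.aux2_ctrl);
		mmio_write(PREFETCH_OFF, saved.prefetch_ctrl);
	}
	/* ... then the common resume path re-enables the cache. */
	mmio_write(CTRL_OFF, mmio_read(CTRL_OFF) | CTRL_EN);
}
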
diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h
new file mode 100644
index 000000000000..02c0a97cbc02
--- /dev/null
+++ b/arch/arm/mm/cache-tauros3.h
@@ -0,0 +1,41 @@
+/*
+ * Marvell Tauros3 cache controller includes
+ *
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * based on GPL'ed 2.6 kernel sources
+ * (c) Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_HARDWARE_TAUROS3_H
+#define __ASM_ARM_HARDWARE_TAUROS3_H
+
+/*
+ * Marvell Tauros3 L2CC is compatible with PL310 r0p0
+ * but with PREFETCH_CTRL (r2p0) and an additional event counter.
+ * Also, there is AUX2_CTRL for some Marvell specific control.
+ */
+
+#define TAUROS3_EVENT_CNT2_CFG 0x224
+#define TAUROS3_EVENT_CNT2_VAL 0x228
+#define TAUROS3_INV_ALL 0x780
+#define TAUROS3_CLEAN_ALL 0x784
+#define TAUROS3_AUX2_CTRL 0x820
+
+/* Register shifts and masks */
+#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2)
+
+#endif
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index b5c467a65c27..778bcf88ee79 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -146,18 +146,18 @@ flush_levels:
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
loop1:
- mov r9, r4 @ create working copy of max way size
+ mov r9, r7 @ create working copy of max index
loop2:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- THUMB( lsl r6, r9, r5 )
+ ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11
+ THUMB( lsl r6, r4, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r7, r2 )
+ ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11
+ THUMB( lsl r6, r9, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
+ subs r9, r9, #1 @ decrement the index
bge loop2
- subs r7, r7, #1 @ decrement the index
+ subs r4, r4, #1 @ decrement the way
bge loop1
skip:
add r10, r10, #2 @ increment cache number
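
The register swap above inverts the loop nesting in flush_levels: loop1 now steps through ways (r4) and loop2 through set indexes (r9, copied from r7), so every set of one way is cleaned before moving to the next way, and the decrements match the comments again. Roughly, in C (a sketch only; the shift amounts come from CLIDR/CCSIDR and the cp15 write is stubbed out):

#include <stdint.h>

/* Hypothetical stub for the "clean & invalidate by set/way" operation;
 * the real code issues "mcr p15, 0, r11, c7, c14, 2". */
static void dccisw(uint32_t setway) { (void)setway; }

/* Rough model of the rewritten loop for a single cache level:
 * ways in the outer loop, set indexes in the inner loop. */
static void clean_inv_level(uint32_t level, int max_way, int max_set,
			    uint32_t way_shift, uint32_t set_shift)
{
	for (int way = max_way; way >= 0; way--)           /* loop1 */
		for (int set = max_set; set >= 0; set--)   /* loop2 */
			dccisw((level << 1) |
			       ((uint32_t)way << way_shift) |
			       ((uint32_t)set << set_shift));
}
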
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 84e6f772e204..6eb97b3a7481 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -36,8 +36,8 @@
* The context ID is used by debuggers and trace logic, and
* should be unique within all running processes.
*
- * In big endian operation, the two 32 bit words are swapped if accesed by
- * non 64-bit operations.
+ * In big endian operation, the two 32 bit words are swapped if accessed
+ * by non-64-bit operations.
*/
#define ASID_FIRST_VERSION (1ULL << ASID_BITS)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
@@ -78,20 +78,21 @@ void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
#endif
#ifdef CONFIG_ARM_LPAE
-static void cpu_set_reserved_ttbr0(void)
-{
- /*
- * Set TTBR0 to swapper_pg_dir which contains only global entries. The
- * ASID is set to 0.
- */
- cpu_set_ttbr(0, __pa(swapper_pg_dir));
- isb();
-}
+/*
+ * With LPAE, the ASID and page tables are updated atomically, so there is
+ * no need for a reserved set of tables (the active ASID tracking prevents
+ * any issues across a rollover).
+ */
+#define cpu_set_reserved_ttbr0()
#else
static void cpu_set_reserved_ttbr0(void)
{
u32 ttb;
- /* Copy TTBR1 into TTBR0 */
+ /*
+ * Copy TTBR1 into TTBR0.
+ * This points at swapper_pg_dir, which contains only global
+ * entries so any speculative walks are perfectly safe.
+ */
asm volatile(
" mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n"
" mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n"
@@ -179,6 +180,7 @@ static int is_reserved_asid(u64 asid)
static u64 new_context(struct mm_struct *mm, unsigned int cpu)
{
+ static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
u64 generation = atomic64_read(&asid_generation);
@@ -193,10 +195,13 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
* Allocate a free ASID. If we can't find one, take a
* note of the currently active ASIDs and mark the TLBs
* as requiring flushes. We always count from ASID #1,
- * as we reserve ASID #0 to switch via TTBR0 and indicate
- * rollover events.
+ * as we reserve ASID #0 to switch via TTBR0 and to
+ * avoid speculative page table walks from hitting in
+ * any partial walk caches, which could be populated
+ * from overlapping level-1 descriptors used to map both
+ * the module area and the userspace stack.
*/
- asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid == NUM_USER_ASIDS) {
generation = atomic64_add_return(ASID_FIRST_VERSION,
&asid_generation);
@@ -204,6 +209,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
}
__set_bit(asid, asid_map);
+ cur_idx = asid;
asid |= generation;
cpumask_clear(mm_cpumask(mm));
}
@@ -221,8 +227,9 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
__check_vmalloc_seq(mm);
/*
- * Required during context switch to avoid speculative page table
- * walking with the wrong TTBR.
+ * We cannot update the pgd and the ASID atomically with classic
+ * MMU, so switch exclusively to global mappings to avoid
+ * speculative page table walking with the wrong TTBR.
*/
cpu_set_reserved_ttbr0();
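
The cur_idx addition lets new_context() resume its bitmap search at the slot just past the previous allocation instead of rescanning from ASID #1 on every call, so a burst of new processes no longer walks the densely used low ASIDs each time; only after a rollover does the search restart from 1. A compact userspace model of that allocation step (the map size and rollover handling are simplified stand-ins for the real generation/TLB-flush logic):

#include <limits.h>
#include <string.h>

#define NUM_ASIDS     256U      /* illustrative; the kernel uses 1 << ASID_BITS */
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

static unsigned long asid_map[NUM_ASIDS / BITS_PER_LONG];

/* Minimal stand-in for the kernel's find_next_zero_bit(). */
static unsigned int next_zero_bit(const unsigned long *map,
				  unsigned int size, unsigned int start)
{
	for (unsigned int i = start; i < size; i++)
		if (!(map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))))
			return i;
	return size;
}

static unsigned int alloc_asid(void)
{
	static unsigned int cur_idx = 1;   /* ASID #0 stays reserved */

	/* Resume the scan where the previous allocation left off. */
	unsigned int asid = next_zero_bit(asid_map, NUM_ASIDS, cur_idx);
	if (asid == NUM_ASIDS) {
		/* Rollover: the real code bumps the generation and marks all
		 * TLBs for flushing; here we just clear the map. */
		memset(asid_map, 0, sizeof(asid_map));
		asid = next_zero_bit(asid_map, NUM_ASIDS, 1);
	}
	asid_map[asid / BITS_PER_LONG] |= 1UL << (asid % BITS_PER_LONG);
	cur_idx = asid;
	return asid;
}
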
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f0ea0134e5a3..1a77450e728a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -9,6 +9,7 @@
*
* DMA uncached mapping support.
*/
+#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/gfp.h>
@@ -157,6 +158,44 @@ struct dma_map_ops arm_coherent_dma_ops = {
};
EXPORT_SYMBOL(arm_coherent_dma_ops);
+static int __dma_supported(struct device *dev, u64 mask, bool warn)
+{
+ unsigned long max_dma_pfn;
+
+ /*
+ * If the mask allows for more memory than we can address,
+ * and we actually have that much memory, then we must
+ * indicate that DMA to this device is not supported.
+ */
+ if (sizeof(mask) != sizeof(dma_addr_t) &&
+ mask > (dma_addr_t)~0 &&
+ dma_to_pfn(dev, ~0) < max_pfn) {
+ if (warn) {
+ dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
+ mask);
+ dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
+ }
+ return 0;
+ }
+
+ max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+
+ /*
+ * Translate the device's DMA mask to a PFN limit. This
+ * PFN number includes the page which we can DMA to.
+ */
+ if (dma_to_pfn(dev, mask) < max_dma_pfn) {
+ if (warn)
+ dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n",
+ mask,
+ dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1,
+ max_dma_pfn + 1);
+ return 0;
+ }
+
+ return 1;
+}
+
static u64 get_coherent_dma_mask(struct device *dev)
{
u64 mask = (u64)DMA_BIT_MASK(32);
@@ -173,32 +212,8 @@ static u64 get_coherent_dma_mask(struct device *dev)
return 0;
}
- /*
- * If the mask allows for more memory than we can address,
- * and we actually have that much memory, then fail the
- * allocation.
- */
- if (sizeof(mask) != sizeof(dma_addr_t) &&
- mask > (dma_addr_t)~0 &&
- dma_to_pfn(dev, ~0) > arm_dma_pfn_limit) {
- dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
- mask);
- dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
- return 0;
- }
-
- /*
- * Now check that the mask, when translated to a PFN,
- * fits within the allowable addresses which we can
- * allocate.
- */
- if (dma_to_pfn(dev, mask) < arm_dma_pfn_limit) {
- dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n",
- mask,
- dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1,
- arm_dma_pfn_limit + 1);
+ if (!__dma_supported(dev, mask, true))
return 0;
- }
}
return mask;
@@ -1027,28 +1042,7 @@ void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
*/
int dma_supported(struct device *dev, u64 mask)
{
- unsigned long limit;
-
- /*
- * If the mask allows for more memory than we can address,
- * and we actually have that much memory, then we must
- * indicate that DMA to this device is not supported.
- */
- if (sizeof(mask) != sizeof(dma_addr_t) &&
- mask > (dma_addr_t)~0 &&
- dma_to_pfn(dev, ~0) > arm_dma_pfn_limit)
- return 0;
-
- /*
- * Translate the device's DMA mask to a PFN limit. This
- * PFN number includes the page which we can DMA to.
- */
- limit = dma_to_pfn(dev, mask);
-
- if (limit < arm_dma_pfn_limit)
- return 0;
-
- return 1;
+ return __dma_supported(dev, mask, false);
}
EXPORT_SYMBOL(dma_supported);
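
Both callers now funnel through __dma_supported(), which (1) rejects a mask wider than dma_addr_t only when RAM actually extends beyond what dma_addr_t can address (testing against max_pfn rather than the old arm_dma_pfn_limit), and (2) checks the mask, translated to a PFN limit, against min(max_pfn, arm_dma_pfn_limit). A standalone sketch of that logic with made-up memory-layout numbers and an identity dma_to_pfn() (the real helper applies the device's pfn offset):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t dma_addr_t;                      /* assume a 32-bit dma_addr_t */

static unsigned long max_pfn           = 0x80000; /* highest RAM pfn (placeholder) */
static unsigned long arm_dma_pfn_limit = 0x40000; /* top of the DMA zone (placeholder) */

static unsigned long dma_to_pfn(uint64_t addr)    /* identity translation */
{
	return (unsigned long)(addr >> 12);
}

static bool dma_mask_ok(uint64_t mask, bool warn)
{
	/* The mask claims more than dma_addr_t can carry, and there really
	 * is memory beyond what dma_addr_t can address: refuse. */
	if (sizeof(mask) != sizeof(dma_addr_t) &&
	    mask > (dma_addr_t)~0 &&
	    dma_to_pfn((dma_addr_t)~0) < max_pfn) {
		if (warn)
			fprintf(stderr, "mask %#llx wider than dma_addr_t\n",
				(unsigned long long)mask);
		return false;
	}

	/* Translated to a pfn limit, the mask must reach all DMA-able RAM. */
	unsigned long max_dma_pfn =
		max_pfn < arm_dma_pfn_limit ? max_pfn : arm_dma_pfn_limit;
	return dma_to_pfn(mask) >= max_dma_pfn;
}
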
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 6d5ba9afb16a..3387e60e4ea3 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -175,16 +175,16 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
unsigned long i;
if (cache_is_vipt_nonaliasing()) {
for (i = 0; i < (1 << compound_order(page)); i++) {
- void *addr = kmap_atomic(page);
+ void *addr = kmap_atomic(page + i);
__cpuc_flush_dcache_area(addr, PAGE_SIZE);
kunmap_atomic(addr);
}
} else {
for (i = 0; i < (1 << compound_order(page)); i++) {
- void *addr = kmap_high_get(page);
+ void *addr = kmap_high_get(page + i);
if (addr) {
__cpuc_flush_dcache_area(addr, PAGE_SIZE);
- kunmap_high(page);
+ kunmap_high(page + i);
}
}
}
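
The flush.c fix walks every constituent page of a compound (huge) page, mapping and flushing page + i on each iteration, where the old code mapped and flushed the head page every time and left the tail pages' cache lines untouched. A tiny model of the corrected loop with stubbed map/flush helpers (the struct page layout and helper names are placeholders, not the kernel's):

#include <stddef.h>

#define PAGE_SIZE 4096UL

struct page { void *virt; };                  /* simplified stand-in */

/* Hypothetical stubs for kmap_atomic()/kunmap_atomic() and
 * __cpuc_flush_dcache_area(). */
static void *map_page(struct page *pg)        { return pg->virt; }
static void unmap_page(void *addr)            { (void)addr; }
static void flush_area(void *addr, size_t n)  { (void)addr; (void)n; }

/* Model of the corrected loop: each page of the compound page is mapped
 * and flushed individually. */
static void flush_compound_page(struct page *head, unsigned int order)
{
	for (unsigned long i = 0; i < (1UL << order); i++) {
		void *addr = map_page(head + i);   /* page + i, not page */
		flush_area(addr, PAGE_SIZE);
		unmap_page(addr);
	}
}
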
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index d27158c38eb0..5e85ed371364 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
- info.low_limit = PAGE_SIZE;
+ info.low_limit = FIRST_USER_ADDRESS;
info.high_limit = mm->mmap_base;
info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
info.align_offset = pgoff << PAGE_SHIFT;
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 0acb089d0f70..249379535be2 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -23,7 +23,7 @@
#define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL)
#define __pgd_free(pgd) kfree(pgd)
#else
-#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2)
+#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2)
#define __pgd_free(pgd) free_pages((unsigned long)pgd, 2)
#endif
@@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
init_pud = pud_offset(init_pgd, 0);
init_pmd = pmd_offset(init_pud, 0);
init_pte = pte_offset_map(init_pmd, 0);
- set_pte_ext(new_pte, *init_pte, 0);
+ set_pte_ext(new_pte + 0, init_pte[0], 0);
+ set_pte_ext(new_pte + 1, init_pte[1], 0);
pte_unmap(init_pte);
pte_unmap(new_pte);
}