summaryrefslogtreecommitdiff
path: root/arch/arc
diff options
context:
space:
mode:
authorMarcel Ziswiler <marcel.ziswiler@toradex.com>2019-03-28 16:27:49 +0100
committerMarcel Ziswiler <marcel.ziswiler@toradex.com>2019-03-28 16:27:49 +0100
commitd899927728beca8357a5b4120b690cb3c1d80844 (patch)
treeccb170439cc8638d71f6120ae08a6faded46db98 /arch/arc
parent8d60367808c45e33c0a9127621f4e5fc34914f6b (diff)
parent0a8ab17689e628c84a666195bfc6ab85d11cf057 (diff)
Diffstat (limited to 'arch/arc')
-rw-r--r--arch/arc/configs/zebu_hs_defconfig1
-rw-r--r--arch/arc/configs/zebu_hs_smp_defconfig1
-rw-r--r--arch/arc/include/asm/bitops.h6
-rw-r--r--arch/arc/include/asm/cache.h11
-rw-r--r--arch/arc/include/asm/io.h72
-rw-r--r--arch/arc/include/asm/perf_event.h3
-rw-r--r--arch/arc/include/asm/uaccess.h8
-rw-r--r--arch/arc/kernel/head.S14
-rw-r--r--arch/arc/kernel/setup.c87
-rw-r--r--arch/arc/lib/memcpy-archs.S14
-rw-r--r--arch/arc/lib/memset-archs.S40
11 files changed, 198 insertions, 59 deletions
diff --git a/arch/arc/configs/zebu_hs_defconfig b/arch/arc/configs/zebu_hs_defconfig
index 9f6166be7145..3346829b02bb 100644
--- a/arch/arc/configs/zebu_hs_defconfig
+++ b/arch/arc/configs/zebu_hs_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/arc/configs/zebu_hs_smp_defconfig b/arch/arc/configs/zebu_hs_smp_defconfig
index 44e9693f4257..4471f9c37d7e 100644
--- a/arch/arc/configs/zebu_hs_smp_defconfig
+++ b/arch/arc/configs/zebu_hs_smp_defconfig
@@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 8da87feec59a..99e6d8948f4a 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
-static inline __attribute__ ((const)) int __ffs(unsigned long word)
+static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
{
if (!word)
return word;
@@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
-static inline __attribute__ ((const)) int __ffs(unsigned long x)
+static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
{
- int n;
+ unsigned long n;
asm volatile(
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 4fd6272e6c01..c5816a224571 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -49,6 +49,17 @@
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+/*
+ * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
+ * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit
+ * alignment for any atomic64_t embedded in buffer.
+ * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed
+ * value of 4 (and not 8) in ARC ABI.
+ */
+#if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC)
+#define ARCH_SLAB_MINALIGN 8
+#endif
+
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index c22b181e8206..2f39d9b3886e 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <asm/byteorder.h>
#include <asm/page.h>
+#include <asm/unaligned.h>
#ifdef CONFIG_ISA_ARCV2
#include <asm/barrier.h>
@@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
return w;
}
+/*
+ * {read,write}s{b,w,l}() repeatedly access the same IO address in
+ * native endianness in 8-, 16-, 32-bit chunks {into,from} memory,
+ * @count times
+ */
+#define __raw_readsx(t,f) \
+static inline void __raw_reads##f(const volatile void __iomem *addr, \
+ void *ptr, unsigned int count) \
+{ \
+ bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
+ u##t *buf = ptr; \
+ \
+ if (!count) \
+ return; \
+ \
+ /* Some ARC CPU's don't support unaligned accesses */ \
+ if (is_aligned) { \
+ do { \
+ u##t x = __raw_read##f(addr); \
+ *buf++ = x; \
+ } while (--count); \
+ } else { \
+ do { \
+ u##t x = __raw_read##f(addr); \
+ put_unaligned(x, buf++); \
+ } while (--count); \
+ } \
+}
+
+#define __raw_readsb __raw_readsb
+__raw_readsx(8, b)
+#define __raw_readsw __raw_readsw
+__raw_readsx(16, w)
+#define __raw_readsl __raw_readsl
+__raw_readsx(32, l)
+
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
{
@@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
}
+#define __raw_writesx(t,f) \
+static inline void __raw_writes##f(volatile void __iomem *addr, \
+ const void *ptr, unsigned int count) \
+{ \
+ bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
+ const u##t *buf = ptr; \
+ \
+ if (!count) \
+ return; \
+ \
+ /* Some ARC CPU's don't support unaligned accesses */ \
+ if (is_aligned) { \
+ do { \
+ __raw_write##f(*buf++, addr); \
+ } while (--count); \
+ } else { \
+ do { \
+ __raw_write##f(get_unaligned(buf++), addr); \
+ } while (--count); \
+ } \
+}
+
+#define __raw_writesb __raw_writesb
+__raw_writesx(8, b)
+#define __raw_writesw __raw_writesw
+__raw_writesx(16, w)
+#define __raw_writesl __raw_writesl
+__raw_writesx(32, l)
+
/*
* MMIO can also get buffered/optimized in micro-arch, so barriers needed
* Based on ARM model for the typical use case
@@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); })
+#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); })
+#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); })
#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); })
#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); })
#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); })
+#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); })
+#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); })
+#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); })
/*
* Relaxed API for drivers which can handle barrier ordering themselves
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 9185541035cc..6958545390f0 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
+ /* All jump instructions that are taken */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 0684fd2f42e8..f82393f89215 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -209,7 +209,7 @@ __arc_copy_from_user(void *to, const void __user *from, unsigned long n)
*/
"=&r" (tmp), "+r" (to), "+r" (from)
:
- : "lp_count", "lp_start", "lp_end", "memory");
+ : "lp_count", "memory");
return n;
}
@@ -438,7 +438,7 @@ __arc_copy_to_user(void __user *to, const void *from, unsigned long n)
*/
"=&r" (tmp), "+r" (to), "+r" (from)
:
- : "lp_count", "lp_start", "lp_end", "memory");
+ : "lp_count", "memory");
return n;
}
@@ -658,7 +658,7 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
" .previous \n"
: "+r"(d_char), "+r"(res)
: "i"(0)
- : "lp_count", "lp_start", "lp_end", "memory");
+ : "lp_count", "memory");
return res;
}
@@ -691,7 +691,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
: "g"(-EFAULT), "r"(count)
- : "lp_count", "lp_start", "lp_end", "memory");
+ : "lp_count", "memory");
return res;
}
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 8b90d25a15cc..1f945d0f40da 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -17,6 +17,7 @@
#include <asm/entry.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
+#include <asm/irqflags.h>
.macro CPU_EARLY_SETUP
@@ -47,6 +48,15 @@
sr r5, [ARC_REG_DC_CTRL]
1:
+
+#ifdef CONFIG_ISA_ARCV2
+ ; Unaligned access is disabled at reset, so re-enable early as
+ ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access
+ ; by default
+ lr r5, [status32]
+ bset r5, r5, STATUS_AD_BIT
+ kflag r5
+#endif
.endm
.section .init.text, "ax",@progbits
@@ -93,9 +103,9 @@ ENTRY(stext)
#ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
- ; r1 = magic number (board identity, unused as of now
+ ; r1 = magic number (always zero as of now)
; r2 = pointer to uboot provided cmdline or external DTB in mem
- ; These are handled later in setup_arch()
+ ; These are handled later in handle_uboot_args()
st r0, [@uboot_tag]
st r2, [@uboot_arg]
#endif
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 0385df77a697..9119bea503a7 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -381,43 +381,80 @@ void setup_processor(void)
arc_chk_core_config();
}
-static inline int is_kernel(unsigned long addr)
+static inline bool uboot_arg_invalid(unsigned long addr)
{
- if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
- return 1;
- return 0;
+ /*
+ * Check that it is a untranslated address (although MMU is not enabled
+ * yet, it being a high address ensures this is not by fluke)
+ */
+ if (addr < PAGE_OFFSET)
+ return true;
+
+ /* Check that address doesn't clobber resident kernel image */
+ return addr >= (unsigned long)_stext && addr <= (unsigned long)_end;
}
-void __init setup_arch(char **cmdline_p)
+#define IGNORE_ARGS "Ignore U-boot args: "
+
+/* uboot_tag values for U-boot - kernel ABI revision 0; see head.S */
+#define UBOOT_TAG_NONE 0
+#define UBOOT_TAG_CMDLINE 1
+#define UBOOT_TAG_DTB 2
+
+void __init handle_uboot_args(void)
{
+ bool use_embedded_dtb = true;
+ bool append_cmdline = false;
+
#ifdef CONFIG_ARC_UBOOT_SUPPORT
- /* make sure that uboot passed pointer to cmdline/dtb is valid */
- if (uboot_tag && is_kernel((unsigned long)uboot_arg))
- panic("Invalid uboot arg\n");
+ /* check that we know this tag */
+ if (uboot_tag != UBOOT_TAG_NONE &&
+ uboot_tag != UBOOT_TAG_CMDLINE &&
+ uboot_tag != UBOOT_TAG_DTB) {
+ pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag);
+ goto ignore_uboot_args;
+ }
+
+ if (uboot_tag != UBOOT_TAG_NONE &&
+ uboot_arg_invalid((unsigned long)uboot_arg)) {
+ pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg);
+ goto ignore_uboot_args;
+ }
+
+ /* see if U-boot passed an external Device Tree blob */
+ if (uboot_tag == UBOOT_TAG_DTB) {
+ machine_desc = setup_machine_fdt((void *)uboot_arg);
- /* See if u-boot passed an external Device Tree blob */
- machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */
- if (!machine_desc)
+ /* external Device Tree blob is invalid - use embedded one */
+ use_embedded_dtb = !machine_desc;
+ }
+
+ if (uboot_tag == UBOOT_TAG_CMDLINE)
+ append_cmdline = true;
+
+ignore_uboot_args:
#endif
- {
- /* No, so try the embedded one */
+
+ if (use_embedded_dtb) {
machine_desc = setup_machine_fdt(__dtb_start);
if (!machine_desc)
panic("Embedded DT invalid\n");
+ }
- /*
- * If we are here, it is established that @uboot_arg didn't
- * point to DT blob. Instead if u-boot says it is cmdline,
- * append to embedded DT cmdline.
- * setup_machine_fdt() would have populated @boot_command_line
- */
- if (uboot_tag == 1) {
- /* Ensure a whitespace between the 2 cmdlines */
- strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, uboot_arg,
- COMMAND_LINE_SIZE);
- }
+ /*
+ * NOTE: @boot_command_line is populated by setup_machine_fdt() so this
+ * append processing can only happen after.
+ */
+ if (append_cmdline) {
+ /* Ensure a whitespace between the 2 cmdlines */
+ strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+ strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE);
}
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+ handle_uboot_args();
/* Save unparsed command line copy for /proc/cmdline */
*cmdline_p = boot_command_line;
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
index d61044dd8b58..ea14b0bf3116 100644
--- a/arch/arc/lib/memcpy-archs.S
+++ b/arch/arc/lib/memcpy-archs.S
@@ -25,15 +25,11 @@
#endif
#ifdef CONFIG_ARC_HAS_LL64
-# define PREFETCH_READ(RX) prefetch [RX, 56]
-# define PREFETCH_WRITE(RX) prefetchw [RX, 64]
# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
# define ZOLSHFT 5
# define ZOLAND 0x1F
#else
-# define PREFETCH_READ(RX) prefetch [RX, 28]
-# define PREFETCH_WRITE(RX) prefetchw [RX, 32]
# define LOADX(DST,RX) ld.ab DST, [RX, 4]
# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
# define ZOLSHFT 4
@@ -41,8 +37,6 @@
#endif
ENTRY_CFI(memcpy)
- prefetch [r1] ; Prefetch the read location
- prefetchw [r0] ; Prefetch the write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@@ -72,8 +66,6 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy32_64bytes
;; LOOP START
LOADX (r6, r1)
- PREFETCH_READ (r1)
- PREFETCH_WRITE (r3)
LOADX (r8, r1)
LOADX (r10, r1)
LOADX (r4, r1)
@@ -117,9 +109,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_1
;; LOOP START
ld.ab r6, [r1, 4]
- prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
- prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 24)
or r7, r7, r5
@@ -162,9 +152,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_2
;; LOOP START
ld.ab r6, [r1, 4]
- prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
- prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 16)
or r7, r7, r5
@@ -204,9 +192,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_3
;; LOOP START
ld.ab r6, [r1, 4]
- prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
- prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 8)
or r7, r7, r5
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index 62ad4bcb841a..f230bb7092fd 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -7,11 +7,39 @@
*/
#include <linux/linkage.h>
+#include <asm/cache.h>
-#undef PREALLOC_NOT_AVAIL
+/*
+ * The memset implementation below is optimized to use prefetchw and prealloc
+ * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
+ * If you want to implement optimized memset for other possible L1 data cache
+ * line lengths (32B and 128B) you should rewrite code carefully checking
+ * we don't call any prefetchw/prealloc instruction for L1 cache lines which
+ * don't belongs to memset area.
+ */
+
+#if L1_CACHE_SHIFT == 6
+
+.macro PREALLOC_INSTR reg, off
+ prealloc [\reg, \off]
+.endm
+
+.macro PREFETCHW_INSTR reg, off
+ prefetchw [\reg, \off]
+.endm
+
+#else
+
+.macro PREALLOC_INSTR
+.endm
+
+.macro PREFETCHW_INSTR
+.endm
+
+#endif
ENTRY_CFI(memset)
- prefetchw [r0] ; Prefetch the write location
+ PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@@ -48,11 +76,8 @@ ENTRY_CFI(memset)
lpnz @.Lset64bytes
;; LOOP START
-#ifdef PREALLOC_NOT_AVAIL
- prefetchw [r3, 64] ;Prefetch the next write location
-#else
- prealloc [r3, 64]
-#endif
+ PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
+
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
@@ -85,7 +110,6 @@ ENTRY_CFI(memset)
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
- prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]