summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/crypto/aes-glue.c2
-rw-r--r--arch/arm64/crypto/aes-modes.S88
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h16
-rw-r--r--arch/arm64/include/asm/percpu.h120
-rw-r--r--arch/arm64/include/asm/spinlock.h10
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/head.S3
-rw-r--r--arch/arm64/kernel/ptrace.c11
8 files changed, 131 insertions, 120 deletions
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 05d9e16c0dfd..6a51dfccfe71 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -211,7 +211,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
- if (nbytes) {
+ if (walk.nbytes % AES_BLOCK_SIZE) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index f6e372c528eb..94b5669ee4d3 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt)
cbz w6, .Lcbcencloop
ld1 {v0.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x5
+ enc_prepare w3, x2, x6
.Lcbcencloop:
ld1 {v1.16b}, [x1], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x5, w6
+ encrypt_block v0, w3, x2, x6, w7
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
+ st1 {v0.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt)
cbz w6, .LcbcdecloopNx
ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x5
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
#if INTERLEAVE >= 2
@@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt)
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x5, w6
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
@@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt)
bne .Lcbcdecloop
.Lcbcdecout:
FRAME_POP
+ st1 {v7.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt)
AES_ENTRY(aes_ctr_encrypt)
FRAME_PUSH
- cbnz w6, .Lctrfirst /* 1st time around? */
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
-#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
- bcs .Lctrinc
- add x5, x5, #1 /* increment BE ctr */
- b .LctrincNx
-#else
- b .Lctrinc
-#endif
-.Lctrfirst:
+ cbz w6, .Lctrnotfirst /* 1st time around? */
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
+
+.Lctrnotfirst:
+ umov x8, v4.d[1] /* keep swabbed ctr in reg */
+ rev x8, x8
#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
+ cmn w8, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #INTERLEAVE
@@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt)
#if INTERLEAVE == 2
mov v0.8b, v4.8b
mov v1.8b, v4.8b
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v0.d[1], x7
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v1.d[1], x7
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
do_encrypt_block2x
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt)
st1 {v0.16b-v1.16b}, [x0], #32
#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w5
+ dup v7.4s, w8
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
@@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt)
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
- add x5, x5, #INTERLEAVE
+ add x8, x8, #INTERLEAVE
#endif
- cbz w4, .LctroutNx
-.LctrincNx:
- rev x7, x5
+ rev x7, x8
ins v4.d[1], x7
+ cbz w4, .Lctrout
b .LctrloopNx
-.LctroutNx:
- sub x5, x5, #1
- rev x7, x5
- ins v4.d[1], x7
- b .Lctrout
.Lctr1x:
adds w4, w4, #INTERLEAVE
beq .Lctrout
@@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
+
+ adds x8, x8, #1 /* increment BE ctr */
+ rev x7, x8
+ ins v4.d[1], x7
+ bcs .Lctrcarry /* overflow? */
+
+.Lctrcarrydone:
subs w4, w4, #1
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
- beq .Lctrout
-.Lctrinc:
- adds x5, x5, #1 /* increment BE ctr */
- rev x7, x5
- ins v4.d[1], x7
- bcc .Lctrloop /* no overflow? */
- umov x7, v4.d[0] /* load upper word of ctr */
- rev x7, x7 /* ... to handle the carry */
- add x7, x7, #1
- rev x7, x7
- ins v4.d[0], x7
- b .Lctrloop
+ bne .Lctrloop
+
+.Lctrout:
+ st1 {v4.16b}, [x5] /* return next CTR value */
+ FRAME_POP
+ ret
+
.Lctrhalfblock:
ld1 {v3.8b}, [x1]
eor v3.8b, v0.8b, v3.8b
st1 {v3.8b}, [x0]
-.Lctrout:
FRAME_POP
ret
+
+.Lctrcarry:
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
.ltorg
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3ca894ecf699..3e3c4c7a5082 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -153,11 +153,6 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
}
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
-{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR);
-}
-
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
@@ -178,6 +173,17 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
+ kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
+}
+
+static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
+}
+
static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 4fde8c1df97f..9d096bc89287 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -52,48 +52,44 @@ static inline unsigned long __percpu_##op(void *ptr, \
\
switch (size) { \
case 1: \
- do { \
- asm ("//__per_cpu_" #op "_1\n" \
- "ldxrb %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_1\n" \
+ "1: ldxrb %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxrb %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u8 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxrb %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u8 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 2: \
- do { \
- asm ("//__per_cpu_" #op "_2\n" \
- "ldxrh %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_2\n" \
+ "1: ldxrh %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxrh %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u16 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxrh %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u16 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 4: \
- do { \
- asm ("//__per_cpu_" #op "_4\n" \
- "ldxr %w[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_4\n" \
+ "1: ldxr %w[ret], %[ptr]\n" \
#asm_op " %w[ret], %w[ret], %w[val]\n" \
- "stxr %w[loop], %w[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u32 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxr %w[loop], %w[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u32 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
case 8: \
- do { \
- asm ("//__per_cpu_" #op "_8\n" \
- "ldxr %[ret], %[ptr]\n" \
+ asm ("//__per_cpu_" #op "_8\n" \
+ "1: ldxr %[ret], %[ptr]\n" \
#asm_op " %[ret], %[ret], %[val]\n" \
- "stxr %w[loop], %[ret], %[ptr]\n" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u64 *)ptr) \
- : [val] "Ir" (val)); \
- } while (loop); \
+ " stxr %w[loop], %[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b" \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u64 *)ptr) \
+ : [val] "Ir" (val)); \
break; \
default: \
BUILD_BUG(); \
@@ -158,44 +154,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
switch (size) {
case 1:
- do {
- asm ("//__percpu_xchg_1\n"
- "ldxrb %w[ret], %[ptr]\n"
- "stxrb %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u8 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_1\n"
+ "1: ldxrb %w[ret], %[ptr]\n"
+ " stxrb %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u8 *)ptr)
+ : [val] "r" (val));
break;
case 2:
- do {
- asm ("//__percpu_xchg_2\n"
- "ldxrh %w[ret], %[ptr]\n"
- "stxrh %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u16 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_2\n"
+ "1: ldxrh %w[ret], %[ptr]\n"
+ " stxrh %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u16 *)ptr)
+ : [val] "r" (val));
break;
case 4:
- do {
- asm ("//__percpu_xchg_4\n"
- "ldxr %w[ret], %[ptr]\n"
- "stxr %w[loop], %w[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u32 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_4\n"
+ "1: ldxr %w[ret], %[ptr]\n"
+ " stxr %w[loop], %w[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u32 *)ptr)
+ : [val] "r" (val));
break;
case 8:
- do {
- asm ("//__percpu_xchg_8\n"
- "ldxr %[ret], %[ptr]\n"
- "stxr %w[loop], %[val], %[ptr]\n"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u64 *)ptr)
- : [val] "r" (val));
- } while (loop);
+ asm ("//__percpu_xchg_8\n"
+ "1: ldxr %[ret], %[ptr]\n"
+ " stxr %w[loop], %[val], %[ptr]\n"
+ " cbnz %w[loop], 1b"
+ : [loop] "=&r"(loop), [ret] "=&r"(ret),
+ [ptr] "+Q"(*(u64 *)ptr)
+ : [val] "r" (val));
break;
default:
BUILD_BUG();
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cee128732435..d155a9bbfab6 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -231,4 +231,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
+/*
+ * Accesses appearing in program order before a spin_lock() operation
+ * can be reordered with accesses inside the critical section, by virtue
+ * of arch_spin_lock being constructed using acquire semantics.
+ *
+ * In cases where this is problematic (e.g. try_to_wake_up), an
+ * smp_mb__before_spinlock() can restore the required ordering.
+ */
+#define smp_mb__before_spinlock() smp_mb()
+
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 6913643bbe54..c136fd53c847 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -75,6 +75,7 @@ struct user_fpsimd_state {
__uint128_t vregs[32];
__u32 fpsr;
__u32 fpcr;
+ __u32 __reserved[2];
};
struct user_hwdebug_state {
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cc7435c9676e..b346b35f827d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -572,8 +572,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
- msr mdcr_el2, x0 // all PMU counters from EL1
4:
+ csel x0, xzr, x0, lt // all PMU counters from EL1
+ msr mdcr_el2, x0 // (if they exist)
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7038b9a3b42c..f7738bbc8c3f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -450,6 +450,8 @@ static int hw_break_set(struct task_struct *target,
/* (address, ctrl) registers */
limit = regset->n * regset->size;
while (count && offset < limit) {
+ if (count < PTRACE_HBP_ADDR_SZ)
+ return -EINVAL;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr,
offset, offset + PTRACE_HBP_ADDR_SZ);
if (ret)
@@ -459,6 +461,8 @@ static int hw_break_set(struct task_struct *target,
return ret;
offset += PTRACE_HBP_ADDR_SZ;
+ if (!count)
+ break;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
offset, offset + PTRACE_HBP_CTRL_SZ);
if (ret)
@@ -495,7 +499,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_pt_regs newregs;
+ struct user_pt_regs newregs = task_pt_regs(target)->user_regs;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1);
if (ret)
@@ -525,7 +529,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_fpsimd_state newstate;
+ struct user_fpsimd_state newstate =
+ target->thread.fpsimd_state.user_fpsimd;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
if (ret)
@@ -549,7 +554,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- unsigned long tls;
+ unsigned long tls = target->thread.tp_value;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
if (ret)