Merge patch series "Fix io accessors for KVM"

Ilias Apalodimas <ilias.apalodimas@linaro.org> says: Instructions that lead to an exception in the hypervisor can't modify two CPU registers at once for the ARM ISA. These instructions cannot be emulated by KVM as they do not produce syndrome information data that KVM can use to infer the destination register, the faulting address, whether it was a load or store, or if it's a 32- or 64-bit general-purpose register. As a result, an external abort is injected from QEMU, via ext_dabt_pending. Link: https://lore.kernel.org/r/20250618065828.1312146-1-ilias.apalodimas@linaro.org
author: Tom Rini <trini@konsulko.com> 2025-06-27 11:50:30 -0600
committer: Tom Rini <trini@konsulko.com> 2025-06-27 11:50:30 -0600
commit: 02bb597ed6b23270e5e96b18d30ca03e403f1161 (patch)
tree: 6f5c7639084d24bb95f5139d0b3a4bf9ba8f7ab7
parent: e5d4733c2e1937eb7b622771bfea9f1fda0425c6 (diff)
parent: fcc60481ae755cf289bb96ccef81294b0b443654 (diff)
6 files changed, 142 insertions, 54 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ec3e626a81f..1d096ad6554 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -108,6 +108,18 @@ config LNX_KRNL_IMG_TEXT_OFFSET_BASE
 	  The value subtracted from CONFIG_TEXT_BASE to calculate the
 	  TEXT_OFFSET value written to the Linux kernel image header.
 
+config KVM_VIRT_INS
+	bool "Emit virtualizable instructions"
+	help
+	  Instructions in the ARM ISA that have multiple output registers
+	  can't be used if the instruction leads to an exception to the hypervisor.
+	  These instructions cannot be emulated by KVM because they do not produce
+	  syndrome information data that KVM can use to infer the destination
+	  register, the faulting address, whether it was a load or store, or
+	  whether it's a 32- or 64-bit general-purpose register, among other things.
+	  Use this to produce virtualizable instructions if you plan to run U-Boot
+	  with KVM.
+
 config NVIC
 	bool
 
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 89b1015bc4d..85ec0e6937e 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -20,23 +20,108 @@ static inline void sync(void)
 {
 }
 
-/* Generic virtual read/write. */
-#define __arch_getb(a)			(*(volatile unsigned char *)(a))
-#define __arch_getw(a)			(*(volatile unsigned short *)(a))
-#define __arch_getl(a)			(*(volatile unsigned int *)(a))
-#define __arch_getq(a)			(*(volatile unsigned long long *)(a))
+#ifdef CONFIG_ARM64
+#define __W	"w"
+#else
+#define __W
+#endif
+
+#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
+#define __R "l"
+#define __RM "=l"
+#else
+#define __R "r"
+#define __RM "=r"
+#endif
 
-#define __arch_putb(v,a)		(*(volatile unsigned char *)(a) = (v))
-#define __arch_putw(v,a)		(*(volatile unsigned short *)(a) = (v))
-#define __arch_putl(v,a)		(*(volatile unsigned int *)(a) = (v))
-#define __arch_putq(v,a)		(*(volatile unsigned long long *)(a) = (v))
+#ifdef CONFIG_KVM_VIRT_INS
+/*
+ * The __raw_writeX/__raw_readX below should be converted to static inline
+ * functions. However doing so produces a lot of compilation warnings when
+ * called with a raw address. Convert these once the callers have been fixed.
+ */
+#define __raw_writeb(val, addr)			\
+	do {					\
+		asm volatile("strb %" __W "0, [%1]"	\
+		:				\
+		: __R ((u8)(val)), __R (addr));	\
+	} while (0)
+
+#define __raw_readb(addr)				\
+	({						\
+		u32 __val;				\
+		asm volatile("ldrb %" __W "0, [%1]"		\
+		: __RM (__val)				\
+		: __R (addr));				\
+		__val;					\
+	})
+
+#define __raw_writew(val, addr)			\
+	do {					\
+		asm volatile("strh %" __W "0, [%1]"	\
+		:					\
+		: __R ((u16)(val)), __R (addr));	\
+	} while (0)
+
+#define __raw_readw(addr)				\
+	({						\
+		u32 __val;				\
+		asm volatile("ldrh %" __W "0, [%1]"		\
+		: __RM (__val)				\
+		: __R (addr));				\
+	__val;						\
+    })
+
+#define __raw_writel(val, addr)				\
+	do {						\
+		asm volatile("str %" __W "0, [%1]"		\
+		:					\
+		: __R ((u32)(val)), __R (addr));	\
+	} while (0)
+
+#define __raw_readl(addr)				\
+	({						\
+		u32 __val;				\
+		asm volatile("ldr %" __W "0, [%1]"		\
+		: __RM (__val)				\
+		: __R (addr));				\
+		__val;					\
+	})
+
+#define __raw_writeq(val, addr)				\
+	do {						\
+		asm volatile("str %0, [%1]"		\
+		:					\
+		: __R ((u64)(val)), __R (addr));	\
+	} while (0)
+
+#define __raw_readq(addr)				\
+	({						\
+		u64 __val;				\
+		asm volatile("ldr %0, [%1]"		\
+		: __RM (__val)				\
+		: __R (addr));				\
+		__val;					\
+	    })
+#else
+/* Generic virtual read/write. */
+#define __raw_readb(a)			(*(volatile unsigned char *)(a))
+#define __raw_readw(a)			(*(volatile unsigned short *)(a))
+#define __raw_readl(a)			(*(volatile unsigned int *)(a))
+#define __raw_readq(a)			(*(volatile unsigned long long *)(a))
+
+#define __raw_writeb(v, a)		(*(volatile unsigned char *)(a) = (v))
+#define __raw_writew(v, a)		(*(volatile unsigned short *)(a) = (v))
+#define __raw_writel(v, a)		(*(volatile unsigned int *)(a) = (v))
+#define __raw_writeq(v, a)		(*(volatile unsigned long long *)(a) = (v))
+#endif
 
 static inline void __raw_writesb(unsigned long addr, const void *data,
 				 int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
-		__arch_putb(*buf++, addr);
+		__raw_writeb(*buf++, addr);
 }
 
 static inline void __raw_writesw(unsigned long addr, const void *data,
@@ -44,7 +129,7 @@ static inline void __raw_writesw(unsigned long addr, const void *data,
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
-		__arch_putw(*buf++, addr);
+		__raw_writew(*buf++, addr);
 }
 
 static inline void __raw_writesl(unsigned long addr, const void *data,
@@ -52,40 +137,30 @@ static inline void __raw_writesl(unsigned long addr, const void *data,
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
-		__arch_putl(*buf++, addr);
+		__raw_writel(*buf++, addr);
 }
 
 static inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
 {
 	uint8_t *buf = (uint8_t *)data;
 	while(bytelen--)
-		*buf++ = __arch_getb(addr);
+		*buf++ = __raw_readb(addr);
 }
 
 static inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
 {
 	uint16_t *buf = (uint16_t *)data;
 	while(wordlen--)
-		*buf++ = __arch_getw(addr);
+		*buf++ = __raw_readw(addr);
 }
 
 static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 {
 	uint32_t *buf = (uint32_t *)data;
 	while(longlen--)
-		*buf++ = __arch_getl(addr);
+		*buf++ = __raw_readl(addr);
 }
 
-#define __raw_writeb(v,a)	__arch_putb(v,a)
-#define __raw_writew(v,a)	__arch_putw(v,a)
-#define __raw_writel(v,a)	__arch_putl(v,a)
-#define __raw_writeq(v,a)	__arch_putq(v,a)
-
-#define __raw_readb(a)		__arch_getb(a)
-#define __raw_readw(a)		__arch_getw(a)
-#define __raw_readl(a)		__arch_getl(a)
-#define __raw_readq(a)		__arch_getq(a)
-
 /*
  * TODO: The kernel offers some more advanced versions of barriers, it might
  * have some advantages to use them instead of the simple one here.
@@ -98,15 +173,15 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 
 #define smp_processor_id()	0
 
-#define writeb(v,c)	({ u8  __v = v; __iowmb(); __arch_putb(__v,c); __v; })
-#define writew(v,c)	({ u16 __v = v; __iowmb(); __arch_putw(__v,c); __v; })
-#define writel(v,c)	({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; })
-#define writeq(v,c)	({ u64 __v = v; __iowmb(); __arch_putq(__v,c); __v; })
+#define writeb(v, c)	({ u8  __v = v; __iowmb(); writeb_relaxed(__v, c); __v; })
+#define writew(v, c)	({ u16 __v = v; __iowmb(); writew_relaxed(__v, c); __v; })
+#define writel(v, c)	({ u32 __v = v; __iowmb(); writel_relaxed(__v, c); __v; })
+#define writeq(v, c)	({ u64 __v = v; __iowmb(); writeq_relaxed(__v, c); __v; })
 
-#define readb(c)	({ u8  __v = __arch_getb(c); __iormb(); __v; })
-#define readw(c)	({ u16 __v = __arch_getw(c); __iormb(); __v; })
-#define readl(c)	({ u32 __v = __arch_getl(c); __iormb(); __v; })
-#define readq(c)	({ u64 __v = __arch_getq(c); __iormb(); __v; })
+#define readb(c)	({ u8  __v = readb_relaxed(c); __iormb(); __v; })
+#define readw(c)	({ u16 __v = readw_relaxed(c); __iormb(); __v; })
+#define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+#define readq(c)	({ u64 __v = readq_relaxed(c); __iormb(); __v; })
 
 /*
  * Relaxed I/O memory access primitives. These follow the Device memory
@@ -121,13 +196,10 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 #define readq_relaxed(c)	({ u64 __r = le64_to_cpu((__force __le64) \
 						__raw_readq(c)); __r; })
 
-#define writeb_relaxed(v, c)	((void)__raw_writeb((v), (c)))
-#define writew_relaxed(v, c)	((void)__raw_writew((__force u16) \
-						    cpu_to_le16(v), (c)))
-#define writel_relaxed(v, c)	((void)__raw_writel((__force u32) \
-						    cpu_to_le32(v), (c)))
-#define writeq_relaxed(v, c)	((void)__raw_writeq((__force u64) \
-						    cpu_to_le64(v), (c)))
+#define writeb_relaxed(v, c)	__raw_writeb((v), (c))
+#define writew_relaxed(v, c)	__raw_writew((__force u16)cpu_to_le16(v), (c))
+#define writel_relaxed(v, c)	__raw_writel((__force u32)cpu_to_le32(v), (c))
+#define writeq_relaxed(v, c)	__raw_writeq((__force u64)cpu_to_le64(v), (c))
 
 /*
  * The compiler seems to be incapable of optimising constants
diff --git a/configs/qemu_arm64_defconfig b/configs/qemu_arm64_defconfig
index 72bd255eafa..39afb837e41 100644
--- a/configs/qemu_arm64_defconfig
+++ b/configs/qemu_arm64_defconfig
@@ -1,4 +1,5 @@
 CONFIG_ARM=y
+CONFIG_KVM_VIRT_INS=y
 CONFIG_ARCH_QEMU=y
 CONFIG_SYS_MALLOC_LEN=0x1000000
 CONFIG_BLOBLIST_SIZE_RELOC=0x2000
diff --git a/configs/qemu_arm_defconfig b/configs/qemu_arm_defconfig
index f13001390d4..92ba48f6af9 100644
--- a/configs/qemu_arm_defconfig
+++ b/configs/qemu_arm_defconfig
@@ -1,4 +1,5 @@
 CONFIG_ARM=y
+CONFIG_KVM_VIRT_INS=y
 CONFIG_ARM_SMCCC=y
 CONFIG_ARCH_QEMU=y
 CONFIG_SYS_MALLOC_LEN=0x1000000
diff --git a/drivers/spi/fsl_dspi.c b/drivers/spi/fsl_dspi.c
index f2393c041f4..545561ad116 100644
--- a/drivers/spi/fsl_dspi.c
+++ b/drivers/spi/fsl_dspi.c
@@ -123,8 +123,10 @@ static uint dspi_read32(uint flags, uint *addr)
 
 static void dspi_write32(uint flags, uint *addr, uint val)
 {
-	flags & DSPI_FLAG_REGMAP_ENDIAN_BIG ?
-		out_be32(addr, val) : out_le32(addr, val);
+	if (flags & DSPI_FLAG_REGMAP_ENDIAN_BIG)
+		out_be32(addr, val);
+	else
+		out_le32(addr, val);
 }
 
 static void dspi_halt(struct fsl_dspi_priv *priv, u8 halt)
diff --git a/include/fsl_ifc.h b/include/fsl_ifc.h
index 3ac22687930..1c363115beb 100644
--- a/include/fsl_ifc.h
+++ b/include/fsl_ifc.h
@@ -803,29 +803,29 @@ void init_final_memctl_regs(void);
 	((struct fsl_ifc_fcm *)CFG_SYS_IFC_ADDR)
 
 #define get_ifc_cspr_ext(i)	\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr_ext))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr_ext)
 #define get_ifc_cspr(i)		\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr)
 #define get_ifc_csor_ext(i)	\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor_ext))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor_ext)
 #define get_ifc_csor(i)		\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor)
 #define get_ifc_amask(i)	\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->amask_cs[i].amask))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->amask_cs[i].amask)
 #define get_ifc_ftim(i, j)	\
-		(ifc_in32(&(IFC_FCM_BASE_ADDR)->ftim_cs[i].ftim[j]))
+		ifc_in32(&(IFC_FCM_BASE_ADDR)->ftim_cs[i].ftim[j])
 #define set_ifc_cspr_ext(i, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr_ext, v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr_ext, v)
 #define set_ifc_cspr(i, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr, v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->cspr_cs[i].cspr, v)
 #define set_ifc_csor_ext(i, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor_ext, v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor_ext, v)
 #define set_ifc_csor(i, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor, v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->csor_cs[i].csor, v)
 #define set_ifc_amask(i, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->amask_cs[i].amask, v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->amask_cs[i].amask, v)
 #define set_ifc_ftim(i, j, v)	\
-		(ifc_out32(&(IFC_FCM_BASE_ADDR)->ftim_cs[i].ftim[j], v))
+		ifc_out32(&(IFC_FCM_BASE_ADDR)->ftim_cs[i].ftim[j], v)
 
 enum ifc_chip_sel {
 	IFC_CS0,
author	Tom Rini <trini@konsulko.com>	2025-06-27 11:50:30 -0600
committer	Tom Rini <trini@konsulko.com>	2025-06-27 11:50:30 -0600
commit	02bb597ed6b23270e5e96b18d30ca03e403f1161 (patch)
tree	6f5c7639084d24bb95f5139d0b3a4bf9ba8f7ab7
parent	e5d4733c2e1937eb7b622771bfea9f1fda0425c6 (diff)
parent	fcc60481ae755cf289bb96ccef81294b0b443654 (diff)