From fca3aa16642200069eafa4ece17a60751bb891cd Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 17 Apr 2018 00:41:30 +0900
Subject: MIPS: dts: Avoid unneeded built-in.a in DTS dirs

arch/mips/boot/dts/Makefile collects objects from sub-directories into
built-in.a only when CONFIG_BUILTIN_DTB is enabled. Reflect it also to
the sub-directory Makefiles. This suppresses unneeded built-in.a
creation in arch/mips/boot/dts/*/ directories.

While I am here, I replaced $(patsubst %.dtb, %.dtb.o, $(dtb-y)) with
$(addsuffix .o, $(dtb-y)) to simplify the code a little bit.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Cercueil <paul@crapouillou.net>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: devicetree@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/19099/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/dts/brcm/Makefile          | 2 +-
 arch/mips/boot/dts/cavium-octeon/Makefile | 2 +-
 arch/mips/boot/dts/ingenic/Makefile       | 2 +-
 arch/mips/boot/dts/lantiq/Makefile        | 2 +-
 arch/mips/boot/dts/mscc/Makefile          | 2 +-
 arch/mips/boot/dts/mti/Makefile           | 2 +-
 arch/mips/boot/dts/netlogic/Makefile      | 2 +-
 arch/mips/boot/dts/pic32/Makefile         | 2 +-
 arch/mips/boot/dts/ralink/Makefile        | 2 +-
 arch/mips/boot/dts/xilfpga/Makefile       | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile
index d8787c9a499e..d85f446cc0ce 100644
--- a/arch/mips/boot/dts/brcm/Makefile
+++ b/arch/mips/boot/dts/brcm/Makefile
@@ -34,4 +34,4 @@ dtb-$(CONFIG_DT_NONE) += \
 	bcm97425svmb.dtb \
 	bcm97435svmb.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/cavium-octeon/Makefile b/arch/mips/boot/dts/cavium-octeon/Makefile
index 24a8efcd7b03..17aef35f311b 100644
--- a/arch/mips/boot/dts/cavium-octeon/Makefile
+++ b/arch/mips/boot/dts/cavium-octeon/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_CAVIUM_OCTEON_SOC)	+= octeon_3xxx.dtb octeon_68xx.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile
index 5b1361a89e02..9cc48441eb71 100644
--- a/arch/mips/boot/dts/ingenic/Makefile
+++ b/arch/mips/boot/dts/ingenic/Makefile
@@ -3,4 +3,4 @@ dtb-$(CONFIG_JZ4740_QI_LB60)	+= qi_lb60.dtb
 dtb-$(CONFIG_JZ4770_GCW0)	+= gcw0.dtb
 dtb-$(CONFIG_JZ4780_CI20)	+= ci20.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/lantiq/Makefile b/arch/mips/boot/dts/lantiq/Makefile
index 51ab9c1dff42..f5dfc06242b9 100644
--- a/arch/mips/boot/dts/lantiq/Makefile
+++ b/arch/mips/boot/dts/lantiq/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_DT_EASY50712)	+= easy50712.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile
index c51164537c02..3c6aed9f5439 100644
--- a/arch/mips/boot/dts/mscc/Makefile
+++ b/arch/mips/boot/dts/mscc/Makefile
@@ -1,3 +1,3 @@
 dtb-$(CONFIG_LEGACY_BOARD_OCELOT)	+= ocelot_pcb123.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mti/Makefile b/arch/mips/boot/dts/mti/Makefile
index 3508720cb6d9..b5f7426998b1 100644
--- a/arch/mips/boot/dts/mti/Makefile
+++ b/arch/mips/boot/dts/mti/Makefile
@@ -2,4 +2,4 @@
 dtb-$(CONFIG_MIPS_MALTA)	+= malta.dtb
 dtb-$(CONFIG_LEGACY_BOARD_SEAD3)	+= sead3.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/netlogic/Makefile b/arch/mips/boot/dts/netlogic/Makefile
index d630b27950f0..45af4224494f 100644
--- a/arch/mips/boot/dts/netlogic/Makefile
+++ b/arch/mips/boot/dts/netlogic/Makefile
@@ -5,4 +5,4 @@ dtb-$(CONFIG_DT_XLP_FVP)	+= xlp_fvp.dtb
 dtb-$(CONFIG_DT_XLP_GVP)	+= xlp_gvp.dtb
 dtb-$(CONFIG_DT_XLP_RVP)	+= xlp_rvp.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
index ba9bcef8fde9..fb57f36324db 100644
--- a/arch/mips/boot/dts/pic32/Makefile
+++ b/arch/mips/boot/dts/pic32/Makefile
@@ -4,4 +4,4 @@ dtb-$(CONFIG_DTB_PIC32_MZDA_SK)		+= pic32mzda_sk.dtb
 dtb-$(CONFIG_DTB_PIC32_NONE)		+= \
 					pic32mzda_sk.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/ralink/Makefile b/arch/mips/boot/dts/ralink/Makefile
index 94bee5b38b53..6c26dfa0a903 100644
--- a/arch/mips/boot/dts/ralink/Makefile
+++ b/arch/mips/boot/dts/ralink/Makefile
@@ -6,4 +6,4 @@ dtb-$(CONFIG_DTB_MT7620A_EVAL)	+= mt7620a_eval.dtb
 dtb-$(CONFIG_DTB_OMEGA2P)	+= omega2p.dtb
 dtb-$(CONFIG_DTB_VOCORE2)	+= vocore2.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile
index 9987e0e378c5..285973fc6169 100644
--- a/arch/mips/boot/dts/xilfpga/Makefile
+++ b/arch/mips/boot/dts/xilfpga/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)	+= nexys4ddr.dtb
 
-obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .o, $(dtb-y))
-- 
cgit v1.2.3


From 425f1e6272567c4d91221957d8b65fc5f6aae501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Sun, 8 Apr 2018 22:57:32 +0200
Subject: MIPS: BCM47XX: Add support for Netgear WNR1000 V3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds support for detecting this model board and registers some LEDs
and buttons.

There are two uncommon things regarding this device:
1) It can use two different "board_id" ID values.
   Unit I have uses "U12H139T00_NETGEAR" value. This magic is also used
   in firmware file header. There are two reports (one from an OpenWrt
   user) of a different "U12H139T50_NETGEAR" magic though.
2) Power LEDs share GPIOs with buttons.
   Amber one seems to share GPIO 2 with WPS button and green one seems
   to share GPIO 3 with reset button. It remains unknown how to support
   them and handle buttons at the same time. For that reason they aren't
   added to the list of supported LEDs.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19004/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/bcm47xx/board.c                          | 2 ++
 arch/mips/bcm47xx/buttons.c                        | 9 +++++++++
 arch/mips/bcm47xx/leds.c                           | 9 +++++++++
 arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h | 1 +
 4 files changed, 21 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c
index edfaef0d73a4..a80910d2738c 100644
--- a/arch/mips/bcm47xx/board.c
+++ b/arch/mips/bcm47xx/board.c
@@ -172,6 +172,8 @@ struct bcm47xx_board_type_list1 bcm47xx_board_list_board_id[] __initconst = {
 	{{BCM47XX_BOARD_NETGEAR_WNDR4000, "Netgear WNDR4000"}, "U12H181T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR4500V1, "Netgear WNDR4500 V1"}, "U12H189T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNDR4500V2, "Netgear WNDR4500 V2"}, "U12H224T00_NETGEAR"},
+	{{BCM47XX_BOARD_NETGEAR_WNR1000_V3, "Netgear WNR1000 V3"}, "U12H139T00_NETGEAR"},
+	{{BCM47XX_BOARD_NETGEAR_WNR1000_V3, "Netgear WNR1000 V3"}, "U12H139T50_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNR2000, "Netgear WNR2000"}, "U12H114T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNR3500L, "Netgear WNR3500L"}, "U12H136T99_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNR3500U, "Netgear WNR3500U"}, "U12H136T00_NETGEAR"},
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 88d400d256c4..977990a609ba 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -411,6 +411,12 @@ bcm47xx_buttons_netgear_wndr4500v1[] __initconst = {
 	BCM47XX_GPIO_KEY(6, KEY_RESTART),
 };
 
+static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wnr1000_v3[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(3, KEY_RESTART),
+};
+
 static const struct gpio_keys_button
 bcm47xx_buttons_netgear_wnr3500lv1[] __initconst = {
 	BCM47XX_GPIO_KEY(4, KEY_RESTART),
@@ -670,6 +676,9 @@ int __init bcm47xx_buttons_register(void)
 	case BCM47XX_BOARD_NETGEAR_WNDR4500V1:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr4500v1);
 		break;
+	case BCM47XX_BOARD_NETGEAR_WNR1000_V3:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wnr1000_v3);
+		break;
 	case BCM47XX_BOARD_NETGEAR_WNR3500L:
 		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wnr3500lv1);
 		break;
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 34a7b3fbdfd9..3fe015602945 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -497,6 +497,12 @@ bcm47xx_leds_netgear_wndr4500v1[] __initconst = {
 	BCM47XX_GPIO_LED(14, "green", "usb2", 1, LEDS_GPIO_DEFSTATE_OFF),
 };
 
+static const struct gpio_led
+bcm47xx_leds_netgear_wnr1000_v3[] __initconst = {
+	BCM47XX_GPIO_LED(0, "blue", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
 static const struct gpio_led
 bcm47xx_leds_netgear_wnr3500lv1[] __initconst = {
 	BCM47XX_GPIO_LED(0, "blue", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
@@ -758,6 +764,9 @@ void __init bcm47xx_leds_register(void)
 	case BCM47XX_BOARD_NETGEAR_WNDR4500V1:
 		bcm47xx_set_pdata(bcm47xx_leds_netgear_wndr4500v1);
 		break;
+	case BCM47XX_BOARD_NETGEAR_WNR1000_V3:
+		bcm47xx_set_pdata(bcm47xx_leds_netgear_wnr1000_v3);
+		break;
 	case BCM47XX_BOARD_NETGEAR_WNR3500L:
 		bcm47xx_set_pdata(bcm47xx_leds_netgear_wnr3500lv1);
 		break;
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
index cbf9da7f2f94..0ef8893e07f8 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
@@ -110,6 +110,7 @@ enum bcm47xx_board {
 	BCM47XX_BOARD_NETGEAR_WNDR4000,
 	BCM47XX_BOARD_NETGEAR_WNDR4500V1,
 	BCM47XX_BOARD_NETGEAR_WNDR4500V2,
+	BCM47XX_BOARD_NETGEAR_WNR1000_V3,
 	BCM47XX_BOARD_NETGEAR_WNR2000,
 	BCM47XX_BOARD_NETGEAR_WNR3500L,
 	BCM47XX_BOARD_NETGEAR_WNR3500U,
-- 
cgit v1.2.3


From e3d5980568fdf83c15a5a3c8ddca1590551ab7a2 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Wed, 11 Apr 2018 08:50:17 +0100
Subject: lib: Rename compiler intrinsic selects to GENERIC_LIB_*

When these are included into arch Kconfig files, maintaining
alphabetical ordering of the selects means these get split up. To allow
for keeping things tidier and alphabetical, rename the selects to
GENERIC_LIB_*

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Antony Pavlov <antonynpavlov@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-riscv@lists.infradead.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19049/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/riscv/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 23d8acca5c90..5287c1441d66 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -105,9 +105,9 @@ config ARCH_RV32I
 	bool "RV32I"
 	select CPU_SUPPORTS_32BIT_KERNEL
 	select 32BIT
-	select GENERIC_ASHLDI3
-	select GENERIC_ASHRDI3
-	select GENERIC_LSHRDI3
+	select GENERIC_LIB_ASHLDI3
+	select GENERIC_LIB_ASHRDI3
+	select GENERIC_LIB_LSHRDI3
 
 config ARCH_RV64I
 	bool "RV64I"
-- 
cgit v1.2.3


From 9ed491b88bc6f318c1a79d4f298ac0d78a2de587 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Wed, 11 Apr 2018 08:50:18 +0100
Subject: MIPS: vmlinuz: Use generic ashldi3

In preparation for removing some of the MIPS compiler intrinsics from
arch/mips/lib, first update the build of vmlinuz to use the generic
ashldi3 from lib.

Both ashldi3 and bswapsi objects need to be built with different CFLAGS
for inclusion to vmlinuz rather than simply including the object built
for the main kernel image. The objects cannot be built directly from
source, since CONFIG_MODVERSIONS changes cmd_cc_o_c to prevent this.

Split the rule to ship ashldi3 and bswapsi from the relevant source
locations.

These files make no reference to other files in their directory, so the
additional CFLAGS are apparently unnecessary - remove them as well.

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Antony Pavlov <antonynpavlov@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19050/
[jhogan@kernel.org: Add if_changed and FORCE to fix build failure when
 arch/mips/boot/compressed/ashldi3.c is already generated but there is
 no .ashldi3.c.cmd file yet]
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/compressed/Makefile | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index adce180f3ee4..abe77add8789 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -46,10 +46,13 @@ $(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
 
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o $(obj)/bswapsi.o
 
-extra-y += ashldi3.c bswapsi.c
-$(obj)/ashldi3.o $(obj)/bswapsi.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
-$(obj)/ashldi3.c $(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c
-	$(call cmd,shipped)
+extra-y += ashldi3.c
+$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
+	$(call if_changed,shipped)
+
+extra-y += bswapsi.c
+$(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
+	$(call if_changed,shipped)
 
 targets := $(notdir $(vmlinuzobjs-y))
 
-- 
cgit v1.2.3


From 740129b36faf049e6845819144542a0455e1e285 Mon Sep 17 00:00:00 2001
From: Antony Pavlov <antonynpavlov@gmail.com>
Date: Wed, 11 Apr 2018 08:50:19 +0100
Subject: MIPS: Use generic GCC library routines from lib/

The commit b35cd9884fa5 ("lib: Add shared copies of some GCC library
routines") makes it possible to share generic GCC library routines by
several architectures.

This commit removes several generic GCC library routines from
arch/mips/lib/ in favour of similar routines from lib/.

Signed-off-by: Antony Pavlov <antonynpavlov@gmail.com>
[Matt Redfearn] Use GENERIC_LIB_* named Kconfig entries
Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19051/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/Kconfig       |  5 +++++
 arch/mips/lib/Makefile  |  3 +--
 arch/mips/lib/ashldi3.c | 30 ------------------------------
 arch/mips/lib/ashrdi3.c | 32 --------------------------------
 arch/mips/lib/cmpdi2.c  | 28 ----------------------------
 arch/mips/lib/lshrdi3.c | 30 ------------------------------
 arch/mips/lib/ucmpdi2.c | 22 ----------------------
 7 files changed, 6 insertions(+), 144 deletions(-)
 delete mode 100644 arch/mips/lib/ashldi3.c
 delete mode 100644 arch/mips/lib/ashrdi3.c
 delete mode 100644 arch/mips/lib/cmpdi2.c
 delete mode 100644 arch/mips/lib/lshrdi3.c
 delete mode 100644 arch/mips/lib/ucmpdi2.c

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 225c95da23ce..79a864cfc595 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -22,6 +22,11 @@ config MIPS
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_LIB_ASHLDI3
+	select GENERIC_LIB_ASHRDI3
+	select GENERIC_LIB_CMPDI2
+	select GENERIC_LIB_LSHRDI3
+	select GENERIC_LIB_UCMPDI2
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index e84e12655fa8..6537e022ef62 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_CPU_R3000)		+= r3k_dump_tlb.o
 obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
-	 ucmpdi2.o
+obj-y += bswapsi.o bswapdi.o multi3.o
diff --git a/arch/mips/lib/ashldi3.c b/arch/mips/lib/ashldi3.c
deleted file mode 100644
index 24cd6903e797..000000000000
--- a/arch/mips/lib/ashldi3.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
-
-#include "libgcc.h"
-
-long long notrace __ashldi3(long long u, word_type b)
-{
-	DWunion uu, w;
-	word_type bm;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-	bm = 32 - b;
-
-	if (bm <= 0) {
-		w.s.low = 0;
-		w.s.high = (unsigned int) uu.s.low << -bm;
-	} else {
-		const unsigned int carries = (unsigned int) uu.s.low >> bm;
-
-		w.s.low = (unsigned int) uu.s.low << b;
-		w.s.high = ((unsigned int) uu.s.high << b) | carries;
-	}
-
-	return w.ll;
-}
-
-EXPORT_SYMBOL(__ashldi3);
diff --git a/arch/mips/lib/ashrdi3.c b/arch/mips/lib/ashrdi3.c
deleted file mode 100644
index 23f5295af51e..000000000000
--- a/arch/mips/lib/ashrdi3.c
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
-
-#include "libgcc.h"
-
-long long notrace __ashrdi3(long long u, word_type b)
-{
-	DWunion uu, w;
-	word_type bm;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-	bm = 32 - b;
-
-	if (bm <= 0) {
-		/* w.s.high = 1..1 or 0..0 */
-		w.s.high =
-		    uu.s.high >> 31;
-		w.s.low = uu.s.high >> -bm;
-	} else {
-		const unsigned int carries = (unsigned int) uu.s.high << bm;
-
-		w.s.high = uu.s.high >> b;
-		w.s.low = ((unsigned int) uu.s.low >> b) | carries;
-	}
-
-	return w.ll;
-}
-
-EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/mips/lib/cmpdi2.c b/arch/mips/lib/cmpdi2.c
deleted file mode 100644
index 93cfc785927d..000000000000
--- a/arch/mips/lib/cmpdi2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
-
-#include "libgcc.h"
-
-word_type notrace __cmpdi2(long long a, long long b)
-{
-	const DWunion au = {
-		.ll = a
-	};
-	const DWunion bu = {
-		.ll = b
-	};
-
-	if (au.s.high < bu.s.high)
-		return 0;
-	else if (au.s.high > bu.s.high)
-		return 2;
-
-	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
-		return 0;
-	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
-		return 2;
-
-	return 1;
-}
-
-EXPORT_SYMBOL(__cmpdi2);
diff --git a/arch/mips/lib/lshrdi3.c b/arch/mips/lib/lshrdi3.c
deleted file mode 100644
index 914b971aca3b..000000000000
--- a/arch/mips/lib/lshrdi3.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
-
-#include "libgcc.h"
-
-long long notrace __lshrdi3(long long u, word_type b)
-{
-	DWunion uu, w;
-	word_type bm;
-
-	if (b == 0)
-		return u;
-
-	uu.ll = u;
-	bm = 32 - b;
-
-	if (bm <= 0) {
-		w.s.high = 0;
-		w.s.low = (unsigned int) uu.s.high >> -bm;
-	} else {
-		const unsigned int carries = (unsigned int) uu.s.high << bm;
-
-		w.s.high = (unsigned int) uu.s.high >> b;
-		w.s.low = ((unsigned int) uu.s.low >> b) | carries;
-	}
-
-	return w.ll;
-}
-
-EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/mips/lib/ucmpdi2.c b/arch/mips/lib/ucmpdi2.c
deleted file mode 100644
index c31c78ca4175..000000000000
--- a/arch/mips/lib/ucmpdi2.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/export.h>
-
-#include "libgcc.h"
-
-word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b)
-{
-	const DWunion au = {.ll = a};
-	const DWunion bu = {.ll = b};
-
-	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
-		return 0;
-	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
-		return 2;
-	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
-		return 0;
-	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
-		return 2;
-	return 1;
-}
-
-EXPORT_SYMBOL(__ucmpdi2);
-- 
cgit v1.2.3


From 23f8adc497b7cf1d21845f2bedca354f955f3a8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 23 Mar 2018 23:58:07 +0100
Subject: MIPS: BCM47XX: Use __initdata for the bcm47xx_leds_pdata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This struct variable is used during init only. It gets passed to the
gpio_led_register_device() which creates its own data copy. That allows
using __initdata and saving some minimal amount of memory.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/18928/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/bcm47xx/leds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 3fe015602945..d85fcdac8bf0 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -538,7 +538,7 @@ bcm47xx_leds_simpletech_simpleshare[] __initconst = {
  * Init
  **************************************************/
 
-static struct gpio_led_platform_data bcm47xx_leds_pdata;
+static struct gpio_led_platform_data bcm47xx_leds_pdata __initdata;
 
 #define bcm47xx_set_pdata(dev_leds) do {				\
 	bcm47xx_leds_pdata.leds = dev_leds;				\
-- 
cgit v1.2.3


From a780a3ea628268b2ad0ed43d7f28d90db0ff18be Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Sun, 13 May 2018 02:24:47 -0700
Subject: KVM: X86: Fix reserved bits check for MOV to CR3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MSB of CR3 is a reserved bit if the PCIDE bit is not set in CR4.
It should be checked when PCIDE bit is not set, however commit
'd1cd3ce900441 ("KVM: MMU: check guest CR3 reserved bits based on
its physical address width")' removes the bit 63 checking
unconditionally. This patch fixes it by checking bit 63 of CR3
when PCIDE bit is not set in CR4.

Fixes: d1cd3ce900441 (KVM: MMU: check guest CR3 reserved bits based on its physical address width)
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Cc: stable@vger.kernel.org
Reviewed-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 4 +++-
 arch/x86/kvm/x86.c     | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b3705ae52824..143b7ae52624 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4189,7 +4189,9 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 				maxphyaddr = eax & 0xff;
 			else
 				maxphyaddr = 36;
-			rsvd = rsvd_bits(maxphyaddr, 62);
+			rsvd = rsvd_bits(maxphyaddr, 63);
+			if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
+				rsvd &= ~CR3_PCID_INVD;
 		}
 
 		if (new_val & rsvd)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37dd9a9d050a..e6b4e5665d74 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -856,7 +856,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 
 	if (is_long_mode(vcpu) &&
-	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
 		return 1;
 	else if (is_pae(vcpu) && is_paging(vcpu) &&
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
-- 
cgit v1.2.3


From 74b566e6cf21f07df385d593a7fcf8bbfc5d3f0f Mon Sep 17 00:00:00 2001
From: Junaid Shahid <junaids@google.com>
Date: Fri, 4 May 2018 11:37:11 -0700
Subject: kvm: x86: Refactor mmu_free_roots()

Extract the logic to free a root page in a separate function to avoid code
duplication in mmu_free_roots(). Also, change it to an exported function
i.e. kvm_mmu_free_roots().

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 64 +++++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c25775fad4ed..8cb846162694 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1277,6 +1277,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8494dbae41b9..98717cafdbcb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,7 +222,6 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
 static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
-static void mmu_free_roots(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
 {
@@ -3342,51 +3341,48 @@ out_unlock:
 	return RET_PF_RETRY;
 }
 
-
-static void mmu_free_roots(struct kvm_vcpu *vcpu)
+static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+			       struct list_head *invalid_list)
 {
-	int i;
 	struct kvm_mmu_page *sp;
-	LIST_HEAD(invalid_list);
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+	if (!VALID_PAGE(*root_hpa))
 		return;
 
-	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
-	    (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
-	     vcpu->arch.mmu.direct_map)) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+	sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
+	--sp->root_count;
+	if (!sp->root_count && sp->role.invalid)
+		kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
 
-		spin_lock(&vcpu->kvm->mmu_lock);
-		sp = page_header(root);
-		--sp->root_count;
-		if (!sp->root_count && sp->role.invalid) {
-			kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
-			kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
-		}
-		spin_unlock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	*root_hpa = INVALID_PAGE;
+}
+
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+{
+	int i;
+	LIST_HEAD(invalid_list);
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	if (!VALID_PAGE(mmu->root_hpa))
 		return;
-	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
-		if (root) {
-			root &= PT64_BASE_ADDR_MASK;
-			sp = page_header(root);
-			--sp->root_count;
-			if (!sp->root_count && sp->role.invalid)
-				kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
-							 &invalid_list);
-		}
-		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+	    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+		mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
+	} else {
+		for (i = 0; i < 4; ++i)
+			if (mmu->pae_root[i] != 0)
+				mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
+						   &invalid_list);
+		mmu->root_hpa = INVALID_PAGE;
 	}
+
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 
 static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
 {
@@ -3950,7 +3946,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-	mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4663,7 +4659,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu);
 	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
-- 
cgit v1.2.3


From ceef7d10dfb6284d512c499292e6daa35ea83f90 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 16 Apr 2018 12:50:33 +0200
Subject: KVM: x86: VMX: hyper-v: Enlightened MSR-Bitmap support

Enlightened MSR-Bitmap is a natural extension of Enlightened VMCS:
Hyper-V Top Level Functional Specification states:

"The L1 hypervisor may collaborate with the L0 hypervisor to make MSR
accesses more efficient. It can enable enlightened MSR bitmaps by setting
the corresponding field in the enlightened VMCS to 1. When enabled, the L0
hypervisor does not monitor the MSR bitmaps for changes. Instead, the L1
hypervisor must invalidate the corresponding clean field after making
changes to one of the MSR bitmaps."

I reached out to Hyper-V team for additional details and I got the
following information:

"Current Hyper-V implementation works as following:

If the enlightened MSR bitmap is not enabled:
- All MSR accesses of L2 guests cause physical VM-Exits

If the enlightened MSR bitmap is enabled:
- Physical VM-Exits for L2 accesses to certain MSRs (currently FS_BASE,
  GS_BASE and KERNEL_GS_BASE) are avoided, thus making these MSR accesses
  faster."

I tested my series with a tight rdmsrl loop in L2, for KERNEL_GS_BASE the
results are:

Without Enlightened MSR-Bitmap: 1300 cycles/read
With Enlightened MSR-Bitmap: 120 cycles/read

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h |  9 ++++++++-
 arch/x86/kvm/vmx.c                 | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 416cb0e0c496..a8897615354e 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -303,6 +303,9 @@ struct ms_hyperv_tsc_page {
 /* TSC emulation after migration */
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
 
+/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_MSR_BITMAP		BIT(19)
+
 struct hv_reenlightenment_control {
 	__u64 vector:8;
 	__u64 reserved1:8;
@@ -668,7 +671,11 @@ struct hv_enlightened_vmcs {
 	u32 hv_clean_fields;
 	u32 hv_padding_32;
 	u32 hv_synthetic_controls;
-	u32 hv_enlightenments_control;
+	struct {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 reserved:30;
+	} hv_enlightenments_control;
 	u32 hv_vp_id;
 
 	u64 hv_vm_id;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3f1696570b41..467cab4e0efd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1089,6 +1089,16 @@ static inline u16 evmcs_read16(unsigned long field)
 	return *(u16 *)((char *)current_evmcs + offset);
 }
 
+static inline void evmcs_touch_msr_bitmap(void)
+{
+	if (unlikely(!current_evmcs))
+		return;
+
+	if (current_evmcs->hv_enlightenments_control.msr_bitmap)
+		current_evmcs->hv_clean_fields &=
+			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+}
+
 static void evmcs_load(u64 phys_addr)
 {
 	struct hv_vp_assist_page *vp_ap =
@@ -1173,6 +1183,7 @@ static inline u32 evmcs_read32(unsigned long field) { return 0; }
 static inline u16 evmcs_read16(unsigned long field) { return 0; }
 static inline void evmcs_load(u64 phys_addr) {}
 static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+static inline void evmcs_touch_msr_bitmap(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 static inline bool is_exception_n(u32 intr_info, u8 vector)
@@ -4219,6 +4230,14 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 		if (!loaded_vmcs->msr_bitmap)
 			goto out_vmcs;
 		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+
+		if (static_branch_unlikely(&enable_evmcs) &&
+		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+			struct hv_enlightened_vmcs *evmcs =
+				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+
+			evmcs->hv_enlightenments_control.msr_bitmap = 1;
+		}
 	}
 	return 0;
 
@@ -5332,6 +5351,9 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
+	if (static_branch_unlikely(&enable_evmcs))
+		evmcs_touch_msr_bitmap();
+
 	/*
 	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
 	 * have the write-low and read-high bitmap offsets the wrong way round.
@@ -5367,6 +5389,9 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
+	if (static_branch_unlikely(&enable_evmcs))
+		evmcs_touch_msr_bitmap();
+
 	/*
 	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
 	 * have the write-low and read-high bitmap offsets the wrong way round.
-- 
cgit v1.2.3


From 588716494258899389206fa50426e78cc9df89b9 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 16:56:04 -0400
Subject: kvm: vmx: Introduce lapic_mode enumeration

The local APIC can be in one of three modes: disabled, xAPIC or
x2APIC. (A fourth mode, "invalid," is included for completeness.)

Using the new enumeration can make some of the APIC mode logic easier
to read. In kvm_set_apic_base, for instance, it is clear that one
cannot transition directly from x2APIC mode to xAPIC mode or directly
from APIC disabled to x2APIC mode.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
[Check invalid bits even if msr_info->host_initiated.  Reported by
 Wanpeng Li. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.h | 14 ++++++++++++++
 arch/x86/kvm/x86.c   | 26 +++++++++++++++-----------
 2 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index edce055e9fd7..ed0ed39abd36 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,6 +16,13 @@
 #define APIC_BUS_CYCLE_NS       1
 #define APIC_BUS_FREQUENCY      (1000000000ULL / APIC_BUS_CYCLE_NS)
 
+enum lapic_mode {
+	LAPIC_MODE_DISABLED = 0,
+	LAPIC_MODE_INVALID = X2APIC_ENABLE,
+	LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE,
+	LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE,
+};
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
@@ -89,6 +96,7 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
+enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -220,4 +228,10 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+
+static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
+{
+	return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6b4e5665d74..182693f8bd71 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -318,23 +318,27 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
+enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
+{
+	return kvm_apic_mode(kvm_get_apic_base(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
+
 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
-	u64 old_state = vcpu->arch.apic_base &
-		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-	u64 new_state = msr_info->data &
-		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+	enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
+	enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
 	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
 		(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
 
-	if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
-		return 1;
-	if (!msr_info->host_initiated &&
-	    ((new_state == MSR_IA32_APICBASE_ENABLE &&
-	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
-	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
-	      old_state == 0)))
+	if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
 		return 1;
+	if (!msr_info->host_initiated) {
+		if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
+			return 1;
+		if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
+			return 1;
+	}
 
 	kvm_lapic_set_base(vcpu, msr_info->data);
 	return 0;
-- 
cgit v1.2.3


From 8d860bbeedef97fe981d28fa7b71d77f3b29563f Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 16:56:05 -0400
Subject: kvm: vmx: Basic APIC virtualization controls have three settings

Previously, we toggled between SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE
and SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, depending on whether or
not the EXTD bit was set in MSR_IA32_APICBASE. However, if the local
APIC is disabled, we should not set either of these APIC
virtualization control bits.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/lapic.c            | 12 +++++------
 arch/x86/kvm/svm.c              |  4 ++--
 arch/x86/kvm/vmx.c              | 48 +++++++++++++++++++++++++----------------
 4 files changed, 38 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8cb846162694..187c8e09a019 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -995,7 +995,7 @@ struct kvm_x86_ops {
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
-	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b74c9c1405b9..776391cf69a5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1990,13 +1990,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		}
 	}
 
-	if ((old_value ^ value) & X2APIC_ENABLE) {
-		if (value & X2APIC_ENABLE) {
-			kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
-			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
-		} else
-			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
-	}
+	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+
+	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+		kvm_x86_ops->set_virtual_apic_mode(vcpu);
 
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1fc05e428aba..220e5a89465a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5036,7 +5036,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
-static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 {
 	return;
 }
@@ -7076,7 +7076,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
-	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+	.set_virtual_apic_mode = svm_set_virtual_apic_mode,
 	.get_enable_apicv = svm_get_enable_apicv,
 	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 467cab4e0efd..4149c5ee09fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -481,7 +481,8 @@ struct nested_vmx {
 	bool sync_shadow_vmcs;
 	bool dirty_vmcs12;
 
-	bool change_vmcs01_virtual_x2apic_mode;
+	bool change_vmcs01_virtual_apic_mode;
+
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 
@@ -9281,31 +9282,43 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
-static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 {
 	u32 sec_exec_control;
 
+	if (!lapic_in_kernel(vcpu))
+		return;
+
 	/* Postpone execution until vmcs01 is the current VMCS. */
 	if (is_guest_mode(vcpu)) {
-		to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+		to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
 		return;
 	}
 
-	if (!cpu_has_vmx_virtualize_x2apic_mode())
-		return;
-
 	if (!cpu_need_tpr_shadow(vcpu))
 		return;
 
 	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+			      SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 
-	if (set) {
-		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-	} else {
-		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-		vmx_flush_tlb(vcpu, true);
+	switch (kvm_get_apic_mode(vcpu)) {
+	case LAPIC_MODE_INVALID:
+		WARN_ONCE(true, "Invalid local APIC state");
+	case LAPIC_MODE_DISABLED:
+		break;
+	case LAPIC_MODE_XAPIC:
+		if (flexpriority_enabled) {
+			sec_exec_control |=
+				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+			vmx_flush_tlb(vcpu, true);
+		}
+		break;
+	case LAPIC_MODE_X2APIC:
+		if (cpu_has_vmx_virtualize_x2apic_mode())
+			sec_exec_control |=
+				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+		break;
 	}
 	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
 
@@ -12087,10 +12100,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
 
-	if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
-		vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
-		vmx_set_virtual_x2apic_mode(vcpu,
-				vcpu->arch.apic_base & X2APIC_ENABLE);
+	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
+		vmx->nested.change_vmcs01_virtual_apic_mode = false;
+		vmx_set_virtual_apic_mode(vcpu);
 	} else if (!nested_cpu_has_ept(vmcs12) &&
 		   nested_cpu_has2(vmcs12,
 				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -12718,7 +12730,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
-	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+	.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
 	.get_enable_apicv = vmx_get_enable_apicv,
 	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
-- 
cgit v1.2.3


From ab5df31cee7f8f17adb59717cf569d315ec02644 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 17:02:03 -0400
Subject: kvm: nVMX: Eliminate APIC access page sharing between L1 and L2

It is only possible to share the APIC access page between L1 and L2 if
they also share the virtual-APIC page.  If L2 has its own virtual-APIC
page, then MMIO accesses to L1's TPR from L2 will access L2's TPR
instead.  Moreover, L1's local APIC has to be in xAPIC mode, which is
another condition that hasn't been checked.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4149c5ee09fc..ea098131dcce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8871,11 +8871,13 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 	case EXIT_REASON_TPR_BELOW_THRESHOLD:
 		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
 	case EXIT_REASON_APIC_ACCESS:
-		return nested_cpu_has2(vmcs12,
-			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 	case EXIT_REASON_APIC_WRITE:
 	case EXIT_REASON_EOI_INDUCED:
-		/* apic_write and eoi_induced should exit unconditionally. */
+		/*
+		 * The controls for "virtualize APIC accesses," "APIC-
+		 * register virtualization," and "virtual-interrupt
+		 * delivery" only come from vmcs12.
+		 */
 		return true;
 	case EXIT_REASON_EPT_VIOLATION:
 		/*
@@ -9327,24 +9329,7 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	/*
-	 * Currently we do not handle the nested case where L2 has an
-	 * APIC access page of its own; that page is still pinned.
-	 * Hence, we skip the case where the VCPU is in guest mode _and_
-	 * L1 prepared an APIC access page for L2.
-	 *
-	 * For the case where L1 and L2 share the same APIC access page
-	 * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
-	 * in the vmcs12), this function will only update either the vmcs01
-	 * or the vmcs02.  If the former, the vmcs02 will be updated by
-	 * prepare_vmcs02.  If the latter, the vmcs01 will be updated in
-	 * the next L2->L1 exit.
-	 */
-	if (!is_guest_mode(vcpu) ||
-	    !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
-			     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+	if (!is_guest_mode(vcpu)) {
 		vmcs_write64(APIC_ACCESS_ADDR, hpa);
 		vmx_flush_tlb(vcpu, true);
 	}
@@ -10418,11 +10403,6 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
 			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
 					SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 		}
-	} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
-		   cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
-		vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
-			      SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
-		kvm_vcpu_reload_apic_access_page(vcpu);
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-- 
cgit v1.2.3


From 1313cc2bd8f6568dd8801feef446afbe43e6d313 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 17:02:04 -0400
Subject: kvm: mmu: Add guest_mode to kvm_mmu_page_role

L1 and L2 need to have disjoint mappings, so that L1's APIC access
page (under VMX) can be omitted from L2's mappings.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 ++-
 arch/x86/kvm/mmu.c              | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 187c8e09a019..b27de80f5870 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -258,7 +258,8 @@ union kvm_mmu_page_role {
 		unsigned smep_andnot_wp:1;
 		unsigned smap_andnot_wp:1;
 		unsigned ad_disabled:1;
-		unsigned :7;
+		unsigned guest_mode:1;
+		unsigned :6;
 
 		/*
 		 * This is left at the top of the word so that
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 98717cafdbcb..ca04766edbd4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4468,6 +4468,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
 	context->base_role.word = 0;
+	context->base_role.guest_mode = is_guest_mode(vcpu);
 	context->base_role.smm = is_smm(vcpu);
 	context->base_role.ad_disabled = (shadow_accessed_mask == 0);
 	context->page_fault = tdp_page_fault;
@@ -4534,6 +4535,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 		= smep && !is_write_protection(vcpu);
 	context->base_role.smap_andnot_wp
 		= smap && !is_write_protection(vcpu);
+	context->base_role.guest_mode = is_guest_mode(vcpu);
 	context->base_role.smm = is_smm(vcpu);
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
@@ -4559,7 +4561,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 	context->base_role.ad_disabled = !accessed_dirty;
-
+	context->base_role.guest_mode = 1;
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
@@ -4820,6 +4822,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	mask.smep_andnot_wp = 1;
 	mask.smap_andnot_wp = 1;
 	mask.smm = 1;
+	mask.guest_mode = 1;
 	mask.ad_disabled = 1;
 
 	/*
-- 
cgit v1.2.3


From 3a2936dedd207b99c64bf1507a62a9ae44114220 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 17:02:05 -0400
Subject: kvm: mmu: Don't expose private memslots to L2

These private pages have special purposes in the virtualization of L1,
but not in the virtualization of L2. In particular, L1's APIC access
page should never be entered into L2's page tables, because this
causes a great deal of confusion when the APIC virtualization hardware
is being used to accelerate L2's accesses to its own APIC.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ca04766edbd4..8af8c8f88bd7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3807,6 +3807,14 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	struct kvm_memory_slot *slot;
 	bool async;
 
+	/*
+	 * Don't expose private memslots to L2.
+	 */
+	if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		*pfn = KVM_PFN_NOSLOT;
+		return false;
+	}
+
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	async = false;
 	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
-- 
cgit v1.2.3


From d7c72c57b144997241f6933b6548ea86ced1b844 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Thu, 19 Apr 2018 15:21:06 +0800
Subject: MIPS: sni: Remove the read_persistent_clock()

The dummy read_persistent_clock() uses a timespec, which is not year
2038 safe on 32bit systems. Thus remove this obsolete interface.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19114/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/sni/time.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 0eb7d1e8821b..dbace1f3e1a9 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -171,9 +171,3 @@ void __init plat_time_init(void)
 	}
 	setup_pit_timer();
 }
-
-void read_persistent_clock(struct timespec *ts)
-{
-	ts->tv_sec = -1;
-	ts->tv_nsec = 0;
-}
-- 
cgit v1.2.3


From 09adad17191942cac01ccfbb897b976ac8f42c22 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Mon, 7 May 2018 17:28:27 +0800
Subject: MIPS: Convert read_persistent_clock() to read_persistent_clock64()

Since struct timespec is not y2038 safe on 32bit machines, this patch
converts read_persistent_clock() to read_persistent_clock64() using
struct timespec64, as well as converting mktime() to mktime64().

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/dec/time.c                   |  4 ++--
 arch/mips/include/asm/mc146818-time.h  |  4 ++--
 arch/mips/lasat/ds1603.c               |  2 +-
 arch/mips/loongson64/common/time.c     |  2 +-
 arch/mips/mti-malta/malta-time.c       |  2 +-
 arch/mips/sibyte/swarm/rtc_m41t81.c    |  4 ++--
 arch/mips/sibyte/swarm/rtc_xicor1241.c |  4 ++--
 arch/mips/sibyte/swarm/setup.c         | 10 +++++-----
 8 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index a2a150e4fbc2..9e992cf1129e 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -19,7 +19,7 @@
 #include <asm/dec/ioasic.h>
 #include <asm/dec/machtype.h>
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	unsigned int year, mon, day, hour, min, sec, real_year;
 	unsigned long flags;
@@ -54,7 +54,7 @@ void read_persistent_clock(struct timespec *ts)
 
 	year += real_year - 72 + 2000;
 
-	ts->tv_sec = mktime(year, mon, day, hour, min, sec);
+	ts->tv_sec = mktime64(year, mon, day, hour, min, sec);
 	ts->tv_nsec = 0;
 }
 
diff --git a/arch/mips/include/asm/mc146818-time.h b/arch/mips/include/asm/mc146818-time.h
index 9e1ad26abdc0..cbf5cec345f1 100644
--- a/arch/mips/include/asm/mc146818-time.h
+++ b/arch/mips/include/asm/mc146818-time.h
@@ -86,7 +86,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 	return retval;
 }
 
-static inline unsigned long mc146818_get_cmos_time(void)
+static inline time64_t mc146818_get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 	unsigned long flags;
@@ -113,7 +113,7 @@ static inline unsigned long mc146818_get_cmos_time(void)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	year = mc146818_decode_year(year);
 
-	return mktime(year, mon, day, hour, min, sec);
+	return mktime64(year, mon, day, hour, min, sec);
 }
 
 #endif /* __ASM_MC146818_TIME_H */
diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c
index 8bd5cf820eed..d75c8875a643 100644
--- a/arch/mips/lasat/ds1603.c
+++ b/arch/mips/lasat/ds1603.c
@@ -136,7 +136,7 @@ static void rtc_end_op(void)
 	lasat_ndelay(1000);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	unsigned long word;
 	unsigned long flags;
diff --git a/arch/mips/loongson64/common/time.c b/arch/mips/loongson64/common/time.c
index e1a5382ad47e..0ba53c55ff33 100644
--- a/arch/mips/loongson64/common/time.c
+++ b/arch/mips/loongson64/common/time.c
@@ -29,7 +29,7 @@ void __init plat_time_init(void)
 #endif
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	ts->tv_sec = mc146818_get_cmos_time();
 	ts->tv_nsec = 0;
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 66c866740ff2..d22b7edc3886 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -134,7 +134,7 @@ static void __init estimate_frequencies(void)
 	}
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	ts->tv_sec = mc146818_get_cmos_time();
 	ts->tv_nsec = 0;
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index e62466445f08..aa27a2226472 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -188,7 +188,7 @@ int m41t81_set_time(unsigned long t)
 	return 0;
 }
 
-unsigned long m41t81_get_time(void)
+time64_t m41t81_get_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 	unsigned long flags;
@@ -218,7 +218,7 @@ unsigned long m41t81_get_time(void)
 
 	year += 2000;
 
-	return mktime(year, mon, day, hour, min, sec);
+	return mktime64(year, mon, day, hour, min, sec);
 }
 
 int m41t81_probe(void)
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index 50a82c495427..a2121c1345a9 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -168,7 +168,7 @@ int xicor_set_time(unsigned long t)
 	return 0;
 }
 
-unsigned long xicor_get_time(void)
+time64_t xicor_get_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec, y2k;
 	unsigned long flags;
@@ -201,7 +201,7 @@ unsigned long xicor_get_time(void)
 
 	year += (y2k * 100);
 
-	return mktime(year, mon, day, hour, min, sec);
+	return mktime64(year, mon, day, hour, min, sec);
 }
 
 int xicor_probe(void)
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 494fb0a475ac..7073940c02bf 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -58,11 +58,11 @@ extern void sb1250_setup(void);
 
 extern int xicor_probe(void);
 extern int xicor_set_time(unsigned long);
-extern unsigned long xicor_get_time(void);
+extern time64_t xicor_get_time(void);
 
 extern int m41t81_probe(void);
 extern int m41t81_set_time(unsigned long);
-extern unsigned long m41t81_get_time(void);
+extern time64_t m41t81_get_time(void);
 
 const char *get_system_type(void)
 {
@@ -87,9 +87,9 @@ enum swarm_rtc_type {
 
 enum swarm_rtc_type swarm_rtc_type;
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
-	unsigned long sec;
+	time64_t sec;
 
 	switch (swarm_rtc_type) {
 	case RTC_XICOR:
@@ -102,7 +102,7 @@ void read_persistent_clock(struct timespec *ts)
 
 	case RTC_NONE:
 	default:
-		sec = mktime(2000, 1, 1, 0, 0, 0);
+		sec = mktime64(2000, 1, 1, 0, 0, 0);
 		break;
 	}
 	ts->tv_sec = sec;
-- 
cgit v1.2.3


From f06e7aa47f3cad55c5737eb87280e90e25882d60 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Mon, 7 May 2018 17:28:28 +0800
Subject: MIPS: Convert update_persistent_clock() to
 update_persistent_clock64()

Since struct timespec is not y2038 safe on 32bit machines, this patch
converts update_persistent_clock() to update_persistent_clock64() using
struct timespec64.

The rtc_mips_set_time() and rtc_mips_set_mmss() interfaces were using
'unsigned long' type that is not y2038 safe on 32bit machines, moreover
there is only one platform implementing rtc_mips_set_time() and two
platforms implementing rtc_mips_set_mmss(), so we can just make them each
implement update_persistent_clock64() directly, to get that helper out
of the common mips code by removing rtc_mips_set_time() and
rtc_mips_set_mmss() interfaces.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/dec/time.c                   |  8 ++++----
 arch/mips/include/asm/time.h           |  9 ---------
 arch/mips/kernel/time.c                | 15 ---------------
 arch/mips/lasat/ds1603.c               |  9 +++++++--
 arch/mips/lasat/sysctl.c               | 12 ++++++++++--
 arch/mips/sibyte/swarm/rtc_m41t81.c    |  4 ++--
 arch/mips/sibyte/swarm/rtc_xicor1241.c |  4 ++--
 arch/mips/sibyte/swarm/setup.c         |  8 +++++---
 8 files changed, 30 insertions(+), 39 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 9e992cf1129e..c38686f89a18 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -59,14 +59,15 @@ void read_persistent_clock64(struct timespec64 *ts)
 }
 
 /*
- * In order to set the CMOS clock precisely, rtc_mips_set_mmss has to
+ * In order to set the CMOS clock precisely, update_persistent_clock64 has to
  * be called 500 ms after the second nowtime has started, because when
  * nowtime is written into the registers of the CMOS clock, it will
  * jump to the next second precisely 500 ms later.  Check the Dallas
  * DS1287 data sheet for details.
  */
-int rtc_mips_set_mmss(unsigned long nowtime)
+int update_persistent_clock64(struct timespec64 now)
 {
+	time64_t nowtime = now.tv_sec;
 	int retval = 0;
 	int real_seconds, real_minutes, cmos_minutes;
 	unsigned char save_control, save_freq_select;
@@ -91,8 +92,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 	 * messing with unknown time zones but requires your
 	 * RTC not to be off by more than 15 minutes
 	 */
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
+	real_minutes = div_s64_rem(nowtime, 60, &real_seconds);
 	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
 		real_minutes += 30;	/* correct for half hour time zone */
 	real_minutes %= 60;
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index 17d4cd20f18c..b85ec64ee7e9 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -21,15 +21,6 @@
 
 extern spinlock_t rtc_lock;
 
-/*
- * RTC ops.  By default, they point to weak no-op RTC functions.
- *	rtc_mips_set_time - reverse the above translation and set time to RTC.
- *	rtc_mips_set_mmss - similar to rtc_set_time, but only min and sec need
- *			to be set.  Used by RTC sync-up.
- */
-extern int rtc_mips_set_time(unsigned long);
-extern int rtc_mips_set_mmss(unsigned long);
-
 /*
  * board specific routines required by time_init().
  */
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index a6ebc8135112..bfe02ded25d1 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -34,21 +34,6 @@
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
-int __weak rtc_mips_set_time(unsigned long sec)
-{
-	return -ENODEV;
-}
-
-int __weak rtc_mips_set_mmss(unsigned long nowtime)
-{
-	return rtc_mips_set_time(nowtime);
-}
-
-int update_persistent_clock(struct timespec now)
-{
-	return rtc_mips_set_mmss(now.tv_sec);
-}
-
 static int null_perf_irq(void)
 {
 	return 0;
diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c
index d75c8875a643..e6ce39fefa78 100644
--- a/arch/mips/lasat/ds1603.c
+++ b/arch/mips/lasat/ds1603.c
@@ -152,14 +152,19 @@ void read_persistent_clock64(struct timespec64 *ts)
 	ts->tv_nsec = 0;
 }
 
-int rtc_mips_set_mmss(unsigned long time)
+int update_persistent_clock64(struct timespec64 now)
 {
+	time64_t time = now.tv_sec;
 	unsigned long flags;
 
 	spin_lock_irqsave(&rtc_lock, flags);
 	rtc_init_op();
 	rtc_write_byte(SET_TIME_CMD);
-	rtc_write_word(time);
+	/*
+	 * Due to the hardware limitation, we cast to 'unsigned long' type,
+	 * so it will overflow in year 2106 on 32-bit machine.
+	 */
+	rtc_write_word((unsigned long)time);
 	rtc_end_op();
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 6f7422400f32..ead07c243c6a 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -73,8 +73,16 @@ int proc_dolasatrtc(struct ctl_table *table, int write,
 	if (r)
 		return r;
 
-	if (write)
-		rtc_mips_set_mmss(rtctmp);
+	if (write) {
+		/*
+		 * Due to the RTC hardware limitation, we can not actually
+		 * use the full 64-bit range here.
+		 */
+		ts.tv_sec = rtctmp;
+		ts.tv_nsec = 0;
+
+		update_persistent_clock64(ts);
+	}
 
 	return 0;
 }
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index aa27a2226472..4ac8ccdf56bb 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -141,13 +141,13 @@ static int m41t81_write(uint8_t addr, int b)
 	return 0;
 }
 
-int m41t81_set_time(unsigned long t)
+int m41t81_set_time(time64_t t)
 {
 	struct rtc_time tm;
 	unsigned long flags;
 
 	/* Note we don't care about the century */
-	rtc_time_to_tm(t, &tm);
+	rtc_time64_to_tm(t, &tm);
 
 	/*
 	 * Note the write order matters as it ensures the correctness.
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index a2121c1345a9..2dcaaa7e3bfa 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -109,13 +109,13 @@ static int xicor_write(uint8_t addr, int b)
 	}
 }
 
-int xicor_set_time(unsigned long t)
+int xicor_set_time(time64_t t)
 {
 	struct rtc_time tm;
 	int tmp;
 	unsigned long flags;
 
-	rtc_time_to_tm(t, &tm);
+	rtc_time64_to_tm(t, &tm);
 	tm.tm_year += 1900;
 
 	spin_lock_irqsave(&rtc_lock, flags);
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 7073940c02bf..152ca71cc2d7 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -57,11 +57,11 @@ extern void sb1250_setup(void);
 #endif
 
 extern int xicor_probe(void);
-extern int xicor_set_time(unsigned long);
+extern int xicor_set_time(time64_t);
 extern time64_t xicor_get_time(void);
 
 extern int m41t81_probe(void);
-extern int m41t81_set_time(unsigned long);
+extern int m41t81_set_time(time64_t);
 extern time64_t m41t81_get_time(void);
 
 const char *get_system_type(void)
@@ -109,8 +109,10 @@ void read_persistent_clock64(struct timespec64 *ts)
 	ts->tv_nsec = 0;
 }
 
-int rtc_mips_set_time(unsigned long sec)
+int update_persistent_clock64(struct timespec64 now)
 {
+	time64_t sec = now.tv_sec;
+
 	switch (swarm_rtc_type) {
 	case RTC_XICOR:
 		return xicor_set_time(sec);
-- 
cgit v1.2.3


From f83e4e1e0ef5c6db4f5c249fe485b2f1029180c5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 3 May 2018 14:45:11 +0300
Subject: MIPS: Re-use kstrtobool_from_user()

Re-use kstrtobool_from_user() instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/mm/sc-debugfs.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c
index 2e2132d3f5c7..2a116084216f 100644
--- a/arch/mips/mm/sc-debugfs.c
+++ b/arch/mips/mm/sc-debugfs.c
@@ -31,17 +31,10 @@ static ssize_t sc_prefetch_write(struct file *file,
 				 const char __user *user_buf,
 				 size_t count, loff_t *ppos)
 {
-	char buf[32];
-	ssize_t buf_size;
 	bool enabled;
 	int err;
 
-	buf_size = min(count, sizeof(buf) - 1);
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-
-	buf[buf_size] = '\0';
-	err = strtobool(buf, &enabled);
+	err = kstrtobool_from_user(user_buf, count, &enabled);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From aae22f16022600bae990ec4c2f2cb997c7393216 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 10 May 2018 17:59:00 +0100
Subject: MIPS: VPE: Fix spelling mistake: "uneeded" -> "unneeded"

Trivial fix to spelling mistake in pr_warn message text.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: kernel-janitors@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/kernel/vpe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 544ea21bfef9..0bef238d2c0c 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -872,7 +872,7 @@ static ssize_t vpe_write(struct file *file, const char __user *buffer,
 		return -ENODEV;
 
 	if ((count + v->len) > v->plen) {
-		pr_warn("VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
+		pr_warn("VPE loader: elf size too big. Perhaps strip unneeded symbols\n");
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From fbc23c71dfc9406ee0585d058edd834b523ef0e7 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 10 May 2018 20:47:48 +0200
Subject: MIPS: JZ4740: dts: Add bindings for the jz4740-wdt driver

Also remove the watchdog platform_device from platform.c, since it
wasn't used anywhere anyway.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Wim Van Sebroeck <wim@linux-watchdog.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: linux-mips@linux-mips.org
Cc: linux-watchdog@vger.kernel.org
Cc: devicetree@vger.kernel.org
[jhogan@kernel.org: Drop jz4740_wdt_device declaration from header]
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/dts/ingenic/jz4740.dtsi       |  8 ++++++++
 arch/mips/include/asm/mach-jz4740/platform.h |  1 -
 arch/mips/jz4740/platform.c                  | 16 ----------------
 3 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index cd5185bb90ae..26c6b561d6f7 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -45,6 +45,14 @@
 		#clock-cells = <1>;
 	};
 
+	watchdog: watchdog@10002000 {
+		compatible = "ingenic,jz4740-watchdog";
+		reg = <0x10002000 0x10>;
+
+		clocks = <&cgu JZ4740_CLK_RTC>;
+		clock-names = "rtc";
+	};
+
 	rtc_dev: rtc@10003000 {
 		compatible = "ingenic,jz4740-rtc";
 		reg = <0x10003000 0x40>;
diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h
index 3645974b7f65..c0c932ac72a7 100644
--- a/arch/mips/include/asm/mach-jz4740/platform.h
+++ b/arch/mips/include/asm/mach-jz4740/platform.h
@@ -29,7 +29,6 @@ extern struct platform_device jz4740_i2s_device;
 extern struct platform_device jz4740_pcm_device;
 extern struct platform_device jz4740_codec_device;
 extern struct platform_device jz4740_adc_device;
-extern struct platform_device jz4740_wdt_device;
 extern struct platform_device jz4740_pwm_device;
 extern struct platform_device jz4740_dma_device;
 
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
index 5b7cdd67a9d9..cbc5f8e87230 100644
--- a/arch/mips/jz4740/platform.c
+++ b/arch/mips/jz4740/platform.c
@@ -233,22 +233,6 @@ struct platform_device jz4740_adc_device = {
 	.resource	= jz4740_adc_resources,
 };
 
-/* Watchdog */
-static struct resource jz4740_wdt_resources[] = {
-	{
-		.start = JZ4740_WDT_BASE_ADDR,
-		.end   = JZ4740_WDT_BASE_ADDR + 0x10 - 1,
-		.flags = IORESOURCE_MEM,
-	},
-};
-
-struct platform_device jz4740_wdt_device = {
-	.name	       = "jz4740-wdt",
-	.id	       = -1,
-	.num_resources = ARRAY_SIZE(jz4740_wdt_resources),
-	.resource      = jz4740_wdt_resources,
-};
-
 /* PWM */
 struct platform_device jz4740_pwm_device = {
 	.name = "jz4740-pwm",
-- 
cgit v1.2.3


From 9a0225d99d669e41fa012a7419cd3cc109914d0b Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 10 May 2018 20:47:49 +0200
Subject: MIPS: JZ4780: dts: Fix watchdog node

- The previous node requested a memory area of 0x100 bytes, while the
  driver only manipulates four registers present in the first 0x10 bytes.

- The driver requests for the "rtc" clock, but the previous node did not
  provide any.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Mathieu Malaterre <malat@debian.org>
Acked-by: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Wim Van Sebroeck <wim@linux-watchdog.org>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: linux-mips@linux-mips.org
Cc: devicetree@vger.kernel.org
Cc: linux-watchdog@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/dts/ingenic/jz4780.dtsi | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index 9b5794667aee..a52f59bf58c7 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -221,7 +221,10 @@
 
 	watchdog: watchdog@10002000 {
 		compatible = "ingenic,jz4780-watchdog";
-		reg = <0x10002000 0x100>;
+		reg = <0x10002000 0x10>;
+
+		clocks = <&cgu JZ4780_CLK_RTCLK>;
+		clock-names = "rtc";
 	};
 
 	nemc: nemc@13410000 {
-- 
cgit v1.2.3


From c49173ffcad08846e730d4fc49c68b0e4a3448e4 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 10 May 2018 20:47:50 +0200
Subject: MIPS: qi_lb60: Enable the jz4740-wdt driver

The watchdog is an useful piece of hardware, so there's no reason not to
enable it.

Besides, this is important for restart to work after the change in the
next commit.

This commit enables the Kconfig option in the qi_lb60 defconfig.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Wim Van Sebroeck <wim@linux-watchdog.org>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: linux-mips@linux-mips.org
Cc: linux-watchdog@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/configs/qi_lb60_defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index 3b02ff9a7c64..d8b7211a7b0f 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -72,6 +72,8 @@ CONFIG_POWER_SUPPLY=y
 CONFIG_BATTERY_JZ4740=y
 CONFIG_CHARGER_GPIO=y
 # CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_JZ4740_WDT=y
 CONFIG_MFD_JZ4740_ADC=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
-- 
cgit v1.2.3


From 1761ad8c9e7985817b037c9a534fbbca452a7fda Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 10 May 2018 20:47:51 +0200
Subject: MIPS: JZ4740: Drop old platform reset code

This work is now performed by the watchdog driver directly.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Wim Van Sebroeck <wim@linux-watchdog.org>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: linux-mips@linux-mips.org
Cc: linux-watchdog@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/jz4740/reset.c | 31 -------------------------------
 1 file changed, 31 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c
index 67780c4b6573..5bf0cf44b55f 100644
--- a/arch/mips/jz4740/reset.c
+++ b/arch/mips/jz4740/reset.c
@@ -12,18 +12,9 @@
  *
  */
 
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/pm.h>
-
 #include <asm/reboot.h>
 
-#include <asm/mach-jz4740/base.h>
-#include <asm/mach-jz4740/timer.h>
-
 #include "reset.h"
-#include "clock.h"
 
 static void jz4740_halt(void)
 {
@@ -36,29 +27,7 @@ static void jz4740_halt(void)
 	}
 }
 
-#define JZ_REG_WDT_DATA 0x00
-#define JZ_REG_WDT_COUNTER_ENABLE 0x04
-#define JZ_REG_WDT_COUNTER 0x08
-#define JZ_REG_WDT_CTRL 0x0c
-
-static void jz4740_restart(char *command)
-{
-	void __iomem *wdt_base = ioremap(JZ4740_WDT_BASE_ADDR, 0x0f);
-
-	jz4740_timer_enable_watchdog();
-
-	writeb(0, wdt_base + JZ_REG_WDT_COUNTER_ENABLE);
-
-	writew(0, wdt_base + JZ_REG_WDT_COUNTER);
-	writew(0, wdt_base + JZ_REG_WDT_DATA);
-	writew(BIT(2), wdt_base + JZ_REG_WDT_CTRL);
-
-	writeb(1, wdt_base + JZ_REG_WDT_COUNTER_ENABLE);
-	jz4740_halt();
-}
-
 void jz4740_reset_init(void)
 {
-	_machine_restart = jz4740_restart;
 	_machine_halt = jz4740_halt;
 }
-- 
cgit v1.2.3


From 49b031690abe1eb135a685c75fe78e9591f8818b Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 14 May 2018 22:04:58 +0200
Subject: MIPS: mscc: Add switch to ocelot

Ocelot has an integrated switch, add support for it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/dts/mscc/ocelot.dtsi | 88 +++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index dd239cab2f9d..4f33dbc67348 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -91,6 +91,72 @@
 			status = "disabled";
 		};
 
+		switch@1010000 {
+			compatible = "mscc,vsc7514-switch";
+			reg = <0x1010000 0x10000>,
+			      <0x1030000 0x10000>,
+			      <0x1080000 0x100>,
+			      <0x10d0000 0x10000>,
+			      <0x11e0000 0x100>,
+			      <0x11f0000 0x100>,
+			      <0x1200000 0x100>,
+			      <0x1210000 0x100>,
+			      <0x1220000 0x100>,
+			      <0x1230000 0x100>,
+			      <0x1240000 0x100>,
+			      <0x1250000 0x100>,
+			      <0x1260000 0x100>,
+			      <0x1270000 0x100>,
+			      <0x1280000 0x100>,
+			      <0x1800000 0x80000>,
+			      <0x1880000 0x10000>;
+			reg-names = "sys", "rew", "qs", "hsio", "port0",
+				    "port1", "port2", "port3", "port4", "port5",
+				    "port6", "port7", "port8", "port9", "port10",
+				    "qsys", "ana";
+			interrupts = <21 22>;
+			interrupt-names = "xtr", "inj";
+
+			ethernet-ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port0: port@0 {
+					reg = <0>;
+				};
+				port1: port@1 {
+					reg = <1>;
+				};
+				port2: port@2 {
+					reg = <2>;
+				};
+				port3: port@3 {
+					reg = <3>;
+				};
+				port4: port@4 {
+					reg = <4>;
+				};
+				port5: port@5 {
+					reg = <5>;
+				};
+				port6: port@6 {
+					reg = <6>;
+				};
+				port7: port@7 {
+					reg = <7>;
+				};
+				port8: port@8 {
+					reg = <8>;
+				};
+				port9: port@9 {
+					reg = <9>;
+				};
+				port10: port@10 {
+					reg = <10>;
+				};
+			};
+		};
+
 		reset@1070008 {
 			compatible = "mscc,ocelot-chip-reset";
 			reg = <0x1070008 0x4>;
@@ -113,5 +179,27 @@
 				function = "uart2";
 			};
 		};
+
+		mdio0: mdio@107009c {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "mscc,ocelot-miim";
+			reg = <0x107009c 0x36>, <0x10700f0 0x8>;
+			interrupts = <14>;
+			status = "disabled";
+
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+			phy1: ethernet-phy@1 {
+				reg = <1>;
+			};
+			phy2: ethernet-phy@2 {
+				reg = <2>;
+			};
+			phy3: ethernet-phy@3 {
+				reg = <3>;
+			};
+		};
 	};
 };
-- 
cgit v1.2.3


From 8798e3921e3000a046d336920588745b6651959b Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 14 May 2018 22:04:59 +0200
Subject: MIPS: mscc: Connect phys to ports on ocelot_pcb123

Add phy to switch port connections for PCB123 for internal PHYs.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
index 29d6414f8886..4ccd65379059 100644
--- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -25,3 +25,23 @@
 &uart2 {
 	status = "okay";
 };
+
+&mdio0 {
+	status = "okay";
+};
+
+&port0 {
+	phy-handle = <&phy0>;
+};
+
+&port1 {
+	phy-handle = <&phy1>;
+};
+
+&port2 {
+	phy-handle = <&phy2>;
+};
+
+&port3 {
+	phy-handle = <&phy3>;
+};
-- 
cgit v1.2.3


From 8270ab48e67333f760b3bef507c1524c7a06c699 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Fri, 20 Apr 2018 11:23:03 +0100
Subject: MIPS: Probe for MIPS MT perf counters per TC

Processors implementing the MIPS MT ASE may have performance counters
implemented per core or per TC. Processors implemented by MIPS
Technologies signify presence per TC through a bit in the implementation
specific Config7 register. Currently the code which probes for their
presence blindly reads a magic number corresponding to this bit, despite
it potentially having a different meaning in the CPU implementation.

Since CPU features are generally detected by cpu-probe.c, perform the
detection here instead. Introduce cpu_set_mt_per_tc_perf which checks
the bit in config7 and call it from MIPS CPUs known to implement this
bit and the MT ASE, specifically, the 34K, 1004K and interAptiv.

Once the presence of the per-tc counter is indicated in cpu_data, tests
for it can be updated to use this flag.

Suggested-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Matt Redfearn <matt.redfearn@mips.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Maciej W. Rozycki <macro@mips.com>
Cc: linux-mips@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/19136/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/include/asm/cpu.h      |  2 ++
 arch/mips/include/asm/mipsregs.h |  5 +++++
 arch/mips/kernel/cpu-probe.c     | 12 ++++++++++++
 3 files changed, 19 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index d39324c4adf1..5b9d02ef4f60 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -418,6 +418,8 @@ enum cpu_type_enum {
 				MBIT_ULL(54)	/* CPU shares FTLB RAM with another */
 #define MIPS_CPU_SHARED_FTLB_ENTRIES \
 				MBIT_ULL(55)	/* CPU shares FTLB entries with another */
+#define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \
+				MBIT_ULL(56)	/* CPU has perf counters implemented per TC (MIPSMT ASE) */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index f65859784a4c..ae461d91cd1f 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -685,6 +685,11 @@
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
 
+/* Config7 Bits specific to MIPS Technologies. */
+
+/* Performance counters implemented Per TC */
+#define MTI_CONF7_PTC		(_ULCAST_(1) << 19)
+
 /* WatchLo* register definitions */
 #define MIPS_WATCHLO_IRW	(_ULCAST_(0x7) << 0)
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6b07b739f914..b2509c19cfb5 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -414,6 +414,14 @@ static int __init ftlb_disable(char *s)
 
 __setup("noftlb", ftlb_disable);
 
+/*
+ * Check if the CPU has per tc perf counters
+ */
+static inline void cpu_set_mt_per_tc_perf(struct cpuinfo_mips *c)
+{
+	if (read_c0_config7() & MTI_CONF7_PTC)
+		c->options |= MIPS_CPU_MT_PER_TC_PERF_COUNTERS;
+}
 
 static inline void check_errata(void)
 {
@@ -1572,6 +1580,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_34K;
 		c->writecombine = _CACHE_UNCACHED;
 		__cpu_name[cpu] = "MIPS 34Kc";
+		cpu_set_mt_per_tc_perf(c);
 		break;
 	case PRID_IMP_74K:
 		c->cputype = CPU_74K;
@@ -1592,6 +1601,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_1004K;
 		c->writecombine = _CACHE_UNCACHED;
 		__cpu_name[cpu] = "MIPS 1004Kc";
+		cpu_set_mt_per_tc_perf(c);
 		break;
 	case PRID_IMP_1074K:
 		c->cputype = CPU_1074K;
@@ -1601,10 +1611,12 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_INTERAPTIV_UP:
 		c->cputype = CPU_INTERAPTIV;
 		__cpu_name[cpu] = "MIPS interAptiv";
+		cpu_set_mt_per_tc_perf(c);
 		break;
 	case PRID_IMP_INTERAPTIV_MP:
 		c->cputype = CPU_INTERAPTIV;
 		__cpu_name[cpu] = "MIPS interAptiv (multi)";
+		cpu_set_mt_per_tc_perf(c);
 		break;
 	case PRID_IMP_PROAPTIV_UP:
 		c->cputype = CPU_PROAPTIV;
-- 
cgit v1.2.3


From 800fb71281ca2ed5c8a7299e10ebc0de2f61cdda Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Fri, 20 Apr 2018 11:23:04 +0100
Subject: MIPS: perf: More robustly probe for the presence of per-tc counters

The presence of per TC performance counters is now detected by
cpu-probe.c and indicated by MIPS_CPU_MT_PER_TC_PERF_COUNTERS in
cpu_data. Switch detection of the feature to use this new flag rather
than blindly testing the implementation specific config7 register with a
magic number.

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Maciej W. Rozycki <macro@mips.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Robert Richter <rric@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: oprofile-list@lists.sf.net
Patchwork: https://patchwork.linux-mips.org/patch/19142/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/include/asm/cpu-features.h | 7 +++++++
 arch/mips/kernel/perf_event_mipsxx.c | 3 ---
 arch/mips/oprofile/op_model_mipsxx.c | 2 --
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 5f74590e0bea..9cdb4e4ce258 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -535,6 +535,13 @@
 # define cpu_has_shared_ftlb_entries 0
 #endif
 
+#ifdef CONFIG_MIPS_MT_SMP
+# define cpu_has_mipsmt_pertccounters \
+	(cpu_data[0].options & MIPS_CPU_MT_PER_TC_PERF_COUNTERS)
+#else
+# define cpu_has_mipsmt_pertccounters 0
+#endif /* CONFIG_MIPS_MT_SMP */
+
 /*
  * Guest capabilities
  */
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index ee73550f0b9a..458015da7149 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -129,8 +129,6 @@ static struct mips_pmu mipspmu;
 
 
 #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS
-static int cpu_has_mipsmt_pertccounters;
-
 static DEFINE_RWLOCK(pmuint_rwlock);
 
 #if defined(CONFIG_CPU_BMIPS5000)
@@ -1723,7 +1721,6 @@ init_hw_perf_events(void)
 	}
 
 #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS
-	cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19);
 	if (!cpu_has_mipsmt_pertccounters)
 		counters = counters_total_to_per_cpu(counters);
 #endif
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index c3e4c18ef8d4..7c04b17f4a48 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -36,7 +36,6 @@ static int perfcount_irq;
 #endif
 
 #ifdef CONFIG_MIPS_MT_SMP
-static int cpu_has_mipsmt_pertccounters;
 #define WHAT		(MIPS_PERFCTRL_MT_EN_VPE | \
 			 M_PERFCTL_VPEID(cpu_vpe_id(&current_cpu_data)))
 #define vpe_id()	(cpu_has_mipsmt_pertccounters ? \
@@ -326,7 +325,6 @@ static int __init mipsxx_init(void)
 	}
 
 #ifdef CONFIG_MIPS_MT_SMP
-	cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19);
 	if (!cpu_has_mipsmt_pertccounters)
 		counters = counters_total_to_per_cpu(counters);
 #endif
-- 
cgit v1.2.3


From 840a8b55effdc9a98b115f84b8bbb6a2f5d05226 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Fri, 20 Apr 2018 11:23:05 +0100
Subject: MIPS: perf: Use correct VPE ID when setting up VPE tracing

There are a couple of FIXME's in the perf code which state that
cpu_data[event->cpu].vpe_id reports 0 for both CPUs. This is no longer
the case, since the vpe_id is used extensively by SMP CPS.

VPE local counting gets around this by using smp_processor_id() instead.
As it happens this does work correctly to count events on the right VPE,
but relies on 2 assumptions:
a) Always having 2 VPEs / core.
b) The hardware only paying attention to the least significant bit of
the PERFCTL.VPEID field.
If either of these assumptions change then the incorrect VPEs events
will be counted.

Fix this by replacing smp_processor_id() with
cpu_vpe_id(&current_cpu_data), in the vpe_id() macro, and pass vpe_id()
to M_PERFCTL_VPEID() when setting up PERFCTL.VPEID. The FIXME's can also
be removed since they no longer apply.

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19137/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 458015da7149..11d1b2268fdd 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -135,12 +135,8 @@ static DEFINE_RWLOCK(pmuint_rwlock);
 #define vpe_id()	(cpu_has_mipsmt_pertccounters ? \
 			 0 : (smp_processor_id() & MIPS_CPUID_TO_COUNTER_MASK))
 #else
-/*
- * FIXME: For VSMP, vpe_id() is redefined for Perf-events, because
- * cpu_data[cpuid].vpe_id reports 0 for _both_ CPUs.
- */
 #define vpe_id()	(cpu_has_mipsmt_pertccounters ? \
-			 0 : smp_processor_id())
+			 0 : cpu_vpe_id(&current_cpu_data))
 #endif
 
 /* Copied from op_model_mipsxx.c */
@@ -1277,11 +1273,7 @@ static void check_and_calc_range(struct perf_event *event,
 			 */
 			hwc->config_base |= M_TC_EN_ALL;
 		} else {
-			/*
-			 * FIXME: cpu_data[event->cpu].vpe_id reports 0
-			 * for both CPUs.
-			 */
-			hwc->config_base |= M_PERFCTL_VPEID(event->cpu);
+			hwc->config_base |= M_PERFCTL_VPEID(vpe_id());
 			hwc->config_base |= M_TC_EN_VPE;
 		}
 	} else
-- 
cgit v1.2.3


From 84002c88599d6b537e54b003f763215be2075243 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Fri, 20 Apr 2018 11:23:06 +0100
Subject: MIPS: perf: Fix perf with MT counting other threads

When perf is used in non-system mode, i.e. without specifying CPUs to
count on, check_and_calc_range falls into the case when it sets
M_TC_EN_ALL in the counter config_base. This has the impact of always
counting for all of the threads in a core, even when the user has not
requested it. For example this can be seen with a test program which
executes 30002 instructions and 10000 branches running on one VPE and a
busy load on the other VPE in the core. Without this commit, the
expected count is not returned:

taskset 4 dd if=/dev/zero of=/dev/null count=100000 & taskset 8 perf
stat -e instructions:u,branches:u ./test_prog

 Performance counter stats for './test_prog':

            103235      instructions:u
             17015      branches:u

In order to fix this, remove check_and_calc_range entirely and perform
all of the logic in mipsxx_pmu_enable_event. Since
mipsxx_pmu_enable_event now requires the range of the event, ensure that
it is set by mipspmu_perf_event_encode in the same circumstances as
before (i.e. #ifdef CONFIG_MIPS_MT_SMP && num_possible_cpus() > 1).

The logic of mipsxx_pmu_enable_event now becomes:
If the CPU is a BMIPS5000, then use the special vpe_id() implementation
to select which VPE to count.
If the counter has a range greater than a single VPE, i.e. it is a
core-wide counter, then ensure that the counter is set up to count
events from all TCs (though, since this is true by definition, is this
necessary? Just enabling a core-wide counter in the per-VPE case appears
experimentally to return the same counts. This is left in for now as the
logic was present before).
If the event is set up to count a particular CPU (i.e. system mode),
then the VPE ID of that CPU is used for the counter.
Otherwise, the event should be counted on the CPU scheduling this thread
(this was the critical bit missing from the previous implementation) so
the VPE ID of this CPU is used for the counter.

With this commit, the same test as before returns the counts expected:

taskset 4 dd if=/dev/zero of=/dev/null count=100000 & taskset 8 perf
stat -e instructions:u,branches:u ./test_prog

 Performance counter stats for './test_prog':

             30002      instructions:u
             10000      branches:u

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19138/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 78 ++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 11d1b2268fdd..413863508f6f 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -323,7 +323,11 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc,
 
 static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
 {
+	struct perf_event *event = container_of(evt, struct perf_event, hw);
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+#ifdef CONFIG_MIPS_MT_SMP
+	unsigned int range = evt->event_base >> 24;
+#endif /* CONFIG_MIPS_MT_SMP */
 
 	WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
 
@@ -331,11 +335,37 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
 		(evt->config_base & M_PERFCTL_CONFIG_MASK) |
 		/* Make sure interrupt enabled. */
 		MIPS_PERFCTRL_IE;
-	if (IS_ENABLED(CONFIG_CPU_BMIPS5000))
+
+#ifdef CONFIG_CPU_BMIPS5000
+	{
 		/* enable the counter for the calling thread */
 		cpuc->saved_ctrl[idx] |=
 			(1 << (12 + vpe_id())) | BRCM_PERFCTRL_TC;
+	}
+#else
+#ifdef CONFIG_MIPS_MT_SMP
+	if (range > V) {
+		/* The counter is processor wide. Set it up to count all TCs. */
+		pr_debug("Enabling perf counter for all TCs\n");
+		cpuc->saved_ctrl[idx] |= M_TC_EN_ALL;
+	} else
+#endif /* CONFIG_MIPS_MT_SMP */
+	{
+		unsigned int cpu, ctrl;
 
+		/*
+		 * Set up the counter for a particular CPU when event->cpu is
+		 * a valid CPU number. Otherwise set up the counter for the CPU
+		 * scheduling this thread.
+		 */
+		cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();
+
+		ctrl = M_PERFCTL_VPEID(cpu_vpe_id(&cpu_data[cpu]));
+		ctrl |= M_TC_EN_VPE;
+		cpuc->saved_ctrl[idx] |= ctrl;
+		pr_debug("Enabling perf counter for CPU%d\n", cpu);
+	}
+#endif /* CONFIG_CPU_BMIPS5000 */
 	/*
 	 * We do not actually let the counter run. Leave it until start().
 	 */
@@ -649,13 +679,14 @@ static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev)
  * event_id.
  */
 #ifdef CONFIG_MIPS_MT_SMP
-	return ((unsigned int)pev->range << 24) |
-		(pev->cntr_mask & 0xffff00) |
-		(pev->event_id & 0xff);
-#else
-	return (pev->cntr_mask & 0xffff00) |
-		(pev->event_id & 0xff);
-#endif
+	if (num_possible_cpus() > 1)
+		return ((unsigned int)pev->range << 24) |
+			(pev->cntr_mask & 0xffff00) |
+			(pev->event_id & 0xff);
+	else
+#endif /* CONFIG_MIPS_MT_SMP */
+		return ((pev->cntr_mask & 0xffff00) |
+			(pev->event_id & 0xff));
 }
 
 static const struct mips_perf_event *mipspmu_map_general_event(int idx)
@@ -1259,33 +1290,6 @@ static const struct mips_perf_event xlp_cache_map
 },
 };
 
-#ifdef CONFIG_MIPS_MT_SMP
-static void check_and_calc_range(struct perf_event *event,
-				 const struct mips_perf_event *pev)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (event->cpu >= 0) {
-		if (pev->range > V) {
-			/*
-			 * The user selected an event that is processor
-			 * wide, while expecting it to be VPE wide.
-			 */
-			hwc->config_base |= M_TC_EN_ALL;
-		} else {
-			hwc->config_base |= M_PERFCTL_VPEID(vpe_id());
-			hwc->config_base |= M_TC_EN_VPE;
-		}
-	} else
-		hwc->config_base |= M_TC_EN_ALL;
-}
-#else
-static void check_and_calc_range(struct perf_event *event,
-				 const struct mips_perf_event *pev)
-{
-}
-#endif
-
 static int __hw_perf_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
@@ -1321,10 +1325,6 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	hwc->config_base = MIPS_PERFCTRL_IE;
 
-	/* Calculate range bits and validate it. */
-	if (num_possible_cpus() > 1)
-		check_and_calc_range(event, pev);
-
 	hwc->event_base = mipspmu_perf_event_encode(pev);
 	if (PERF_TYPE_RAW == event->attr.type)
 		mutex_unlock(&raw_event_mutex);
-- 
cgit v1.2.3


From a1f158262a3e00fe396f2d21ef1cffdfc29226dc Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 20 Apr 2018 15:33:21 +1000
Subject: KVM: PPC: Book3S HV: Add 'online' register to ONE_REG interface

This adds a new KVM_REG_PPC_ONLINE register which userspace can set
to 0 or 1 via the GET/SET_ONE_REG interface to indicate whether it
considers the VCPU to be offline (0), that is, not currently running,
or online (1).  This will be used in a later patch to configure the
register which controls PURR and SPURR accumulation.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h | 2 ++
 arch/powerpc/include/uapi/asm/kvm.h | 1 +
 arch/powerpc/kvm/book3s_hv.c        | 6 ++++++
 3 files changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 17498e9a26e4..9703f8f229c9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -772,6 +772,8 @@ struct kvm_vcpu_arch {
 	u64 busy_preempt;
 
 	u32 emul_inst;
+
+	u32 online;
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 833ed9a16adf..1b32b56a03d3 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9963f65c212b..04bd71796098 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1526,6 +1526,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.dec_expires +
 				   vcpu->arch.vcore->tb_offset);
 		break;
+	case KVM_REG_PPC_ONLINE:
+		*val = get_reg_val(id, vcpu->arch.online);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1757,6 +1760,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		vcpu->arch.dec_expires = set_reg_val(id, *val) -
 			vcpu->arch.vcore->tb_offset;
 		break;
+	case KVM_REG_PPC_ONLINE:
+		vcpu->arch.online = set_reg_val(id, *val);
+		break;
 	default:
 		r = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 7aa15842c15f8a32000372ad2b3195029fde6fd4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 20 Apr 2018 19:53:22 +1000
Subject: KVM: PPC: Book3S HV: Set RWMR on POWER8 so PURR/SPURR count correctly

Although Linux doesn't use PURR and SPURR ((Scaled) Processor
Utilization of Resources Register), other OSes depend on them.
On POWER8 they count at a rate depending on whether the VCPU is
idle or running, the activity of the VCPU, and the value in the
RWMR (Region-Weighting Mode Register).  Hardware expects the
hypervisor to update the RWMR when a core is dispatched to reflect
the number of online VCPUs in the vcore.

This adds code to maintain a count in the vcore struct indicating
how many VCPUs are online.  In kvmppc_run_core we use that count
to set the RWMR register on POWER8.  If the core is split because
of a static or dynamic micro-threading mode, we use the value for
8 threads.  The RWMR value is not relevant when the host is
executing because Linux does not use the PURR or SPURR register,
so we don't bother saving and restoring the host value.

For the sake of old userspace which does not set the KVM_REG_PPC_ONLINE
register, we set online to 1 if it was 0 at the time of a KVM_RUN
ioctl.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/include/asm/reg.h        |  1 +
 arch/powerpc/kvm/book3s_hv.c          | 61 ++++++++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e7377b73cfec..c1f3a870c48a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -104,6 +104,7 @@ struct kvmppc_vcore {
 	ulong vtb;		/* virtual timebase */
 	ulong conferring_threads;
 	unsigned int halt_poll_ns;
+	atomic_t online_count;
 };
 
 struct kvmppc_vcpu_book3s {
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index cb0f272ce123..44b2be4a65d1 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -365,6 +365,7 @@
 #define SPRN_PSSCR	0x357	/* Processor Stop Status and Control Register (ISA 3.0) */
 #define SPRN_PSSCR_PR	0x337	/* PSSCR ISA 3.0, privileged mode access */
 #define SPRN_PMCR	0x374	/* Power Management Control Register */
+#define SPRN_RWMR	0x375	/* Region-Weighting Mode Register */
 
 /* HFSCR and FSCR bit numbers are the same */
 #define FSCR_SCV_LG	12	/* Enable System Call Vectored */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 04bd71796098..f61dd9efa6fb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -123,6 +123,32 @@ static bool no_mixing_hpt_and_radix;
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+/*
+ * RWMR values for POWER8.  These control the rate at which PURR
+ * and SPURR count and should be set according to the number of
+ * online threads in the vcore being run.
+ */
+#define RWMR_RPA_P8_1THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_2THREAD	0x7FFF2908450D8DA9
+#define RWMR_RPA_P8_3THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_4THREAD	0x199A421245058DA9
+#define RWMR_RPA_P8_5THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_6THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_7THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_8THREAD	0x164520C62609AECA
+
+static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
+	RWMR_RPA_P8_1THREAD,
+	RWMR_RPA_P8_1THREAD,
+	RWMR_RPA_P8_2THREAD,
+	RWMR_RPA_P8_3THREAD,
+	RWMR_RPA_P8_4THREAD,
+	RWMR_RPA_P8_5THREAD,
+	RWMR_RPA_P8_6THREAD,
+	RWMR_RPA_P8_7THREAD,
+	RWMR_RPA_P8_8THREAD,
+};
+
 static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
 		int *ip)
 {
@@ -1761,7 +1787,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 			vcpu->arch.vcore->tb_offset;
 		break;
 	case KVM_REG_PPC_ONLINE:
-		vcpu->arch.online = set_reg_val(id, *val);
+		i = set_reg_val(id, *val);
+		if (i && !vcpu->arch.online)
+			atomic_inc(&vcpu->arch.vcore->online_count);
+		else if (!i && vcpu->arch.online)
+			atomic_dec(&vcpu->arch.vcore->online_count);
+		vcpu->arch.online = i;
 		break;
 	default:
 		r = -EINVAL;
@@ -2856,6 +2887,25 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		}
 	}
 
+	/*
+	 * On POWER8, set RWMR register.
+	 * Since it only affects PURR and SPURR, it doesn't affect
+	 * the host, so we don't save/restore the host value.
+	 */
+	if (is_power8) {
+		unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
+		int n_online = atomic_read(&vc->online_count);
+
+		/*
+		 * Use the 8-thread value if we're doing split-core
+		 * or if the vcore's online count looks bogus.
+		 */
+		if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
+		    n_online >= 1 && n_online <= MAX_SMT_THREADS)
+			rwmr_val = p8_rwmr_values[n_online];
+		mtspr(SPRN_RWMR, rwmr_val);
+	}
+
 	/* Start all the threads */
 	active = 0;
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
@@ -3358,6 +3408,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 #endif
 
+	/*
+	 * Force online to 1 for the sake of old userspace which doesn't
+	 * set it.
+	 */
+	if (!vcpu->arch.online) {
+		atomic_inc(&vcpu->arch.vcore->online_count);
+		vcpu->arch.online = 1;
+	}
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
-- 
cgit v1.2.3


From 48e70b1ce667dc032f9166cc00ddb594ecc0065e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 19 Apr 2018 11:49:51 +1000
Subject: KVM: PPC: Book3S HV: Fix inaccurate comment

We now have interrupts hard-disabled when coming back from
kvmppc_hv_entry_trampoline, so this changes the comment to reflect
that.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_interrupts.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0e8493033288..82f2ff9410b6 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -137,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 /*
  * We return here in virtual mode after the guest exits
  * with something that we can't handle in real mode.
- * Interrupts are enabled again at this point.
+ * Interrupts are still hard-disabled.
  */
 
 	/*
-- 
cgit v1.2.3


From c6b61661d229e42b58d5e511191e925d105a5cce Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Mon, 14 May 2018 20:00:27 +1000
Subject: KVM: PPC: Book3S: Use correct page shift in H_STUFF_TCE

The other TCE handlers use page shift from the guest visible TCE table
(described by kvmppc_spapr_tce_iommu_table) so let's make H_STUFF_TCE
handlers do the same thing.

This should cause no behavioral change now but soon we will allow
the iommu_table::it_page_shift being different from from the emulated
table page size so this will play a role.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_vio.c    | 2 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 4dffa611376d..041e54d26750 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -615,7 +615,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 		return H_PARAMETER;
 
 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
-		unsigned long entry = ioba >> stit->tbl->it_page_shift;
+		unsigned long entry = ioba >> stt->page_shift;
 
 		for (i = 0; i < npages; ++i) {
 			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 6651f736a0b1..e220fabb2f5d 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -526,7 +526,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 		return H_PARAMETER;
 
 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
-		unsigned long entry = ioba >> stit->tbl->it_page_shift;
+		unsigned long entry = ioba >> stt->page_shift;
 
 		for (i = 0; i < npages; ++i) {
 			ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
-- 
cgit v1.2.3


From ca1fc489cfa06a554fd71eb46d8927614ec7e6f3 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Mon, 14 May 2018 20:00:28 +1000
Subject: KVM: PPC: Book3S: Allow backing bigger guest IOMMU pages with smaller
 physical pages

At the moment we only support in the host the IOMMU page sizes which
the guest is aware of, which is 4KB/64KB/16MB. However P9 does not support
16MB IOMMU pages, 2MB and 1GB pages are supported instead. We can still
emulate bigger guest pages (for example 16MB) with smaller host pages
(4KB/64KB/2MB).

This allows the physical IOMMU pages to use a page size smaller or equal
than the guest visible IOMMU page size.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_vio.c    | 64 +++++++++++++++++++++++++++++--------
 arch/powerpc/kvm/book3s_64_vio_hv.c | 50 +++++++++++++++++++++++++----
 2 files changed, 94 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 041e54d26750..984f1978a19c 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -176,14 +176,12 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 
 		if (!tbltmp)
 			continue;
-		/*
-		 * Make sure hardware table parameters are exactly the same;
-		 * this is used in the TCE handlers where boundary checks
-		 * use only the first attached table.
-		 */
-		if ((tbltmp->it_page_shift == stt->page_shift) &&
-				(tbltmp->it_offset == stt->offset) &&
-				(tbltmp->it_size == stt->size)) {
+		/* Make sure hardware table parameters are compatible */
+		if ((tbltmp->it_page_shift <= stt->page_shift) &&
+				(tbltmp->it_offset << tbltmp->it_page_shift ==
+				 stt->offset << stt->page_shift) &&
+				(tbltmp->it_size << tbltmp->it_page_shift ==
+				 stt->size << stt->page_shift)) {
 			/*
 			 * Reference the table to avoid races with
 			 * add/remove DMA windows.
@@ -396,7 +394,7 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
 	return H_SUCCESS;
 }
 
-static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
 		struct iommu_table *tbl, unsigned long entry)
 {
 	enum dma_data_direction dir = DMA_NONE;
@@ -416,7 +414,24 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
 	return ret;
 }
 
-long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry)
+{
+	unsigned long i, ret = H_SUCCESS;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+
+	for (i = 0; i < subpages; ++i) {
+		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
+		if (ret != H_SUCCESS)
+			break;
+	}
+
+	return ret;
+}
+
+long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 		unsigned long entry, unsigned long ua,
 		enum dma_data_direction dir)
 {
@@ -453,6 +468,27 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 	return 0;
 }
 
+static long kvmppc_tce_iommu_map(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry, unsigned long ua,
+		enum dma_data_direction dir)
+{
+	unsigned long i, pgoff, ret = H_SUCCESS;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+
+	for (i = 0, pgoff = 0; i < subpages;
+			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
+
+		ret = kvmppc_tce_iommu_do_map(kvm, tbl,
+				io_entry + i, ua + pgoff, dir);
+		if (ret != H_SUCCESS)
+			break;
+	}
+
+	return ret;
+}
+
 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		      unsigned long ioba, unsigned long tce)
 {
@@ -491,10 +527,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 
 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
 		if (dir == DMA_NONE)
-			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
 					stit->tbl, entry);
 		else
-			ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
+			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
 					entry, ua, dir);
 
 		if (ret == H_SUCCESS)
@@ -570,7 +606,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 			return H_PARAMETER;
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
-			ret = kvmppc_tce_iommu_map(vcpu->kvm,
+			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
 					stit->tbl, entry + i, ua,
 					iommu_tce_direction(tce));
 
@@ -618,7 +654,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 		unsigned long entry = ioba >> stt->page_shift;
 
 		for (i = 0; i < npages; ++i) {
-			ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
 					stit->tbl, entry + i);
 
 			if (ret == H_SUCCESS)
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e220fabb2f5d..635f3ca8129a 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -221,7 +221,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
 	return H_SUCCESS;
 }
 
-static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 		struct iommu_table *tbl, unsigned long entry)
 {
 	enum dma_data_direction dir = DMA_NONE;
@@ -245,7 +245,24 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
 	return ret;
 }
 
-static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry)
+{
+	unsigned long i, ret = H_SUCCESS;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+
+	for (i = 0; i < subpages; ++i) {
+		ret = kvmppc_rm_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
+		if (ret != H_SUCCESS)
+			break;
+	}
+
+	return ret;
+}
+
+static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 		unsigned long entry, unsigned long ua,
 		enum dma_data_direction dir)
 {
@@ -290,6 +307,27 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
 	return 0;
 }
 
+static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry, unsigned long ua,
+		enum dma_data_direction dir)
+{
+	unsigned long i, pgoff, ret = H_SUCCESS;
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+
+	for (i = 0, pgoff = 0; i < subpages;
+			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
+
+		ret = kvmppc_rm_tce_iommu_do_map(kvm, tbl,
+				io_entry + i, ua + pgoff, dir);
+		if (ret != H_SUCCESS)
+			break;
+	}
+
+	return ret;
+}
+
 long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		unsigned long ioba, unsigned long tce)
 {
@@ -327,10 +365,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 
 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
 		if (dir == DMA_NONE)
-			ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+			ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt,
 					stit->tbl, entry);
 		else
-			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
 					stit->tbl, entry, ua, dir);
 
 		if (ret == H_SUCCESS)
@@ -477,7 +515,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 			return H_PARAMETER;
 
 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
-			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+			ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
 					stit->tbl, entry + i, ua,
 					iommu_tce_direction(tce));
 
@@ -529,7 +567,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 		unsigned long entry = ioba >> stt->page_shift;
 
 		for (i = 0; i < npages; ++i) {
-			ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+			ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, stt,
 					stit->tbl, entry + i);
 
 			if (ret == H_SUCCESS)
-- 
cgit v1.2.3


From e45719af1caff16dbc0f6bf7bbfbc5e7a54738a5 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Mon, 14 May 2018 20:00:29 +1000
Subject: KVM: PPC: Book3S: Check KVM_CREATE_SPAPR_TCE_64 parameters

Although it does not seem possible to break the host by passing bad
parameters when creating a TCE table in KVM, it is still better to get
an early clear indication of that than debugging weird effect this might
bring.

This adds some sanity checks that the page size is 4KB..16GB as this is
what the actual LoPAPR supports and that the window actually fits 64bit
space.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_vio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 984f1978a19c..80ead383d8ee 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -300,7 +300,8 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	int ret = -ENOMEM;
 	int i;
 
-	if (!args->size)
+	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
+		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
 		return -EINVAL;
 
 	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
-- 
cgit v1.2.3


From 16d5c39d54031afe8a5663b4638030d9fc38ba19 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 10 May 2018 23:57:19 +0530
Subject: KVM: PPC: Book3S: Change return type to vm_fault_t

Use new return type vm_fault_t for fault handler
in struct vm_operations_struct. For now, this is
just documenting that the function returns a
VM_FAULT value rather than an errno.  Once all
instances are converted, vm_fault_t will become
a distinct type.

commit 1c8f422059ae ("mm: change return type to
vm_fault_t")

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_vio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 80ead383d8ee..d066e37551ec 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -235,7 +235,7 @@ static void release_spapr_tce_table(struct rcu_head *head)
 	kfree(stt);
 }
 
-static int kvm_spapr_tce_fault(struct vm_fault *vmf)
+static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
 {
 	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
 	struct page *page;
-- 
cgit v1.2.3


From 55531b7431db789766ac952391e95c170db48581 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.vnet.ibm.com>
Date: Thu, 15 Feb 2018 16:33:47 +0100
Subject: KVM: s390: Add storage key facility interpretation control

Up to now we always expected to have the storage key facility
available for our (non-VSIE) KVM guests. For huge page support, we
need to be able to disable it, so let's introduce that now.

We add the use_skf variable to manage KVM storage key facility
usage. Also we rename use_skey in the mm context struct to uses_skeys
to make it more clear that it is an indication that the vm actively
uses storage keys.

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h    |  1 +
 arch/s390/include/asm/mmu.h         |  2 +-
 arch/s390/include/asm/mmu_context.h |  2 +-
 arch/s390/include/asm/pgtable.h     |  4 ++--
 arch/s390/kvm/kvm-s390.c            |  3 ++-
 arch/s390/kvm/priv.c                | 28 ++++++++++++++++------------
 arch/s390/mm/gmap.c                 |  6 +++---
 arch/s390/mm/pgtable.c              |  4 ++--
 8 files changed, 28 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 81cdb6b55118..a2188e309bd6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -812,6 +812,7 @@ struct kvm_arch{
 	int use_irqchip;
 	int use_cmma;
 	int use_pfmfi;
+	int use_skf;
 	int user_cpu_state_ctrl;
 	int user_sigp;
 	int user_stsi;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index c639c95850e4..f5ff9dbad8ac 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -21,7 +21,7 @@ typedef struct {
 	/* The mmu context uses extended page tables. */
 	unsigned int has_pgste:1;
 	/* The mmu context uses storage keys. */
-	unsigned int use_skey:1;
+	unsigned int uses_skeys:1;
 	/* The mmu context uses CMM. */
 	unsigned int uses_cmm:1;
 } mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 324f6f452982..d16bc79c30bb 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -30,7 +30,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		test_thread_flag(TIF_PGSTE) ||
 		(current->mm && current->mm->context.alloc_pgste);
 	mm->context.has_pgste = 0;
-	mm->context.use_skey = 0;
+	mm->context.uses_skeys = 0;
 	mm->context.uses_cmm = 0;
 #endif
 	switch (mm->context.asce_limit) {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2d24d33bf188..c9f155b67660 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -507,10 +507,10 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
  * faults should no longer be backed by zero pages
  */
 #define mm_forbids_zeropage mm_has_pgste
-static inline int mm_use_skey(struct mm_struct *mm)
+static inline int mm_uses_skeys(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
-	if (mm->context.use_skey)
+	if (mm->context.uses_skeys)
 		return 1;
 #endif
 	return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 64c986243018..007db8faafa5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1493,7 +1493,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 		return -EINVAL;
 
 	/* Is this guest using storage keys? */
-	if (!mm_use_skey(current->mm))
+	if (!mm_uses_skeys(current->mm))
 		return KVM_S390_GET_SKEYS_NONE;
 
 	/* Enforce sane limit on memory allocation */
@@ -2066,6 +2066,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
+	kvm->arch.use_skf = sclp.has_skey;
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ebfa0442e569..e8c62703c764 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -205,24 +205,28 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 
 int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
 {
-	int rc = 0;
+	int rc;
 	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
 
 	trace_kvm_s390_skey_related_inst(vcpu);
-	if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
+	/* Already enabled? */
+	if (vcpu->kvm->arch.use_skf &&
+	    !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
 	    !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-		return rc;
+		return 0;
 
 	rc = s390_enable_skey();
 	VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
-	if (!rc) {
-		if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
-		else
-			sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
-					     ICTL_RRBE);
-	}
-	return rc;
+	if (rc)
+		return rc;
+
+	if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+	if (!vcpu->kvm->arch.use_skf)
+		sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+	else
+		sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	return 0;
 }
 
 static int try_handle_skey(struct kvm_vcpu *vcpu)
@@ -232,7 +236,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 	rc = kvm_s390_skey_check_enable(vcpu);
 	if (rc)
 		return rc;
-	if (sclp.has_skey) {
+	if (vcpu->kvm->arch.use_skf) {
 		/* with storage-key facility, SIE interprets it for us */
 		kvm_s390_retry_instr(vcpu);
 		VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 2c55a2b9d6c6..bc56ec8abcf7 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2184,14 +2184,14 @@ int s390_enable_skey(void)
 	int rc = 0;
 
 	down_write(&mm->mmap_sem);
-	if (mm_use_skey(mm))
+	if (mm_uses_skeys(mm))
 		goto out_up;
 
-	mm->context.use_skey = 1;
+	mm->context.uses_skeys = 1;
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
 				MADV_UNMERGEABLE, &vma->vm_flags)) {
-			mm->context.use_skey = 0;
+			mm->context.uses_skeys = 0;
 			rc = -ENOMEM;
 			goto out_up;
 		}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4f2b65d01a70..301e466e4263 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -158,7 +158,7 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
 #ifdef CONFIG_PGSTE
 	unsigned long address, bits, skey;
 
-	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
 		return pgste;
 	address = pte_val(pte) & PAGE_MASK;
 	skey = (unsigned long) page_get_storage_key(address);
@@ -180,7 +180,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
 	unsigned long address;
 	unsigned long nkey;
 
-	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
 		return;
 	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
 	address = pte_val(entry) & PAGE_MASK;
-- 
cgit v1.2.3


From 20c922f04b17aa51a75e514eca8fcbfa337a002d Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
Date: Sun, 22 Apr 2018 11:37:03 -0400
Subject: KVM: s390: reset crypto attributes for all vcpus

Introduces a new function to reset the crypto attributes for all
vcpus whether they are running or not. Each vcpu in KVM will
be removed from SIE prior to resetting the crypto attributes in its
SIE state description. After all vcpus have had their crypto attributes
reset the vcpus will be restored to SIE.

This function is incorporated into the kvm_s390_vm_set_crypto(kvm)
function to fix a reported issue whereby the crypto key wrapping
attributes could potentially get out of synch for running vcpus.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reported-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 17 ++++++++++++-----
 arch/s390/kvm/kvm-s390.h | 13 +++++++++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 007db8faafa5..d9799946722e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -791,11 +791,21 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 
-static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
 	int i;
 
+	kvm_s390_vcpu_block_all(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_s390_vcpu_crypto_setup(vcpu);
+
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+
+static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+{
 	if (!test_kvm_facility(kvm, 76))
 		return -EINVAL;
 
@@ -832,10 +842,7 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 		return -ENXIO;
 	}
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		kvm_s390_vcpu_crypto_setup(vcpu);
-		exit_sie(vcpu);
-	}
+	kvm_s390_vcpu_crypto_reset_all(kvm);
 	mutex_unlock(&kvm->lock);
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 1b5621f4fe5b..981e3ba97461 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -410,4 +410,17 @@ static inline int kvm_s390_use_sca_entries(void)
 }
 void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 				     struct mcck_volatile_info *mcck_info);
+
+/**
+ * kvm_s390_vcpu_crypto_reset_all
+ *
+ * Reset the crypto attributes for each vcpu. This can be done while the vcpus
+ * are running as each vcpu will be removed from SIE before resetting the crypt
+ * attributes and restored to SIE afterward.
+ *
+ * Note: The kvm->lock must be held while calling this function
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
 #endif
-- 
cgit v1.2.3


From b9224cd7381aea7380e230d7488d8672143600e4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 30 Apr 2018 17:55:24 +0200
Subject: KVM: s390: introduce defines for control registers

In KVM code we use masks to test/set control registers.

Let's define the ones we use in arch/s390/include/asm/ctl_reg.h and
replace all occurrences in KVM code.

As we will be needing the define for Clock-comparator sign control soon,
let's also add it.

Suggested-by: Collin L. Walling <walling@linux.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Collin Walling <walling@linux.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/ctl_reg.h | 12 ++++++++++++
 arch/s390/kvm/guestdbg.c        |  2 +-
 arch/s390/kvm/interrupt.c       | 20 ++++++++++----------
 arch/s390/kvm/kvm-s390.c        | 10 +++++++---
 4 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 99c93d0346f9..4600453536c2 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -10,8 +10,20 @@
 
 #include <linux/const.h>
 
+#define CR0_CLOCK_COMPARATOR_SIGN	_BITUL(63 - 10)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK	_BITUL(63 - 49)
+#define CR0_EXTERNAL_CALL_SUBMASK	_BITUL(63 - 50)
+#define CR0_CLOCK_COMPARATOR_SUBMASK	_BITUL(63 - 52)
+#define CR0_CPU_TIMER_SUBMASK		_BITUL(63 - 53)
+#define CR0_SERVICE_SIGNAL_SUBMASK	_BITUL(63 - 54)
+#define CR0_UNUSED_56			_BITUL(63 - 56)
+#define CR0_INTERRUPT_KEY_SUBMASK	_BITUL(63 - 57)
+#define CR0_MEASUREMENT_ALERT_SUBMASK	_BITUL(63 - 58)
+
 #define CR2_GUARDED_STORAGE		_BITUL(63 - 59)
 
+#define CR14_UNUSED_32			_BITUL(63 - 32)
+#define CR14_UNUSED_33			_BITUL(63 - 33)
 #define CR14_CHANNEL_REPORT_SUBMASK	_BITUL(63 - 35)
 #define CR14_RECOVERY_SUBMASK		_BITUL(63 - 36)
 #define CR14_DEGRADATION_SUBMASK	_BITUL(63 - 37)
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index b5f3e82006d0..394a5f53805b 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -153,7 +153,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
 
 	if (guestdbg_sstep_enabled(vcpu)) {
 		/* disable timer (clock-comparator) interrupts */
-		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+		vcpu->arch.sie_block->gcr[0] &= ~CR0_CLOCK_COMPARATOR_SUBMASK;
 		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
 		vcpu->arch.sie_block->gcr[10] = 0;
 		vcpu->arch.sie_block->gcr[11] = -1UL;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 37d06e022238..daa09f89ca2d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -159,7 +159,7 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 {
 	if (psw_extint_disabled(vcpu) ||
-	    !(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+	    !(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
 		return 0;
 	if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
 		/* No timer interrupts when single stepping */
@@ -172,7 +172,7 @@ static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 	const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
 	const u64 ckc = vcpu->arch.sie_block->ckc;
 
-	if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+	if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
 		if ((s64)ckc >= (s64)now)
 			return 0;
 	} else if (ckc >= now) {
@@ -184,7 +184,7 @@ static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
 {
 	return !psw_extint_disabled(vcpu) &&
-	       (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+	       (vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK);
 }
 
 static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
@@ -285,15 +285,15 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 		active_mask &= ~IRQ_PEND_IO_MASK;
 	else
 		active_mask = disable_iscs(vcpu, active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_EMERGENCY_SIGNAL_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
 	if (psw_mchk_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_MCHK_MASK;
@@ -1042,7 +1042,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 	/* external call pending and deliverable */
 	if (kvm_s390_ext_call_pending(vcpu) &&
 	    !psw_extint_disabled(vcpu) &&
-	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+	    (vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
 		return 1;
 
 	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
@@ -1062,7 +1062,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
 	u64 cputm, sltime = 0;
 
 	if (ckc_interrupts_enabled(vcpu)) {
-		if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+		if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
 			if ((s64)now < (s64)ckc)
 				sltime = tod_to_ns((s64)ckc - (s64)now);
 		} else if (now < ckc) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d9799946722e..60bb3b7243d9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2441,8 +2441,12 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->ckc       = 0UL;
 	vcpu->arch.sie_block->todpr     = 0;
 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
-	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
-	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
+					CR0_INTERRUPT_KEY_SUBMASK |
+					CR0_MEASUREMENT_ALERT_SUBMASK;
+	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
+					CR14_UNUSED_33 |
+					CR14_EXTERNAL_DAMAGE_SUBMASK;
 	/* make sure the new fpc will be lazily loaded */
 	save_fpu_regs();
 	current->thread.fpu.fpc = 0;
@@ -3200,7 +3204,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 		return 0;
 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
 		return 0;
-	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
 		return 0;
 	if (!vcpu->arch.gmap->pfault_enabled)
 		return 0;
-- 
cgit v1.2.3


From 9ac96d759fa2de2386a4fccab80880f99d1161d2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 27 Apr 2018 14:36:12 +0200
Subject: KVM: s390: no need to inititalize kvm->arch members to 0

KVM is allocated with kzalloc(), so these members are already 0.

Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Collin Walling <walling@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 60bb3b7243d9..fd7ce3ab45eb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1989,10 +1989,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	rc = -ENOMEM;
 
-	kvm->arch.use_esca = 0; /* start with basic SCA */
 	if (!sclp.has_64bscao)
 		alloc_flags |= GFP_DMA;
 	rwlock_init(&kvm->arch.sca_lock);
+	/* start with basic SCA */
 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
 	if (!kvm->arch.sca)
 		goto out_err;
@@ -2043,8 +2043,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_s390_crypto_init(kvm);
 
 	mutex_init(&kvm->arch.float_int.ais_lock);
-	kvm->arch.float_int.simm = 0;
-	kvm->arch.float_int.nimm = 0;
 	spin_lock_init(&kvm->arch.float_int.lock);
 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@ -2070,12 +2068,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		kvm->arch.gmap->pfault_enabled = 0;
 	}
 
-	kvm->arch.css_support = 0;
-	kvm->arch.use_irqchip = 0;
 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
 	kvm->arch.use_skf = sclp.has_skey;
-	kvm->arch.epoch = 0;
-
 	spin_lock_init(&kvm->arch.start_stop_lock);
 	kvm_s390_vsie_init(kvm);
 	kvm_s390_gisa_init(kvm);
-- 
cgit v1.2.3


From 33d1b2729e409e8327dec2d13a9144dfa76a947c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 27 Apr 2018 14:36:13 +0200
Subject: KVM: s390: generalize kvm_s390_get_tod_clock_ext()

Move the Multiple-epoch facility handling into it and rename it to
kvm_s390_get_tod_clock().

This leaves us with:
- kvm_s390_set_tod_clock()
- kvm_s390_get_tod_clock()
- kvm_s390_get_tod_clock_fast()

So all Multiple-epoch facility is hidden in these functions.

Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Collin Walling <walling@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd7ce3ab45eb..e521f7699032 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1040,8 +1040,8 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
-static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
-					struct kvm_s390_vm_tod_clock *gtod)
+static void kvm_s390_get_tod_clock(struct kvm *kvm,
+				   struct kvm_s390_vm_tod_clock *gtod)
 {
 	struct kvm_s390_tod_clock_ext htod;
 
@@ -1050,10 +1050,12 @@ static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
 	get_tod_clock_ext((char *)&htod);
 
 	gtod->tod = htod.tod + kvm->arch.epoch;
-	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
-
-	if (gtod->tod < htod.tod)
-		gtod->epoch_idx += 1;
+	gtod->epoch_idx = 0;
+	if (test_kvm_facility(kvm, 139)) {
+		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
+		if (gtod->tod < htod.tod)
+			gtod->epoch_idx += 1;
+	}
 
 	preempt_enable();
 }
@@ -1063,12 +1065,7 @@ static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 	struct kvm_s390_vm_tod_clock gtod;
 
 	memset(&gtod, 0, sizeof(gtod));
-
-	if (test_kvm_facility(kvm, 139))
-		kvm_s390_get_tod_clock_ext(kvm, &gtod);
-	else
-		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
-
+	kvm_s390_get_tod_clock(kvm, &gtod);
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
 
-- 
cgit v1.2.3


From 2c8180e885c1b2844a24dcaf4a675972b8ce8edc Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 9 May 2018 16:12:18 +0200
Subject: KVM: s390: vsie: simplify < 8k address checks

This makes it certainly more readable.

Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/vsie.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 969882b54266..84c89cb9636f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -557,7 +557,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
 		gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
 	if (gpa) {
-		if (!(gpa & ~0x1fffUL))
+		if (gpa < 2 * PAGE_SIZE)
 			rc = set_validity_icpt(scb_s, 0x0038U);
 		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
 			rc = set_validity_icpt(scb_s, 0x0011U);
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
 	if (gpa && (scb_s->ecb & ECB_TE)) {
-		if (!(gpa & ~0x1fffUL)) {
+		if (gpa < 2 * PAGE_SIZE) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
 			goto unpin;
 		}
@@ -594,7 +594,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
 	if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
-		if (!(gpa & ~0x1fffUL)) {
+		if (gpa < 2 * PAGE_SIZE) {
 			rc = set_validity_icpt(scb_s, 0x1310U);
 			goto unpin;
 		}
@@ -613,7 +613,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
 	if (gpa && (scb_s->ecb3 & ECB3_RI)) {
-		if (!(gpa & ~0x1fffUL)) {
+		if (gpa < 2 * PAGE_SIZE) {
 			rc = set_validity_icpt(scb_s, 0x0043U);
 			goto unpin;
 		}
@@ -632,7 +632,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
 		sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
-		if (!gpa || !(gpa & ~0x1fffUL)) {
+		if (!gpa || gpa < 2 * PAGE_SIZE) {
 			rc = set_validity_icpt(scb_s, 0x10b0U);
 			goto unpin;
 		}
-- 
cgit v1.2.3


From 1e9d42194e4c8f0ba3f9d4f72b5f54050ddf7a39 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 19 Apr 2018 22:00:07 +0200
Subject: i2c: gpio: move header to platform_data

This header only contains platform_data. Move it to the proper directory.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: James Hogan <jhogan@kernel.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/arm/mach-ks8695/board-acs5k.c    | 2 +-
 arch/arm/mach-omap1/board-htcherald.c | 2 +-
 arch/arm/mach-pxa/palmz72.c           | 2 +-
 arch/arm/mach-pxa/viper.c             | 2 +-
 arch/arm/mach-sa1100/simpad.c         | 2 +-
 arch/mips/alchemy/board-gpr.c         | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-ks8695/board-acs5k.c b/arch/arm/mach-ks8695/board-acs5k.c
index 937eb1d47e7b..ef835d82cdb9 100644
--- a/arch/arm/mach-ks8695/board-acs5k.c
+++ b/arch/arm/mach-ks8695/board-acs5k.c
@@ -19,7 +19,7 @@
 #include <linux/gpio/machine.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 #include <linux/platform_data/pca953x.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c
index 67d46690a56e..da8f3fc3180f 100644
--- a/arch/arm/mach-omap1/board-htcherald.c
+++ b/arch/arm/mach-omap1/board-htcherald.c
@@ -31,7 +31,7 @@
 #include <linux/gpio.h>
 #include <linux/gpio_keys.h>
 #include <linux/i2c.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 #include <linux/htcpld.h>
 #include <linux/leds.h>
 #include <linux/spi/spi.h>
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 5877e547cecd..c053c8ce1586 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -30,7 +30,7 @@
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
 #include <linux/usb/gpio_vbus.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 #include <linux/gpio/machine.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 90d0f277de55..39e05b7008d8 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -35,7 +35,7 @@
 #include <linux/sched.h>
 #include <linux/gpio.h>
 #include <linux/jiffies.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 #include <linux/gpio/machine.h>
 #include <linux/platform_data/i2c-pxa.h>
 #include <linux/serial_8250.h>
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index ace010479eb6..49a61e6f3c5f 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -37,7 +37,7 @@
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
 #include <linux/leds.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c
index 4e79dbd54a33..fa75d75b5ba9 100644
--- a/arch/mips/alchemy/board-gpr.c
+++ b/arch/mips/alchemy/board-gpr.c
@@ -29,7 +29,7 @@
 #include <linux/leds.h>
 #include <linux/gpio.h>
 #include <linux/i2c.h>
-#include <linux/i2c-gpio.h>
+#include <linux/platform_data/i2c-gpio.h>
 #include <linux/gpio/machine.h>
 #include <asm/bootinfo.h>
 #include <asm/idle.h>
-- 
cgit v1.2.3


From 79fc540fd543f47e77e1c7d407f2c082872a4625 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 19 Apr 2018 22:00:10 +0200
Subject: i2c: omap: move header to platform_data

This header only contains platform_data. Move it to the proper directory.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/common.h               | 2 +-
 arch/arm/mach-omap1/i2c.c                  | 2 +-
 arch/arm/mach-omap2/common.h               | 2 +-
 arch/arm/mach-omap2/omap_hwmod_2420_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_33xx_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_54xx_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_7xx_data.c  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h
index d83ff257eaa8..c6537d2c2859 100644
--- a/arch/arm/mach-omap1/common.h
+++ b/arch/arm/mach-omap1/common.h
@@ -27,7 +27,7 @@
 #define __ARCH_ARM_MACH_OMAP1_COMMON_H
 
 #include <linux/mtd/mtd.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <linux/reboot.h>
 
 #include <asm/exception.h>
diff --git a/arch/arm/mach-omap1/i2c.c b/arch/arm/mach-omap1/i2c.c
index 5bdf3c4190f9..9250f263ac51 100644
--- a/arch/arm/mach-omap1/i2c.c
+++ b/arch/arm/mach-omap1/i2c.c
@@ -20,7 +20,7 @@
  */
 
 #include <linux/i2c.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <mach/mux.h>
 #include "soc.h"
 
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index fbe0b78bf489..ed1a7e2f176a 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -30,7 +30,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/mfd/twl.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <linux/reboot.h>
 #include <linux/irqchip/irq-omap-intc.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index fe66cf247874..d684fac8f592 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -13,7 +13,7 @@
  * XXX these should be marked initdata for multi-OMAP kernels
  */
 
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <linux/omap-dma.h>
 
 #include "omap_hwmod.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index 74eefd30518c..abef9f6f9bf5 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -13,7 +13,7 @@
  * XXX these should be marked initdata for multi-OMAP kernels
  */
 
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <linux/platform_data/hsmmc-omap.h>
 #include <linux/omap-dma.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 53e1ac3724f2..c9483bc06228 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -14,7 +14,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 
 #include "omap_hwmod.h"
 #include "omap_hwmod_common_data.h"
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 23336b6c7125..9c0953de24da 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -15,7 +15,7 @@
  * XXX these should be marked initdata for multi-OMAP kernels
  */
 
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 #include <linux/power/smartreflex.h>
 #include <linux/platform_data/hsmmc-omap.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index e4f8ae9cd637..9e4b4243fec7 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -23,7 +23,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/hsmmc-omap.h>
 #include <linux/power/smartreflex.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 
 #include <linux/omap-dma.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
index c72cd84b07ec..890c789485d3 100644
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@@ -20,7 +20,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/hsmmc-omap.h>
 #include <linux/power/smartreflex.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 
 #include <linux/omap-dma.h>
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index 62352d1e6361..56b141fce973 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -20,7 +20,7 @@
 #include <linux/io.h>
 #include <linux/platform_data/hsmmc-omap.h>
 #include <linux/power/smartreflex.h>
-#include <linux/i2c-omap.h>
+#include <linux/platform_data/i2c-omap.h>
 
 #include <linux/omap-dma.h>
 
-- 
cgit v1.2.3


From e5c7137793a754500e65ffd7477e88ff7a06ac53 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Thu, 19 Apr 2018 22:00:11 +0200
Subject: i2c: pca-platform: move header to platform_data

This header only contains platform_data. Move it to the proper directory.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 arch/sh/boards/board-sh7785lcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index d7d232dea33e..3cba60ff7aab 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -17,7 +17,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
-#include <linux/i2c-pca-platform.h>
+#include <linux/platform_data/i2c-pca-platform.h>
 #include <linux/i2c-algo-pca.h>
 #include <linux/usb/r8a66597.h>
 #include <linux/sh_intc.h>
-- 
cgit v1.2.3


From 1143a70665c2175a33a40d8f2dc277978fbf7640 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 7 May 2018 14:20:07 +0800
Subject: KVM: PPC: Add pt_regs into kvm_vcpu_arch and move vcpu->arch.gpr[]
 into it

Current regs are scattered at kvm_vcpu_arch structure and it will
be more neat to organize them into pt_regs structure.

Also it will enable reimplementation of MMIO emulation code with
analyse_instr() later.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h    |  4 +--
 arch/powerpc/include/asm/kvm_book3s_64.h |  8 ++---
 arch/powerpc/include/asm/kvm_booke.h     |  4 +--
 arch/powerpc/include/asm/kvm_host.h      |  2 +-
 arch/powerpc/kernel/asm-offsets.c        |  2 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c      |  2 +-
 arch/powerpc/kvm/book3s_hv_builtin.c     |  6 ++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 15 +++++----
 arch/powerpc/kvm/book3s_hv_rm_xics.c     |  2 +-
 arch/powerpc/kvm/book3s_pr.c             | 56 ++++++++++++++++----------------
 arch/powerpc/kvm/book3s_xive_template.c  |  4 +--
 arch/powerpc/kvm/e500_emulate.c          |  4 +--
 12 files changed, 55 insertions(+), 54 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index c1f3a870c48a..e3182f7ae499 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -275,12 +275,12 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
-	vcpu->arch.gpr[num] = val;
+	vcpu->arch.regs.gpr[num] = val;
 }
 
 static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 {
-	return vcpu->arch.gpr[num];
+	return vcpu->arch.regs.gpr[num];
 }
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index c424e44f4c00..38dbcad086d6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -490,8 +490,8 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
 	vcpu->arch.ppr = vcpu->arch.ppr_tm;
 	vcpu->arch.dscr = vcpu->arch.dscr_tm;
 	vcpu->arch.tar = vcpu->arch.tar_tm;
-	memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
-	       sizeof(vcpu->arch.gpr));
+	memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm,
+	       sizeof(vcpu->arch.regs.gpr));
 	vcpu->arch.fp  = vcpu->arch.fp_tm;
 	vcpu->arch.vr  = vcpu->arch.vr_tm;
 	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
@@ -507,8 +507,8 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
 	vcpu->arch.ppr_tm = vcpu->arch.ppr;
 	vcpu->arch.dscr_tm = vcpu->arch.dscr;
 	vcpu->arch.tar_tm = vcpu->arch.tar;
-	memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
-	       sizeof(vcpu->arch.gpr));
+	memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr,
+	       sizeof(vcpu->arch.regs.gpr));
 	vcpu->arch.fp_tm  = vcpu->arch.fp;
 	vcpu->arch.vr_tm  = vcpu->arch.vr;
 	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index bc6e29e4dfd4..f5fc9569ef56 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -36,12 +36,12 @@
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
-	vcpu->arch.gpr[num] = val;
+	vcpu->arch.regs.gpr[num] = val;
 }
 
 static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 {
-	return vcpu->arch.gpr[num];
+	return vcpu->arch.regs.gpr[num];
 }
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9703f8f229c9..a75443a372bb 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -486,7 +486,7 @@ struct kvm_vcpu_arch {
 	struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
 #endif
 
-	ulong gpr[32];
+	struct pt_regs regs;
 
 	struct thread_fp_state fp;
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 373dc1d6ef44..774c6a8ebfb4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -425,7 +425,7 @@ int main(void)
 	OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
 	OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
 	OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid);
-	OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr);
+	OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr);
 	OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave);
 	OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr);
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 635f3ca8129a..925fc316a104 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -609,7 +609,7 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	page = stt->pages[idx / TCES_PER_PAGE];
 	tbl = (u64 *)page_address(page);
 
-	vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
+	vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
 
 	return H_SUCCESS;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index de18299f92b7..2b127586be30 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -211,9 +211,9 @@ long kvmppc_h_random(struct kvm_vcpu *vcpu)
 
 	/* Only need to do the expensive mfmsr() on radix */
 	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
-		r = powernv_get_random_long(&vcpu->arch.gpr[4]);
+		r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
 	else
-		r = powernv_get_random_real_mode(&vcpu->arch.gpr[4]);
+		r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
 	if (r)
 		return H_SUCCESS;
 
@@ -562,7 +562,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	vcpu->arch.gpr[5] = get_tb();
+	vcpu->arch.regs.gpr[5] = get_tb();
 	if (xive_enabled()) {
 		if (is_rm())
 			return xive_rm_h_xirr(vcpu);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 78e6a392330f..8e12c5c3c4ee 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -418,7 +418,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
 	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
-				 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+				 vcpu->arch.pgdir, true,
+				 &vcpu->arch.regs.gpr[4]);
 }
 
 #ifdef __BIG_ENDIAN__
@@ -561,13 +562,13 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 		     unsigned long pte_index, unsigned long avpn)
 {
 	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
-				  &vcpu->arch.gpr[4]);
+				  &vcpu->arch.regs.gpr[4]);
 }
 
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *args = &vcpu->arch.gpr[4];
+	unsigned long *args = &vcpu->arch.regs.gpr[4];
 	__be64 *hp, *hptes[4];
 	unsigned long tlbrb[4];
 	long int i, j, k, n, found, indexes[4];
@@ -787,8 +788,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
 			r &= ~HPTE_GR_RESERVED;
 		}
-		vcpu->arch.gpr[4 + i * 2] = v;
-		vcpu->arch.gpr[5 + i * 2] = r;
+		vcpu->arch.regs.gpr[4 + i * 2] = v;
+		vcpu->arch.regs.gpr[5 + i * 2] = r;
 	}
 	return H_SUCCESS;
 }
@@ -834,7 +835,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 			}
 		}
 	}
-	vcpu->arch.gpr[4] = gr;
+	vcpu->arch.regs.gpr[4] = gr;
 	ret = H_SUCCESS;
  out:
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
@@ -881,7 +882,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 			kvmppc_set_dirty_from_hpte(kvm, v, gr);
 		}
 	}
-	vcpu->arch.gpr[4] = gr;
+	vcpu->arch.regs.gpr[4] = gr;
 	ret = H_SUCCESS;
  out:
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 2a862618f072..758d1d23215e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -517,7 +517,7 @@ unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
 	} while (!icp_rm_try_update(icp, old_state, new_state));
 
 	/* Return the result in GPR4 */
-	vcpu->arch.gpr[4] = xirr;
+	vcpu->arch.regs.gpr[4] = xirr;
 
 	return check_too_hard(xics, icp);
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d3f304d06adf..899bc9a02ab5 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -147,20 +147,20 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 
-	svcpu->gpr[0] = vcpu->arch.gpr[0];
-	svcpu->gpr[1] = vcpu->arch.gpr[1];
-	svcpu->gpr[2] = vcpu->arch.gpr[2];
-	svcpu->gpr[3] = vcpu->arch.gpr[3];
-	svcpu->gpr[4] = vcpu->arch.gpr[4];
-	svcpu->gpr[5] = vcpu->arch.gpr[5];
-	svcpu->gpr[6] = vcpu->arch.gpr[6];
-	svcpu->gpr[7] = vcpu->arch.gpr[7];
-	svcpu->gpr[8] = vcpu->arch.gpr[8];
-	svcpu->gpr[9] = vcpu->arch.gpr[9];
-	svcpu->gpr[10] = vcpu->arch.gpr[10];
-	svcpu->gpr[11] = vcpu->arch.gpr[11];
-	svcpu->gpr[12] = vcpu->arch.gpr[12];
-	svcpu->gpr[13] = vcpu->arch.gpr[13];
+	svcpu->gpr[0] = vcpu->arch.regs.gpr[0];
+	svcpu->gpr[1] = vcpu->arch.regs.gpr[1];
+	svcpu->gpr[2] = vcpu->arch.regs.gpr[2];
+	svcpu->gpr[3] = vcpu->arch.regs.gpr[3];
+	svcpu->gpr[4] = vcpu->arch.regs.gpr[4];
+	svcpu->gpr[5] = vcpu->arch.regs.gpr[5];
+	svcpu->gpr[6] = vcpu->arch.regs.gpr[6];
+	svcpu->gpr[7] = vcpu->arch.regs.gpr[7];
+	svcpu->gpr[8] = vcpu->arch.regs.gpr[8];
+	svcpu->gpr[9] = vcpu->arch.regs.gpr[9];
+	svcpu->gpr[10] = vcpu->arch.regs.gpr[10];
+	svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
+	svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
+	svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
 	svcpu->cr  = vcpu->arch.cr;
 	svcpu->xer = vcpu->arch.xer;
 	svcpu->ctr = vcpu->arch.ctr;
@@ -194,20 +194,20 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 	if (!svcpu->in_use)
 		goto out;
 
-	vcpu->arch.gpr[0] = svcpu->gpr[0];
-	vcpu->arch.gpr[1] = svcpu->gpr[1];
-	vcpu->arch.gpr[2] = svcpu->gpr[2];
-	vcpu->arch.gpr[3] = svcpu->gpr[3];
-	vcpu->arch.gpr[4] = svcpu->gpr[4];
-	vcpu->arch.gpr[5] = svcpu->gpr[5];
-	vcpu->arch.gpr[6] = svcpu->gpr[6];
-	vcpu->arch.gpr[7] = svcpu->gpr[7];
-	vcpu->arch.gpr[8] = svcpu->gpr[8];
-	vcpu->arch.gpr[9] = svcpu->gpr[9];
-	vcpu->arch.gpr[10] = svcpu->gpr[10];
-	vcpu->arch.gpr[11] = svcpu->gpr[11];
-	vcpu->arch.gpr[12] = svcpu->gpr[12];
-	vcpu->arch.gpr[13] = svcpu->gpr[13];
+	vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
+	vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
+	vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
+	vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
+	vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
+	vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
+	vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
+	vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
+	vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
+	vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
+	vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
+	vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
+	vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
+	vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
 	vcpu->arch.cr  = svcpu->cr;
 	vcpu->arch.xer = svcpu->xer;
 	vcpu->arch.ctr = svcpu->ctr;
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 99c3620b40d9..6e41ba7ec8f4 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -334,7 +334,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
 	 */
 
 	/* Return interrupt and old CPPR in GPR4 */
-	vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
+	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
 
 	return H_SUCCESS;
 }
@@ -369,7 +369,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
 	hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
 
 	/* Return interrupt and old CPPR in GPR4 */
-	vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
+	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
 
 	return H_SUCCESS;
 }
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 990db69a1d0b..8f871fb75228 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -53,7 +53,7 @@ static int dbell2prio(ulong param)
 
 static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
 {
-	ulong param = vcpu->arch.gpr[rb];
+	ulong param = vcpu->arch.regs.gpr[rb];
 	int prio = dbell2prio(param);
 
 	if (prio < 0)
@@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
 
 static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
 {
-	ulong param = vcpu->arch.gpr[rb];
+	ulong param = vcpu->arch.regs.gpr[rb];
 	int prio = dbell2prio(rb);
 	int pir = param & PPC_DBELL_PIR_MASK;
 	int i;
-- 
cgit v1.2.3


From 173c520a049f57e2af498a3f0557d07797ce1c1b Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 7 May 2018 14:20:08 +0800
Subject: KVM: PPC: Move nip/ctr/lr/xer registers to pt_regs in kvm_vcpu_arch

This patch moves nip/ctr/lr/xer registers from scattered places in
kvm_vcpu_arch to pt_regs structure.

cr register is "unsigned long" in pt_regs and u32 in vcpu->arch.
It will need more consideration and may move in later patches.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h    | 16 ++++++-------
 arch/powerpc/include/asm/kvm_book3s_64.h | 12 +++++-----
 arch/powerpc/include/asm/kvm_booke.h     | 16 ++++++-------
 arch/powerpc/include/asm/kvm_host.h      |  4 ----
 arch/powerpc/kernel/asm-offsets.c        | 16 ++++++-------
 arch/powerpc/kvm/book3s_32_mmu.c         |  2 +-
 arch/powerpc/kvm/book3s_hv.c             |  6 ++---
 arch/powerpc/kvm/book3s_hv_tm.c          | 10 ++++----
 arch/powerpc/kvm/book3s_hv_tm_builtin.c  | 10 ++++----
 arch/powerpc/kvm/book3s_pr.c             | 16 ++++++-------
 arch/powerpc/kvm/booke.c                 | 41 +++++++++++++++++---------------
 arch/powerpc/kvm/booke_emulate.c         |  6 ++---
 arch/powerpc/kvm/e500_emulate.c          |  2 +-
 arch/powerpc/kvm/e500_mmu.c              |  2 +-
 14 files changed, 79 insertions(+), 80 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e3182f7ae499..20d3d5a87296 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -295,42 +295,42 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.xer = val;
+	vcpu->arch.regs.xer = val;
 }
 
 static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.xer;
+	return vcpu->arch.regs.xer;
 }
 
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.ctr = val;
+	vcpu->arch.regs.ctr = val;
 }
 
 static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.ctr;
+	return vcpu->arch.regs.ctr;
 }
 
 static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.lr = val;
+	vcpu->arch.regs.link = val;
 }
 
 static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.lr;
+	return vcpu->arch.regs.link;
 }
 
 static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.pc = val;
+	vcpu->arch.regs.nip = val;
 }
 
 static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.pc;
+	return vcpu->arch.regs.nip;
 }
 
 static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38dbcad086d6..dc435a5af7d6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -483,9 +483,9 @@ static inline u64 sanitize_msr(u64 msr)
 static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.cr  = vcpu->arch.cr_tm;
-	vcpu->arch.xer = vcpu->arch.xer_tm;
-	vcpu->arch.lr  = vcpu->arch.lr_tm;
-	vcpu->arch.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+	vcpu->arch.regs.link  = vcpu->arch.lr_tm;
+	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
 	vcpu->arch.amr = vcpu->arch.amr_tm;
 	vcpu->arch.ppr = vcpu->arch.ppr_tm;
 	vcpu->arch.dscr = vcpu->arch.dscr_tm;
@@ -500,9 +500,9 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
 static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.cr_tm  = vcpu->arch.cr;
-	vcpu->arch.xer_tm = vcpu->arch.xer;
-	vcpu->arch.lr_tm  = vcpu->arch.lr;
-	vcpu->arch.ctr_tm = vcpu->arch.ctr;
+	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+	vcpu->arch.lr_tm  = vcpu->arch.regs.link;
+	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
 	vcpu->arch.amr_tm = vcpu->arch.amr;
 	vcpu->arch.ppr_tm = vcpu->arch.ppr;
 	vcpu->arch.dscr_tm = vcpu->arch.dscr;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index f5fc9569ef56..d513e3ed1c65 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -56,12 +56,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.xer = val;
+	vcpu->arch.regs.xer = val;
 }
 
 static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.xer;
+	return vcpu->arch.regs.xer;
 }
 
 static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
@@ -72,32 +72,32 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
 
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.ctr = val;
+	vcpu->arch.regs.ctr = val;
 }
 
 static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.ctr;
+	return vcpu->arch.regs.ctr;
 }
 
 static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.lr = val;
+	vcpu->arch.regs.link = val;
 }
 
 static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.lr;
+	return vcpu->arch.regs.link;
 }
 
 static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
 {
-	vcpu->arch.pc = val;
+	vcpu->arch.regs.nip = val;
 }
 
 static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.pc;
+	return vcpu->arch.regs.nip;
 }
 
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a75443a372bb..8b0ee5e09ea3 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -521,14 +521,10 @@ struct kvm_vcpu_arch {
 	u32 qpr[32];
 #endif
 
-	ulong pc;
-	ulong ctr;
-	ulong lr;
 #ifdef CONFIG_PPC_BOOK3S
 	ulong tar;
 #endif
 
-	ulong xer;
 	u32 cr;
 
 #ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 774c6a8ebfb4..70a345c73281 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -431,14 +431,14 @@ int main(void)
 #ifdef CONFIG_ALTIVEC
 	OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr);
 #endif
-	OFFSET(VCPU_XER, kvm_vcpu, arch.xer);
-	OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr);
-	OFFSET(VCPU_LR, kvm_vcpu, arch.lr);
+	OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+	OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+	OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
 #ifdef CONFIG_PPC_BOOK3S
 	OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
 #endif
 	OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
-	OFFSET(VCPU_PC, kvm_vcpu, arch.pc);
+	OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
 	OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0);
@@ -695,10 +695,10 @@ int main(void)
 
 #else /* CONFIG_PPC_BOOK3S */
 	OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
-	OFFSET(VCPU_XER, kvm_vcpu, arch.xer);
-	OFFSET(VCPU_LR, kvm_vcpu, arch.lr);
-	OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr);
-	OFFSET(VCPU_PC, kvm_vcpu, arch.pc);
+	OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+	OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
+	OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+	OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
 	OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9);
 	OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
 	OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear);
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 1992676c7a94..45c8ea4a0487 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -52,7 +52,7 @@
 static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
 {
 #ifdef DEBUG_MMU_PTE_IP
-	return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
+	return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP;
 #else
 	return true;
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f61dd9efa6fb..336e3468e700 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -397,13 +397,13 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 
 	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
 	pr_err("pc  = %.16lx  msr = %.16llx  trap = %x\n",
-	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
+	       vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
 	for (r = 0; r < 16; ++r)
 		pr_err("r%2d = %.16lx  r%d = %.16lx\n",
 		       r, kvmppc_get_gpr(vcpu, r),
 		       r+16, kvmppc_get_gpr(vcpu, r+16));
 	pr_err("ctr = %.16lx  lr  = %.16lx\n",
-	       vcpu->arch.ctr, vcpu->arch.lr);
+	       vcpu->arch.regs.ctr, vcpu->arch.regs.link);
 	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
 	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
 	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
@@ -411,7 +411,7 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
 	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
 	pr_err("cr = %.8x  xer = %.16lx  dsisr = %.8x\n",
-	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
+	       vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
 	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
 	pr_err("fault dar = %.16lx dsisr = %.8x\n",
 	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index bf710ad3a6d7..008285058f9b 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -19,7 +19,7 @@ static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
 	u64 texasr, tfiar;
 	u64 msr = vcpu->arch.shregs.msr;
 
-	tfiar = vcpu->arch.pc & ~0x3ull;
+	tfiar = vcpu->arch.regs.nip & ~0x3ull;
 	texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
 	if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr))
 		texasr |= TEXASR_SUSP;
@@ -57,8 +57,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 			       (newmsr & MSR_TM)));
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
-		vcpu->arch.cfar = vcpu->arch.pc - 4;
-		vcpu->arch.pc = vcpu->arch.shregs.srr0;
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
 		return RESUME_GUEST;
 
 	case PPC_INST_RFEBB:
@@ -90,8 +90,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.bescr = bescr;
 		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
 		vcpu->arch.shregs.msr = msr;
-		vcpu->arch.cfar = vcpu->arch.pc - 4;
-		vcpu->arch.pc = vcpu->arch.ebbrr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.regs.nip = vcpu->arch.ebbrr;
 		return RESUME_GUEST;
 
 	case PPC_INST_MTMSRD:
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index d98ccfd2b88c..b2c7c6fca4f9 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -35,8 +35,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
 			return 0;
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
-		vcpu->arch.cfar = vcpu->arch.pc - 4;
-		vcpu->arch.pc = vcpu->arch.shregs.srr0;
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
 		return 1;
 
 	case PPC_INST_RFEBB:
@@ -58,8 +58,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
 		mtspr(SPRN_BESCR, bescr);
 		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
 		vcpu->arch.shregs.msr = msr;
-		vcpu->arch.cfar = vcpu->arch.pc - 4;
-		vcpu->arch.pc = mfspr(SPRN_EBBRR);
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.regs.nip = mfspr(SPRN_EBBRR);
 		return 1;
 
 	case PPC_INST_MTMSRD:
@@ -103,7 +103,7 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
 void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.shregs.msr &= ~MSR_TS_MASK;	/* go to N state */
-	vcpu->arch.pc = vcpu->arch.tfhar;
+	vcpu->arch.regs.nip = vcpu->arch.tfhar;
 	copy_from_checkpoint(vcpu);
 	vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 899bc9a02ab5..67061d399cd9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -162,10 +162,10 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
 	svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
 	svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
 	svcpu->cr  = vcpu->arch.cr;
-	svcpu->xer = vcpu->arch.xer;
-	svcpu->ctr = vcpu->arch.ctr;
-	svcpu->lr  = vcpu->arch.lr;
-	svcpu->pc  = vcpu->arch.pc;
+	svcpu->xer = vcpu->arch.regs.xer;
+	svcpu->ctr = vcpu->arch.regs.ctr;
+	svcpu->lr  = vcpu->arch.regs.link;
+	svcpu->pc  = vcpu->arch.regs.nip;
 #ifdef CONFIG_PPC_BOOK3S_64
 	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
 #endif
@@ -209,10 +209,10 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
 	vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
 	vcpu->arch.cr  = svcpu->cr;
-	vcpu->arch.xer = svcpu->xer;
-	vcpu->arch.ctr = svcpu->ctr;
-	vcpu->arch.lr  = svcpu->lr;
-	vcpu->arch.pc  = svcpu->pc;
+	vcpu->arch.regs.xer = svcpu->xer;
+	vcpu->arch.regs.ctr = svcpu->ctr;
+	vcpu->arch.regs.link  = svcpu->lr;
+	vcpu->arch.regs.nip  = svcpu->pc;
 	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
 	vcpu->arch.fault_dar   = svcpu->fault_dar;
 	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 876d4f294fdd..a9ca016da670 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -77,8 +77,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	printk("pc:   %08lx msr:  %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
-	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("pc:   %08lx msr:  %08llx\n", vcpu->arch.regs.nip,
+			vcpu->arch.shared->msr);
+	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.regs.link,
+			vcpu->arch.regs.ctr);
 	printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
 					    vcpu->arch.shared->srr1);
 
@@ -491,24 +493,25 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	if (allowed) {
 		switch (int_class) {
 		case INT_CLASS_NONCRIT:
-			set_guest_srr(vcpu, vcpu->arch.pc,
+			set_guest_srr(vcpu, vcpu->arch.regs.nip,
 				      vcpu->arch.shared->msr);
 			break;
 		case INT_CLASS_CRIT:
-			set_guest_csrr(vcpu, vcpu->arch.pc,
+			set_guest_csrr(vcpu, vcpu->arch.regs.nip,
 				       vcpu->arch.shared->msr);
 			break;
 		case INT_CLASS_DBG:
-			set_guest_dsrr(vcpu, vcpu->arch.pc,
+			set_guest_dsrr(vcpu, vcpu->arch.regs.nip,
 				       vcpu->arch.shared->msr);
 			break;
 		case INT_CLASS_MC:
-			set_guest_mcsrr(vcpu, vcpu->arch.pc,
+			set_guest_mcsrr(vcpu, vcpu->arch.regs.nip,
 					vcpu->arch.shared->msr);
 			break;
 		}
 
-		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+		vcpu->arch.regs.nip = vcpu->arch.ivpr |
+					vcpu->arch.ivor[priority];
 		if (update_esr == true)
 			kvmppc_set_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
@@ -826,7 +829,7 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	case EMULATE_FAIL:
 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
-		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+		       __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
 		/* For debugging, encode the failing instruction and
 		 * report it to userspace. */
 		run->hw.hardware_exit_reason = ~0ULL << 32;
@@ -875,7 +878,7 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	 */
 	vcpu->arch.dbsr = 0;
 	run->debug.arch.status = 0;
-	run->debug.arch.address = vcpu->arch.pc;
+	run->debug.arch.address = vcpu->arch.regs.nip;
 
 	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
 		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
@@ -971,7 +974,7 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	case EMULATE_FAIL:
 		pr_debug("%s: load instruction from guest address %lx failed\n",
-		       __func__, vcpu->arch.pc);
+		       __func__, vcpu->arch.regs.nip);
 		/* For debugging, encode the failing instruction and
 		 * report it to userspace. */
 		run->hw.hardware_exit_reason = ~0ULL << 32;
@@ -1169,7 +1172,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOKE_INTERRUPT_SPE_FP_DATA:
 	case BOOKE_INTERRUPT_SPE_FP_ROUND:
 		printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
-		       __func__, exit_nr, vcpu->arch.pc);
+		       __func__, exit_nr, vcpu->arch.regs.nip);
 		run->hw.hardware_exit_reason = exit_nr;
 		r = RESUME_HOST;
 		break;
@@ -1299,7 +1302,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_ITLB_MISS: {
-		unsigned long eaddr = vcpu->arch.pc;
+		unsigned long eaddr = vcpu->arch.regs.nip;
 		gpa_t gpaddr;
 		gfn_t gfn;
 		int gtlb_index;
@@ -1391,7 +1394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	int i;
 	int r;
 
-	vcpu->arch.pc = 0;
+	vcpu->arch.regs.nip = 0;
 	vcpu->arch.shared->pir = vcpu->vcpu_id;
 	kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
 	kvmppc_set_msr(vcpu, 0);
@@ -1440,10 +1443,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	vcpu_load(vcpu);
 
-	regs->pc = vcpu->arch.pc;
+	regs->pc = vcpu->arch.regs.nip;
 	regs->cr = kvmppc_get_cr(vcpu);
-	regs->ctr = vcpu->arch.ctr;
-	regs->lr = vcpu->arch.lr;
+	regs->ctr = vcpu->arch.regs.ctr;
+	regs->lr = vcpu->arch.regs.link;
 	regs->xer = kvmppc_get_xer(vcpu);
 	regs->msr = vcpu->arch.shared->msr;
 	regs->srr0 = kvmppc_get_srr0(vcpu);
@@ -1471,10 +1474,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	vcpu_load(vcpu);
 
-	vcpu->arch.pc = regs->pc;
+	vcpu->arch.regs.nip = regs->pc;
 	kvmppc_set_cr(vcpu, regs->cr);
-	vcpu->arch.ctr = regs->ctr;
-	vcpu->arch.lr = regs->lr;
+	vcpu->arch.regs.ctr = regs->ctr;
+	vcpu->arch.regs.link = regs->lr;
 	kvmppc_set_xer(vcpu, regs->xer);
 	kvmppc_set_msr(vcpu, regs->msr);
 	kvmppc_set_srr0(vcpu, regs->srr0);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index a82f64502de1..d23e582f0fee 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -34,19 +34,19 @@
 
 static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pc = vcpu->arch.shared->srr0;
+	vcpu->arch.regs.nip = vcpu->arch.shared->srr0;
 	kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
 }
 
 static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pc = vcpu->arch.dsrr0;
+	vcpu->arch.regs.nip = vcpu->arch.dsrr0;
 	kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
 }
 
 static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pc = vcpu->arch.csrr0;
+	vcpu->arch.regs.nip = vcpu->arch.csrr0;
 	kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
 }
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8f871fb75228..3f8189eb56ed 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -94,7 +94,7 @@ static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	switch (get_oc(inst)) {
 	case EHPRIV_OC_DEBUG:
 		run->exit_reason = KVM_EXIT_DEBUG;
-		run->debug.arch.address = vcpu->arch.pc;
+		run->debug.arch.address = vcpu->arch.regs.nip;
 		run->debug.arch.status = 0;
 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
 		emulated = EMULATE_EXIT_USER;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index ddbf8f0284c0..24296f4cadc6 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -513,7 +513,7 @@ void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
 {
 	unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
 
-	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip, as);
 }
 
 void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From f19d1f367a506bc645f8d6695942b8873fc82c84 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 7 May 2018 14:20:09 +0800
Subject: KVM: PPC: Fix a mmio_host_swabbed uninitialized usage issue

When KVM emulates VMX store, it will invoke kvmppc_get_vmx_data() to
retrieve VMX reg val. kvmppc_get_vmx_data() will check mmio_host_swabbed
to decide which double word of vr[] to be used. But the
mmio_host_swabbed can be uninitialized during VMX store procedure:

kvmppc_emulate_loadstore
	\- kvmppc_handle_store128_by2x64
		\- kvmppc_get_vmx_data

So vcpu->arch.mmio_host_swabbed is not meant to be used at all for
emulation of store instructions, and this patch makes that true for
VMX stores. This patch also initializes mmio_host_swabbed to avoid
possible future problems.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/emulate_loadstore.c | 1 +
 arch/powerpc/kvm/powerpc.c           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index a382e15135e6..b8a3aefc3033 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -111,6 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmio_sp64_extend = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 	vcpu->arch.mmio_vmx_copy_nums = 0;
+	vcpu->arch.mmio_host_swabbed = 0;
 
 	switch (get_op(inst)) {
 	case 31:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4e387647b5af..bef27b16d233 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1374,7 +1374,7 @@ static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
 	if (di > 1)
 		return -1;
 
-	if (vcpu->arch.mmio_host_swabbed)
+	if (kvmppc_need_byteswap(vcpu))
 		di = 1 - di;
 
 	w0 = vrs.u[di * 2];
-- 
cgit v1.2.3


From b7557451475d747740bc1598045bd273ece80ab0 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 16:59:10 +1000
Subject: KVM: PPC: Book3S HV: Lockless tlbie for HPT hcalls

tlbies to an LPAR do not have to be serialised since POWER4/PPC970,
after which the MMU_FTR_LOCKLESS_TLBIE feature was introduced to
avoid tlbie locking.

Since commit c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for
PPC970 processors"), KVM no longer supports processors that do not
have this feature, so the tlbie locking can be removed completely.
A sanity check for the feature is put in kvmppc_mmu_hv_init.

Testing was done on a POWER9 system in HPT mode, with a -smp 32 guest
in HPT mode. 32 instances of the powerpc fork benchmark from selftests
were run with --fork, and the results measured.

Without this patch, total throughput was about 13.5K/sec, and this is
the top of the host profile:

   74.52%  [k] do_tlbies
    2.95%  [k] kvmppc_book3s_hv_page_fault
    1.80%  [k] calc_checksum
    1.80%  [k] kvmppc_vcpu_run_hv
    1.49%  [k] kvmppc_run_core

After this patch, throughput was about 51K/sec, with this profile:

   21.28%  [k] do_tlbies
    5.26%  [k] kvmppc_run_core
    4.88%  [k] kvmppc_book3s_hv_page_fault
    3.30%  [k] _raw_spin_lock_irqsave
    3.25%  [k] gup_pgd_range

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h |  1 -
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  3 +++
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 21 ---------------------
 3 files changed, 3 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8b0ee5e09ea3..89f44ecc4dbd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -269,7 +269,6 @@ struct kvm_arch {
 	unsigned long host_lpcr;
 	unsigned long sdr1;
 	unsigned long host_sdr1;
-	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long vrma_slb_v;
 	int mmu_ready;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a670fa5fbe50..37cd6434d1c8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -272,6 +272,9 @@ int kvmppc_mmu_hv_init(void)
 	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return -EINVAL;
 
+	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
+		return -EINVAL;
+
 	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
 	host_lpid = mfspr(SPRN_LPID);
 	rsvd_lpid = LPID_RSVD;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 8e12c5c3c4ee..1f22d9e977d4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -435,24 +435,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
 		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
-static inline int try_lock_tlbie(unsigned int *lock)
-{
-	unsigned int tmp, old;
-	unsigned int token = LOCK_TOKEN;
-
-	asm volatile("1:lwarx	%1,0,%2\n"
-		     "	cmpwi	cr0,%1,0\n"
-		     "	bne	2f\n"
-		     "  stwcx.	%3,0,%2\n"
-		     "	bne-	1b\n"
-		     "  isync\n"
-		     "2:"
-		     : "=&r" (tmp), "=&r" (old)
-		     : "r" (lock), "r" (token)
-		     : "cc", "memory");
-	return old == 0;
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
@@ -464,8 +446,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 	 * the RS field, this is backwards-compatible with P7 and P8.
 	 */
 	if (global) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
 		for (i = 0; i < npages; ++i) {
@@ -484,7 +464,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		}
 
 		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
 	} else {
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
-- 
cgit v1.2.3


From a5fad1e959529eda20f38d1e02be65ab629de899 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:26 +1000
Subject: KVM: PPC: Book3S HV: Use a helper to unmap ptes in the radix fault
 path

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 46 +++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 481da8f93fa4..2c49b31ec7fb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -228,6 +228,25 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
 	kmem_cache_free(kvm_pmd_cache, pmdp);
 }
 
+static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+			     unsigned long gpa, unsigned int shift)
+
+{
+	unsigned long page_size = 1ul << shift;
+	unsigned long old;
+
+	old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
+	kvmppc_radix_tlbie_page(kvm, gpa, shift);
+	if (old & _PAGE_DIRTY) {
+		unsigned long gfn = gpa >> PAGE_SHIFT;
+		struct kvm_memory_slot *memslot;
+
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (memslot && memslot->dirty_bitmap)
+			kvmppc_update_dirty_map(memslot, gfn, page_size);
+	}
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -235,7 +254,6 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	pud_t *pud, *new_pud = NULL;
 	pmd_t *pmd, *new_pmd = NULL;
 	pte_t *ptep, *new_ptep = NULL;
-	unsigned long old;
 	int ret;
 
 	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
@@ -287,17 +305,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			goto out_unlock;
 		}
 		/* Valid 1GB page here already, remove it */
-		old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
-					      ~0UL, 0, hgpa, PUD_SHIFT);
-		kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
-		if (old & _PAGE_DIRTY) {
-			unsigned long gfn = hgpa >> PAGE_SHIFT;
-			struct kvm_memory_slot *memslot;
-			memslot = gfn_to_memslot(kvm, gfn);
-			if (memslot && memslot->dirty_bitmap)
-				kvmppc_update_dirty_map(memslot,
-							gfn, PUD_SIZE);
-		}
+		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
 	}
 	if (level == 2) {
 		if (!pud_none(*pud)) {
@@ -338,17 +346,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			goto out_unlock;
 		}
 		/* Valid 2MB page here already, remove it */
-		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
-					      ~0UL, 0, lgpa, PMD_SHIFT);
-		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
-		if (old & _PAGE_DIRTY) {
-			unsigned long gfn = lgpa >> PAGE_SHIFT;
-			struct kvm_memory_slot *memslot;
-			memslot = gfn_to_memslot(kvm, gfn);
-			if (memslot && memslot->dirty_bitmap)
-				kvmppc_update_dirty_map(memslot,
-							gfn, PMD_SIZE);
-		}
+		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
 	}
 	if (level == 1) {
 		if (!pmd_none(*pmd)) {
@@ -373,6 +371,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	}
 	ptep = pte_offset_kernel(pmd, gpa);
 	if (pte_present(*ptep)) {
+		unsigned long old;
+
 		/* Check if someone else set the same thing */
 		if (pte_raw(*ptep) == pte_raw(pte)) {
 			ret = 0;
-- 
cgit v1.2.3


From a5704e83aa3d672327409509b2d1bff2def72966 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:27 +1000
Subject: KVM: PPC: Book3S HV: Recursively unmap all page table entries when
 unmapping

When partition scope mappings are unmapped with kvm_unmap_radix, the
pte is cleared, but the page table structure is left in place. If the
next page fault requests a different page table geometry (e.g., due to
THP promotion or split), kvmppc_create_pte is responsible for changing
the page tables.

When a page table entry is to be converted to a large pte, the page
table entry is cleared, the PWC flushed, then the page table it points
to freed. This will cause pte page tables to leak when a 1GB page is
to replace a pud entry points to a pmd table with pte tables under it:
The pmd table will be freed, but its pte tables will be missed.

Fix this by replacing the simple clear and free code with one that
walks down the page tables and frees children. Care must be taken to
clear the root entry being unmapped then flushing the PWC before
freeing any page tables, as explained in comments.

This requires PWC flush to logically become a flush-all-PWC (which it
already is in hardware, but the KVM API needs to be changed to avoid
confusion).

This code also checks that no unexpected pte entries exist in any page
table being freed, and unmaps those and emits a WARN. This is an
expensive operation for the pte page level, but partition scope
changes are rare, so it's unconditional for now to iron out bugs. It
can be put under a CONFIG option or removed after some time.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 192 +++++++++++++++++++++++----------
 1 file changed, 138 insertions(+), 54 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 2c49b31ec7fb..e514370ab5ae 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -165,7 +165,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
-static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+static void kvmppc_radix_flush_pwc(struct kvm *kvm)
 {
 	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
 
@@ -247,6 +247,139 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
 	}
 }
 
+/*
+ * kvmppc_free_p?d are used to free existing page tables, and recursively
+ * descend and clear and free children.
+ * Callers are responsible for flushing the PWC.
+ *
+ * When page tables are being unmapped/freed as part of page fault path
+ * (full == false), ptes are not expected. There is code to unmap them
+ * and emit a warning if encountered, but there may already be data
+ * corruption due to the unexpected mappings.
+ */
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
+{
+	if (full) {
+		memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+	} else {
+		pte_t *p = pte;
+		unsigned long it;
+
+		for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
+			if (pte_val(*p) == 0)
+				continue;
+			WARN_ON_ONCE(1);
+			kvmppc_unmap_pte(kvm, p,
+					 pte_pfn(*p) << PAGE_SHIFT,
+					 PAGE_SHIFT);
+		}
+	}
+
+	kvmppc_pte_free(pte);
+}
+
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
+{
+	unsigned long im;
+	pmd_t *p = pmd;
+
+	for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
+		if (!pmd_present(*p))
+			continue;
+		if (pmd_is_leaf(*p)) {
+			if (full) {
+				pmd_clear(p);
+			} else {
+				WARN_ON_ONCE(1);
+				kvmppc_unmap_pte(kvm, (pte_t *)p,
+					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+					 PMD_SHIFT);
+			}
+		} else {
+			pte_t *pte;
+
+			pte = pte_offset_map(p, 0);
+			kvmppc_unmap_free_pte(kvm, pte, full);
+			pmd_clear(p);
+		}
+	}
+	kvmppc_pmd_free(pmd);
+}
+
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
+{
+	unsigned long iu;
+	pud_t *p = pud;
+
+	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
+		if (!pud_present(*p))
+			continue;
+		if (pud_huge(*p)) {
+			pud_clear(p);
+		} else {
+			pmd_t *pmd;
+
+			pmd = pmd_offset(p, 0);
+			kvmppc_unmap_free_pmd(kvm, pmd, true);
+			pud_clear(p);
+		}
+	}
+	pud_free(kvm->mm, pud);
+}
+
+void kvmppc_free_radix(struct kvm *kvm)
+{
+	unsigned long ig;
+	pgd_t *pgd;
+
+	if (!kvm->arch.pgtable)
+		return;
+	pgd = kvm->arch.pgtable;
+	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+		pud_t *pud;
+
+		if (!pgd_present(*pgd))
+			continue;
+		pud = pud_offset(pgd, 0);
+		kvmppc_unmap_free_pud(kvm, pud);
+		pgd_clear(pgd);
+	}
+	pgd_free(kvm->mm, kvm->arch.pgtable);
+	kvm->arch.pgtable = NULL;
+}
+
+static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
+					      unsigned long gpa)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+
+	/*
+	 * Clearing the pmd entry then flushing the PWC ensures that the pte
+	 * page no longer be cached by the MMU, so can be freed without
+	 * flushing the PWC again.
+	 */
+	pmd_clear(pmd);
+	kvmppc_radix_flush_pwc(kvm);
+
+	kvmppc_unmap_free_pte(kvm, pte, false);
+}
+
+static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
+					unsigned long gpa)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+
+	/*
+	 * Clearing the pud entry then flushing the PWC ensures that the pmd
+	 * page and any children pte pages will no longer be cached by the MMU,
+	 * so can be freed without flushing the PWC again.
+	 */
+	pud_clear(pud);
+	kvmppc_radix_flush_pwc(kvm);
+
+	kvmppc_unmap_free_pmd(kvm, pmd, false);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -312,11 +445,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			/*
 			 * There's a page table page here, but we wanted to
 			 * install a large page, so remove and free the page
-			 * table page.  new_pmd will be NULL since level == 2.
+			 * table page.
 			 */
-			new_pmd = pmd_offset(pud, 0);
-			pud_clear(pud);
-			kvmppc_radix_flush_pwc(kvm, gpa);
+			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
 		}
 		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
 		ret = 0;
@@ -353,11 +484,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			/*
 			 * There's a page table page here, but we wanted to
 			 * install a large page, so remove and free the page
-			 * table page.  new_ptep will be NULL since level == 1.
+			 * table page.
 			 */
-			new_ptep = pte_offset_kernel(pmd, 0);
-			pmd_clear(pmd);
-			kvmppc_radix_flush_pwc(kvm, gpa);
+			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
 		}
 		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
 		ret = 0;
@@ -734,51 +863,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm)
 	return 0;
 }
 
-void kvmppc_free_radix(struct kvm *kvm)
-{
-	unsigned long ig, iu, im;
-	pte_t *pte;
-	pmd_t *pmd;
-	pud_t *pud;
-	pgd_t *pgd;
-
-	if (!kvm->arch.pgtable)
-		return;
-	pgd = kvm->arch.pgtable;
-	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
-		if (!pgd_present(*pgd))
-			continue;
-		pud = pud_offset(pgd, 0);
-		for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
-			if (!pud_present(*pud))
-				continue;
-			if (pud_huge(*pud)) {
-				pud_clear(pud);
-				continue;
-			}
-			pmd = pmd_offset(pud, 0);
-			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-				if (pmd_is_leaf(*pmd)) {
-					pmd_clear(pmd);
-					continue;
-				}
-				if (!pmd_present(*pmd))
-					continue;
-				pte = pte_offset_map(pmd, 0);
-				memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
-				kvmppc_pte_free(pte);
-				pmd_clear(pmd);
-			}
-			kvmppc_pmd_free(pmd_offset(pud, 0));
-			pud_clear(pud);
-		}
-		pud_free(kvm->mm, pud_offset(pgd, 0));
-		pgd_clear(pgd);
-	}
-	pgd_free(kvm->mm, kvm->arch.pgtable);
-	kvm->arch.pgtable = NULL;
-}
-
 static void pte_ctor(void *addr)
 {
 	memset(addr, 0, RADIX_PTE_TABLE_SIZE);
-- 
cgit v1.2.3


From d91cb39ffa7b8af2ed1ff012b95835ff057a6400 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:28 +1000
Subject: KVM: PPC: Book3S HV: Make radix use the Linux translation flush
 functions for partition scope

This has the advantage of consolidating TLB flush code in fewer
places, and it also implements powerpc:tlbie trace events.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 36 ++++++++--------------------------
 1 file changed, 8 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index e514370ab5ae..e55db915af49 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -139,41 +139,21 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	return 0;
 }
 
-#ifdef CONFIG_PPC_64K_PAGES
-#define MMU_BASE_PSIZE	MMU_PAGE_64K
-#else
-#define MMU_BASE_PSIZE	MMU_PAGE_4K
-#endif
-
 static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 				    unsigned int pshift)
 {
-	int psize = MMU_BASE_PSIZE;
-
-	if (pshift >= PUD_SHIFT)
-		psize = MMU_PAGE_1G;
-	else if (pshift >= PMD_SHIFT)
-		psize = MMU_PAGE_2M;
-	addr &= ~0xfffUL;
-	addr |= mmu_psize_defs[psize].ap << 5;
-	asm volatile("ptesync": : :"memory");
-	asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
-		     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
-		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
-			     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+	unsigned long psize = PAGE_SIZE;
+
+	if (pshift)
+		psize = 1UL << pshift;
+
+	addr &= ~(psize - 1);
+	radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize);
 }
 
 static void kvmppc_radix_flush_pwc(struct kvm *kvm)
 {
-	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
-
-	asm volatile("ptesync": : :"memory");
-	/* RIC=1 PRS=0 R=1 IS=2 */
-	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
-		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+	radix__flush_pwc_lpid(kvm->arch.lpid);
 }
 
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
-- 
cgit v1.2.3


From 9a4506e11b9717db2e03c8eedc14d2baaf78b66b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:29 +1000
Subject: KVM: PPC: Book3S HV: Make radix handle process scoped LPID flush in
 C, with relocation on

The radix guest code can has fewer restrictions about what context it
can run in, so move this flushing out of assembly and have it use the
Linux TLB flush implementations introduced previously.

This allows powerpc:tlbie trace events to be used.

This changes the tlbiel sequence to only execute RIC=2 flush once on
the first set flushed, then RIC=0 for the rest of the sets. The end
result of the flush should be unchanged. This matches the local PID
flush pattern that was introduced in a5998fcb92 ("powerpc/mm/radix:
Optimise tlbiel flush all case").

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c            | 26 ++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 13 ++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 336e3468e700..9b6a118ea771 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2958,6 +2958,32 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
+	if (kvm_is_radix(vc->kvm)) {
+		int tmp = pcpu;
+
+		/*
+		 * Do we need to flush the process scoped TLB for the LPAR?
+		 *
+		 * On POWER9, individual threads can come in here, but the
+		 * TLB is shared between the 4 threads in a core, hence
+		 * invalidating on one thread invalidates for all.
+		 * Thus we make all 4 threads use the same bit here.
+		 *
+		 * Hash must be flushed in realmode in order to use tlbiel.
+		 */
+		mtspr(SPRN_LPID, vc->kvm->arch.lpid);
+		isync();
+
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			tmp &= ~0x3UL;
+
+		if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) {
+			radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid);
+			/* Clear the bit after the TLB flush */
+			cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush);
+		}
+	}
+
 	/*
 	 * Interrupts will be enabled once we get into the guest,
 	 * so tell lockdep that we're about to enable interrupts.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 07ca1b2a7966..ef9e665fc8e2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -636,6 +636,10 @@ kvmppc_hv_entry:
 	/* Primary thread switches to guest partition. */
 	cmpwi	r6,0
 	bne	10f
+
+	/* Radix has already switched LPID and flushed core TLB */
+	bne	cr7, 22f
+
 	lwz	r7,KVM_LPID(r9)
 BEGIN_FTR_SECTION
 	ld	r6,KVM_SDR1(r9)
@@ -647,7 +651,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPID,r7
 	isync
 
-	/* See if we need to flush the TLB */
+	/* See if we need to flush the TLB. Hash has to be done in RM */
 	lhz	r6,PACAPACAINDEX(r13)	/* test_bit(cpu, need_tlb_flush) */
 BEGIN_FTR_SECTION
 	/*
@@ -674,15 +678,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	li	r7,0x800		/* IS field = 0b10 */
 	ptesync
 	li	r0,0			/* RS for P9 version of tlbiel */
-	bne	cr7, 29f
 28:	tlbiel	r7			/* On P9, rs=0, RIC=0, PRS=0, R=0 */
 	addi	r7,r7,0x1000
 	bdnz	28b
-	b	30f
-29:	PPC_TLBIEL(7,0,2,1,1)		/* for radix, RIC=2, PRS=1, R=1 */
-	addi	r7,r7,0x1000
-	bdnz	29b
-30:	ptesync
+	ptesync
 23:	ldarx	r7,0,r6			/* clear the bit after TLB flushed */
 	andc	r7,r7,r8
 	stdcx.	r7,0,r6
-- 
cgit v1.2.3


From bc64dd0e1c4eddbec75dd5aa86b60c2a834aaef3 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:30 +1000
Subject: KVM: PPC: Book3S HV: radix: Refine IO region partition scope
 attributes

When the radix fault handler has no page from the process address
space (e.g., for IO memory), it looks up the process pte and sets
partition table pte using that to get attributes like CI and guarded.
If the process table entry is to be writable, set _PAGE_DIRTY as well
to avoid an RC update. If not, then ensure _PAGE_DIRTY does not come
across. Set _PAGE_ACCESSED as well to avoid RC update.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index e55db915af49..b0ba3628adc2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -674,9 +674,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			unsigned long mask = (1ul << shift) - PAGE_SIZE;
 			pte = __pte(pte_val(pte) | (hva & mask));
 		}
-		if (!(writing || upgrade_write))
-			pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
-		pte = __pte(pte_val(pte) | _PAGE_EXEC);
+		pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+		if (writing || upgrade_write) {
+			if (pte_val(pte) & _PAGE_WRITE)
+				pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+		} else {
+			pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+		}
 	}
 
 	/* Allocate space in the tree and write the PTE */
-- 
cgit v1.2.3


From 878cf2bb2d8d6164df7b63b2239859f99fea212a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 17 May 2018 17:06:31 +1000
Subject: KVM: PPC: Book3S HV: radix: Do not clear partition PTE when RC or
 write bits do not match

Adding the write bit and RC bits to pte permissions does not require a
pte clear and flush. There should not be other bits changed here,
because restricting access or changing the PFN must have already
invalidated any existing ptes (otherwise the race is already lost).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 68 +++++++++++++++++++++++-----------
 1 file changed, 47 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b0ba3628adc2..176f911ee983 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -156,7 +156,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm)
 	radix__flush_pwc_lpid(kvm->arch.lpid);
 }
 
-unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
 				      unsigned long clr, unsigned long set,
 				      unsigned long addr, unsigned int shift)
 {
@@ -360,6 +360,15 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
 	kvmppc_unmap_free_pmd(kvm, pmd, false);
 }
 
+/*
+ * There are a number of bits which may differ between different faults to
+ * the same partition scope entry. RC bits, in the course of cleaning and
+ * aging. And the write bit can change, either the access could have been
+ * upgraded, or a read fault could happen concurrently with a write fault
+ * that sets those bits first.
+ */
+#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -404,19 +413,28 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	if (pud_huge(*pud)) {
 		unsigned long hgpa = gpa & PUD_MASK;
 
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 2) {
+			if (pud_raw(*pud) == pte_raw(pte)) {
+				ret = 0;
+				goto out_unlock;
+			}
+			/* Valid 1GB page here already, add our extra bits */
+			WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+			kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+					      0, pte_val(pte), hgpa, PUD_SHIFT);
+			ret = 0;
+			goto out_unlock;
+		}
 		/*
 		 * If we raced with another CPU which has just put
 		 * a 1GB pte in after we saw a pmd page, try again.
 		 */
-		if (level <= 1 && !new_pmd) {
+		if (!new_pmd) {
 			ret = -EAGAIN;
 			goto out_unlock;
 		}
-		/* Check if we raced and someone else has set the same thing */
-		if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
-			ret = 0;
-			goto out_unlock;
-		}
 		/* Valid 1GB page here already, remove it */
 		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
 	}
@@ -443,19 +461,29 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	if (pmd_is_leaf(*pmd)) {
 		unsigned long lgpa = gpa & PMD_MASK;
 
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1) {
+			if (pmd_raw(*pmd) == pte_raw(pte)) {
+				ret = 0;
+				goto out_unlock;
+			}
+			/* Valid 2MB page here already, add our extra bits */
+			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+					      0, pte_val(pte), lgpa, PMD_SHIFT);
+			ret = 0;
+			goto out_unlock;
+		}
+
 		/*
 		 * If we raced with another CPU which has just put
 		 * a 2MB pte in after we saw a pte page, try again.
 		 */
-		if (level == 0 && !new_ptep) {
+		if (!new_ptep) {
 			ret = -EAGAIN;
 			goto out_unlock;
 		}
-		/* Check if we raced and someone else has set the same thing */
-		if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
-			ret = 0;
-			goto out_unlock;
-		}
 		/* Valid 2MB page here already, remove it */
 		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
 	}
@@ -480,19 +508,17 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	}
 	ptep = pte_offset_kernel(pmd, gpa);
 	if (pte_present(*ptep)) {
-		unsigned long old;
-
 		/* Check if someone else set the same thing */
 		if (pte_raw(*ptep) == pte_raw(pte)) {
 			ret = 0;
 			goto out_unlock;
 		}
-		/* PTE was previously valid, so invalidate it */
-		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-					      0, gpa, 0);
-		kvmppc_radix_tlbie_page(kvm, gpa, 0);
-		if (old & _PAGE_DIRTY)
-			mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+		/* Valid page here already, add our extra bits */
+		WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+		kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
+		ret = 0;
+		goto out_unlock;
 	}
 	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
 	ret = 0;
-- 
cgit v1.2.3


From eadce3b48b5a8ffec7c8abbd4950a501c91d2515 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 18 May 2018 03:49:43 +1000
Subject: KVM: PPC: Book3S HV: Fix kvmppc_bad_host_intr for real mode
 interrupts

When CONFIG_RELOCATABLE=n, the Linux real mode interrupt handlers call
into KVM using real address. This needs to be translated to the kernel
linear effective address before the MMU is switched on.

kvmppc_bad_host_intr misses adding these bits, so when it is used to
handle a system reset interrupt (that always gets delivered in real
mode), it results in an instruction access fault immediately after
the MMU is turned on.

Fix this by ensuring the top 2 address bits are set when the MMU is
turned on.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ef9e665fc8e2..5e6e493e065e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -3568,6 +3568,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	bcl	20, 31, .+4
 5:	mflr	r3
 	addi	r3, r3, 9f - 5b
+	li	r4, -1
+	rldimi	r3, r4, 62, 0	/* ensure 0xc000000000000000 bits are set */
 	ld	r4, PACAKMSR(r13)
 	mtspr	SPRN_SRR0, r3
 	mtspr	SPRN_SRR1, r4
-- 
cgit v1.2.3


From 7c1bd80cc216e7255bfabb94222676b51ab6868e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 18 May 2018 03:49:44 +1000
Subject: KVM: PPC: Book3S HV: Send kvmppc_bad_interrupt NMIs to Linux handlers

It's possible to take a SRESET or MCE in these paths due to a bug
in the host code or a NMI IPI, etc. A recent bug attempting to load
a virtual address from real mode gave th complete but cryptic error,
abridged:

      Oops: Bad interrupt in KVM entry/exit code, sig: 6 [#1]
      LE SMP NR_CPUS=2048 NUMA PowerNV
      CPU: 53 PID: 6582 Comm: qemu-system-ppc Not tainted
      NIP:  c0000000000155ac LR: c0000000000c2430 CTR: c000000000015580
      REGS: c000000fff76dd80 TRAP: 0200   Not tainted
      MSR:  9000000000201003 <SF,HV,ME,RI,LE>  CR: 48082222  XER: 00000000
      CFAR: 0000000102900ef0 DAR: d00017fffd941a28 DSISR: 00000040 SOFTE: 3
      NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0
      LR [c0000000000c2430] do_tlbies+0x230/0x2f0

Sending the NMIs through the Linux handlers gives a nicer output:

      Severe Machine check interrupt [Not recovered]
        NIP [c0000000000155ac]: perf_trace_tlbie+0x2c/0x1a0
        Initiator: CPU
        Error type: Real address [Load (bad)]
          Effective address: d00017fffcc01a28
      opal: Machine check interrupt unrecoverable: MSR(RI=0)
      opal: Hardware platform error: Unrecoverable Machine Check exception
      CPU: 0 PID: 6700 Comm: qemu-system-ppc Tainted: G   M
      NIP:  c0000000000155ac LR: c0000000000c23c0 CTR: c000000000015580
      REGS: c000000fff9e9d80 TRAP: 0200   Tainted: G   M
      MSR:  9000000000201001 <SF,HV,ME,LE>  CR: 48082222  XER: 00000000
      CFAR: 000000010cbc1a30 DAR: d00017fffcc01a28 DSISR: 00000040 SOFTE: 3
      NIP [c0000000000155ac] perf_trace_tlbie+0x2c/0x1a0
      LR [c0000000000c23c0] do_tlbies+0x1c0/0x280

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 2b127586be30..d4a3f4da409b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -18,6 +18,7 @@
 #include <linux/cma.h>
 #include <linux/bitops.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -633,7 +634,19 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 
 void kvmppc_bad_interrupt(struct pt_regs *regs)
 {
-	die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
+	/*
+	 * 100 could happen at any time, 200 can happen due to invalid real
+	 * address access for example (or any time due to a hardware problem).
+	 */
+	if (TRAP(regs) == 0x100) {
+		get_paca()->in_nmi++;
+		system_reset_exception(regs);
+		get_paca()->in_nmi--;
+	} else if (TRAP(regs) == 0x200) {
+		machine_check_exception(regs);
+	} else {
+		die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
+	}
 	panic("Bad KVM trap");
 }
 
-- 
cgit v1.2.3


From ec531d027ab29b0cfa1c80c8af561b0e74bd4283 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 18 May 2018 21:49:28 +1000
Subject: KVM: PPC: Book3S PR: Enable use on POWER9 inside HPT-mode guests

This relaxes the restriction on using PR KVM on POWER9.  The existing
code does work inside a guest partition running in HPT mode, because
hypercalls such as H_ENTER use the old HPTE format, not the new
format used by POWER9, and so no change to PR KVM's HPT manipulation
code is required.  PR KVM will still refuse to run if the kernel is
using radix translation or if it is running bare-metal.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 67061d399cd9..3d0251edc13c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1735,9 +1735,16 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 static int kvmppc_core_check_processor_compat_pr(void)
 {
 	/*
-	 * Disable KVM for Power9 untill the required bits merged.
+	 * PR KVM can work on POWER9 inside a guest partition
+	 * running in HPT mode.  It can't work if we are using
+	 * radix translation (because radix provides no way for
+	 * a process to have unique translations in quadrant 3)
+	 * or in a bare-metal HPT-mode host (because POWER9
+	 * uses a modified HPTE format which the PR KVM code
+	 * has not been adapted to use).
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+	    (radix_enabled() || cpu_has_feature(CPU_FTR_HVMODE)))
 		return -EIO;
 	return 0;
 }
-- 
cgit v1.2.3


From 46c4a30b0b1638c9b87dfea41436c5699e9fe24e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 10 May 2018 12:13:47 +0100
Subject: arm64: KVM: Use lm_alias() for kvm_ksym_ref()

For historical reasons, we open-code lm_alias() in kvm_ksym_ref().

Let's use lm_alias() to avoid duplication and make things clearer.

As we have to pull this from <linux/mm.h> (which is not safe for
inclusion in assembly), we may as well move the kvm_ksym_ref()
definition into the existing !__ASSEMBLY__ block.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Christoffer Dall <christoffer.dall@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: kvmarm@lists.cs.columbia.edu
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_asm.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f6648a3e4152..a9ceeec5a76f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,16 +33,19 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#ifndef __ASSEMBLY__
+
+#include <linux/mm.h>
+
 /* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)						\
 	({								\
 		void *val = &sym;					\
 		if (!is_kernel_in_hyp_mode())				\
-			val = phys_to_virt((u64)&sym - kimage_voffset);	\
+			val = lm_alias(&sym);				\
 		val;							\
 	 })
 
-#ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
 
-- 
cgit v1.2.3


From 21325631f395b1885a0979ac2eb9838ab2036526 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Tue, 17 Apr 2018 16:40:02 +0100
Subject: MIPS: memset.S: Reinstate delay slot indentation

Assembly language within the MIPS kernel conventionally indents
instructions which are in a branch delay slot to make them easier to
see. Commit 8483b14aaa81 ("MIPS: lib: memset: Whitespace fixes") rather
inexplicably removed all of these indentations from memset.S. Reinstate
the convention for all instructions in a branch delay slot. This
effectively reverts the above commit, plus other locations introduced
with MIPSR6 support.

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19111/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/lib/memset.S | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index f7327979a8f8..1cc306520a55 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -95,7 +95,7 @@
 
 	sltiu		t0, a2, STORSIZE	/* very small region? */
 	bnez		t0, .Lsmall_memset\@
-	andi		t0, a0, STORMASK	/* aligned? */
+	 andi		t0, a0, STORMASK	/* aligned? */
 
 #ifdef CONFIG_CPU_MICROMIPS
 	move		t8, a1			/* used by 'swp' instruction */
@@ -103,12 +103,12 @@
 #endif
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
-	PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
+	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
 #else
 	.set		noat
 	li		AT, STORSIZE
 	beqz		t0, 1f
-	PTR_SUBU	t0, AT			/* alignment in bytes */
+	 PTR_SUBU	t0, AT			/* alignment in bytes */
 	.set		at
 #endif
 
@@ -149,7 +149,7 @@
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
 	beqz		t1, .Lmemset_partial\@	/* no block to fill */
-	andi		t0, a2, 0x40-STORSIZE
+	 andi		t0, a2, 0x40-STORSIZE
 
 	PTR_ADDU	t1, a0			/* end address */
 	.set		reorder
@@ -174,7 +174,7 @@
 	.set		at
 #endif
 	jr		t1
-	PTR_ADDU	a0, t0			/* dest ptr */
+	 PTR_ADDU	a0, t0			/* dest ptr */
 
 	.set		push
 	.set		noreorder
@@ -186,7 +186,7 @@
 
 	beqz		a2, 1f
 #ifndef CONFIG_CPU_MIPSR6
-	PTR_ADDU	a0, a2			/* What's left */
+	 PTR_ADDU	a0, a2			/* What's left */
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -194,7 +194,7 @@
 	EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
 #endif
 #else
-	PTR_SUBU	t0, $0, a2
+	 PTR_SUBU	t0, $0, a2
 	PTR_ADDIU	t0, 1
 	STORE_BYTE(0)
 	STORE_BYTE(1)
@@ -210,11 +210,11 @@
 0:
 #endif
 1:	jr		ra
-	move		a2, zero
+	 move		a2, zero
 
 .Lsmall_memset\@:
 	beqz		a2, 2f
-	PTR_ADDU	t1, a0, a2
+	 PTR_ADDU	t1, a0, a2
 
 1:	PTR_ADDIU	a0, 1			/* fill bytewise */
 	R10KCBARRIER(0(ra))
@@ -222,7 +222,7 @@
 	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
 
 2:	jr		ra			/* done */
-	move		a2, zero
+	 move		a2, zero
 	.if __memset == 1
 	END(memset)
 	.set __memset, 0
@@ -238,7 +238,7 @@
 
 .Lfirst_fixup\@:
 	jr	ra
-	nop
+	 nop
 
 .Lfwd_fixup\@:
 	PTR_L		t0, TI_TASK($28)
@@ -246,7 +246,7 @@
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
 	jr		ra
-	LONG_SUBU	a2, t0
+	 LONG_SUBU	a2, t0
 
 .Lpartial_fixup\@:
 	PTR_L		t0, TI_TASK($28)
@@ -254,7 +254,7 @@
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, a0
 	jr		ra
-	LONG_SUBU	a2, t0
+	 LONG_SUBU	a2, t0
 
 .Llast_fixup\@:
 	jr		ra
@@ -278,7 +278,7 @@
 LEAF(memset)
 EXPORT_SYMBOL(memset)
 	beqz		a1, 1f
-	move		v0, a0			/* result */
+	 move		v0, a0			/* result */
 
 	andi		a1, 0xff		/* spread fillword */
 	LONG_SLL		t1, a1, 8
-- 
cgit v1.2.3


From 94dd7fa1c0b75e909fa54d86ce2d1aaf2c54ceb7 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:20 +0800
Subject: KVM: PPC: Add KVMPPC_VSX_COPY_WORD_LOAD_DUMP type support for mmio
 emulation

Some VSX instructions like lxvwsx will splat word into VSR. This patch
adds a new VSX copy type KVMPPC_VSX_COPY_WORD_LOAD_DUMP to support this.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kvm/powerpc.c          | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 89f44ecc4dbd..4bade292892f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -453,6 +453,7 @@ struct mmio_hpte_cache {
 #define KVMPPC_VSX_COPY_WORD		1
 #define KVMPPC_VSX_COPY_DWORD		2
 #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP	3
+#define KVMPPC_VSX_COPY_WORD_LOAD_DUMP	4
 
 struct openpic;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bef27b16d233..45daf3b9d9d2 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -907,6 +907,26 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
 	}
 }
 
+static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
+	u32 gpr)
+{
+	union kvmppc_one_reg val;
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+		val.vsx32val[0] = gpr;
+		val.vsx32val[1] = gpr;
+		val.vsx32val[2] = gpr;
+		val.vsx32val[3] = gpr;
+		VCPU_VSX_VR(vcpu, index) = val.vval;
+	} else {
+		val.vsx32val[0] = gpr;
+		val.vsx32val[1] = gpr;
+		VCPU_VSX_FPR(vcpu, index, 0) = val.vsxval[0];
+		VCPU_VSX_FPR(vcpu, index, 1) = val.vsxval[0];
+	}
+}
+
 static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
 	u32 gpr32)
 {
@@ -1061,6 +1081,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		else if (vcpu->arch.mmio_vsx_copy_type ==
 				KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
 			kvmppc_set_vsr_dword_dump(vcpu, gpr);
+		else if (vcpu->arch.mmio_vsx_copy_type ==
+				KVMPPC_VSX_COPY_WORD_LOAD_DUMP)
+			kvmppc_set_vsr_word_dump(vcpu, gpr);
 		break;
 #endif
 #ifdef CONFIG_ALTIVEC
-- 
cgit v1.2.3


From 7092360399644ad4b12ac573c1996536b9e9b4b6 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:21 +0800
Subject: KVM: PPC: Reimplement non-SIMD LOAD/STORE instruction mmio emulation
 with analyse_instr() input

This patch reimplements non-SIMD LOAD/STORE instruction MMIO emulation
with analyse_instr() input. It utilizes the BYTEREV/UPDATE/SIGNEXT
properties exported by analyse_instr() and invokes
kvmppc_handle_load(s)/kvmppc_handle_store() accordingly.

It also moves CACHEOP type handling into the skeleton.

instruction_type within kvm_ppc.h is renamed to avoid conflict with
sstep.h.

Suggested-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_ppc.h   |   6 +-
 arch/powerpc/kvm/book3s.c            |   4 +-
 arch/powerpc/kvm/e500_mmu_host.c     |   8 +-
 arch/powerpc/kvm/emulate_loadstore.c | 271 ++++++-----------------------------
 4 files changed, 51 insertions(+), 238 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index abe7032cdb54..139cdf0abf90 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,7 +52,7 @@ enum emulation_result {
 	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
 };
 
-enum instruction_type {
+enum instruction_fetch_type {
 	INST_GENERIC,
 	INST_SC,		/* system call */
 };
@@ -93,7 +93,7 @@ extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				int is_default_endian);
 
 extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
-				 enum instruction_type type, u32 *inst);
+				 enum instruction_fetch_type type, u32 *inst);
 
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 		     bool data);
@@ -330,7 +330,7 @@ extern struct kvmppc_ops *kvmppc_hv_ops;
 extern struct kvmppc_ops *kvmppc_pr_ops;
 
 static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu,
-					enum instruction_type type, u32 *inst)
+				enum instruction_fetch_type type, u32 *inst)
 {
 	int ret = EMULATE_DONE;
 	u32 fetched_inst;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 97d4a112648f..320cdcf84591 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -450,8 +450,8 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
 	return r;
 }
 
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
-					 u32 *inst)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, u32 *inst)
 {
 	ulong pc = kvmppc_get_pc(vcpu);
 	int r;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index c878b4ffb86f..8f2985e46f6f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -625,8 +625,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 }
 
 #ifdef CONFIG_KVM_BOOKE_HV
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
-			  u32 *instr)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, u32 *instr)
 {
 	gva_t geaddr;
 	hpa_t addr;
@@ -715,8 +715,8 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
 	return EMULATE_DONE;
 }
 #else
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
-			  u32 *instr)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, u32 *instr)
 {
 	return EMULATE_AGAIN;
 }
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index b8a3aefc3033..af7c71a0ae6f 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -31,6 +31,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include <asm/ppc-opcode.h>
+#include <asm/sstep.h>
 #include "timing.h"
 #include "trace.h"
 
@@ -84,8 +85,9 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	struct kvm_run *run = vcpu->run;
 	u32 inst;
 	int ra, rs, rt;
-	enum emulation_result emulated;
+	enum emulation_result emulated = EMULATE_FAIL;
 	int advance = 1;
+	struct instruction_op op;
 
 	/* this default type might be overwritten by subcategories */
 	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -113,144 +115,61 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmio_vmx_copy_nums = 0;
 	vcpu->arch.mmio_host_swabbed = 0;
 
-	switch (get_op(inst)) {
-	case 31:
-		switch (get_xop(inst)) {
-		case OP_31_XOP_LWZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-			break;
-
-		case OP_31_XOP_LWZUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LBZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-			break;
-
-		case OP_31_XOP_LBZUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_STDX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 8, 1);
-			break;
-
-		case OP_31_XOP_STDUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
+	emulated = EMULATE_FAIL;
+	vcpu->arch.regs.msr = vcpu->arch.shared->msr;
+	vcpu->arch.regs.ccr = vcpu->arch.cr;
+	if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
+		int type = op.type & INSTR_TYPE_MASK;
+		int size = GETSIZE(op.type);
 
-		case OP_31_XOP_STWX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 4, 1);
-			break;
+		switch (type) {
+		case LOAD:  {
+			int instr_byte_swap = op.type & BYTEREV;
 
-		case OP_31_XOP_STWUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 4, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
+			if (op.type & SIGNEXT)
+				emulated = kvmppc_handle_loads(run, vcpu,
+						op.reg, size, !instr_byte_swap);
+			else
+				emulated = kvmppc_handle_load(run, vcpu,
+						op.reg, size, !instr_byte_swap);
 
-		case OP_31_XOP_STBX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 1, 1);
-			break;
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
 
-		case OP_31_XOP_STBUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 1, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LHAX:
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-			break;
-
-		case OP_31_XOP_LHAUX:
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LHZX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
 			break;
+		}
+		case STORE:
+			/* if need byte reverse, op.val has been reversed by
+			 * analyse_instr().
+			 */
+			emulated = kvmppc_handle_store(run, vcpu, op.val,
+					size, 1);
 
-		case OP_31_XOP_LHZUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
 
-		case OP_31_XOP_STHX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 2, 1);
-			break;
-
-		case OP_31_XOP_STHUX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 2, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
 			break;
-
-		case OP_31_XOP_DCBST:
-		case OP_31_XOP_DCBF:
-		case OP_31_XOP_DCBI:
+		case CACHEOP:
 			/* Do nothing. The guest is performing dcbi because
 			 * hardware DMA is not snooped by the dcache, but
 			 * emulated DMA either goes through the dcache as
 			 * normal writes, or the host kernel has handled dcache
-			 * coherence. */
-			break;
-
-		case OP_31_XOP_LWBRX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
-			break;
-
-		case OP_31_XOP_STWBRX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 4, 0);
-			break;
-
-		case OP_31_XOP_LHBRX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+			 * coherence.
+			 */
+			emulated = EMULATE_DONE;
 			break;
-
-		case OP_31_XOP_STHBRX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 2, 0);
-			break;
-
-		case OP_31_XOP_LDBRX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0);
-			break;
-
-		case OP_31_XOP_STDBRX:
-			emulated = kvmppc_handle_store(run, vcpu,
-					kvmppc_get_gpr(vcpu, rs), 8, 0);
-			break;
-
-		case OP_31_XOP_LDX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
-			break;
-
-		case OP_31_XOP_LDUX:
-			emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+		default:
 			break;
+		}
+	}
 
-		case OP_31_XOP_LWAX:
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
-			break;
 
-		case OP_31_XOP_LWAUX:
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
+	if ((emulated == EMULATE_DONE) || (emulated == EMULATE_DO_MMIO))
+		goto out;
 
+	switch (get_op(inst)) {
+	case 31:
+		switch (get_xop(inst)) {
 #ifdef CONFIG_PPC_FPU
 		case OP_31_XOP_LFSX:
 			if (kvmppc_check_fp_disabled(vcpu))
@@ -502,10 +421,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 		}
 		break;
 
-	case OP_LWZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-		break;
-
 #ifdef CONFIG_PPC_FPU
 	case OP_STFS:
 		if (kvmppc_check_fp_disabled(vcpu))
@@ -542,110 +457,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	                               8, 1);
 		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
 		break;
-#endif
-
-	case OP_LD:
-		rt = get_rt(inst);
-		switch (inst & 3) {
-		case 0:	/* ld */
-			emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
-			break;
-		case 1: /* ldu */
-			emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-		case 2:	/* lwa */
-			emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
-			break;
-		default:
-			emulated = EMULATE_FAIL;
-		}
-		break;
-
-	case OP_LWZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LBZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-		break;
 
-	case OP_LBZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STW:
-		emulated = kvmppc_handle_store(run, vcpu,
-					       kvmppc_get_gpr(vcpu, rs),
-		                               4, 1);
-		break;
-
-	case OP_STD:
-		rs = get_rs(inst);
-		switch (inst & 3) {
-		case 0:	/* std */
-			emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 8, 1);
-			break;
-		case 1: /* stdu */
-			emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-		default:
-			emulated = EMULATE_FAIL;
-		}
-		break;
-
-	case OP_STWU:
-		emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STB:
-		emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 1, 1);
-		break;
-
-	case OP_STBU:
-		emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 1, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LHZ:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-		break;
-
-	case OP_LHZU:
-		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LHA:
-		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-		break;
-
-	case OP_LHAU:
-		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STH:
-		emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 2, 1);
-		break;
-
-	case OP_STHU:
-		emulated = kvmppc_handle_store(run, vcpu,
-				kvmppc_get_gpr(vcpu, rs), 2, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-#ifdef CONFIG_PPC_FPU
 	case OP_LFS:
 		if (kvmppc_check_fp_disabled(vcpu))
 			return EMULATE_DONE;
@@ -684,6 +496,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 		break;
 	}
 
+out:
 	if (emulated == EMULATE_FAIL) {
 		advance = 0;
 		kvmppc_core_queue_program(vcpu, 0);
-- 
cgit v1.2.3


From 2e6baa46b4ae785e3e954aaf9d2e8a0bb06ad33a Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:22 +0800
Subject: KVM: PPC: Add giveup_ext() hook to PPC KVM ops

Currently HV will save math regs(FP/VEC/VSX) when trap into host. But
PR KVM will only save math regs when qemu task switch out of CPU, or
when returning from qemu code.

To emulate FP/VEC/VSX mmio load, PR KVM need to make sure that math
regs were flushed firstly and then be able to update saved VCPU
FPR/VEC/VSX area reasonably.

This patch adds giveup_ext() field to KVM ops. Only PR KVM has non-NULL
giveup_ext() ops. kvmppc_complete_mmio_load() can invoke that hook
(when not NULL) to flush math regs accordingly, before updating saved
register vals.

Math regs flush is also necessary for STORE, which will be covered
in later patch within this patch series.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_ppc.h | 1 +
 arch/powerpc/kvm/book3s_pr.c       | 1 +
 arch/powerpc/kvm/powerpc.c         | 9 +++++++++
 3 files changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 139cdf0abf90..1f087c4cb386 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -324,6 +324,7 @@ struct kvmppc_ops {
 	int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
 	int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
 			    unsigned long flags);
+	void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3d0251edc13c..c74a8885427d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1789,6 +1789,7 @@ static struct kvmppc_ops kvm_ops_pr = {
 #ifdef CONFIG_PPC_BOOK3S_64
 	.hcall_implemented = kvmppc_hcall_impl_pr,
 #endif
+	.giveup_ext = kvmppc_giveup_ext,
 };
 
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 45daf3b9d9d2..8ce9e7ba5fed 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1061,6 +1061,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 		break;
 	case KVM_MMIO_REG_FPR:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP);
+
 		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
 		break;
 #ifdef CONFIG_PPC_BOOK3S
@@ -1074,6 +1077,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 #endif
 #ifdef CONFIG_VSX
 	case KVM_MMIO_REG_VSX:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX);
+
 		if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD)
 			kvmppc_set_vsr_dword(vcpu, gpr);
 		else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD)
@@ -1088,6 +1094,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 #endif
 #ifdef CONFIG_ALTIVEC
 	case KVM_MMIO_REG_VMX:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC);
+
 		kvmppc_set_vmx_dword(vcpu, gpr);
 		break;
 #endif
-- 
cgit v1.2.3


From 2b33cb585f940180b4c527f0de46ff226a9d25ab Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:23 +0800
Subject: KVM: PPC: Reimplement LOAD_FP/STORE_FP instruction mmio emulation
 with analyse_instr() input

This patch reimplements LOAD_FP/STORE_FP instruction MMIO emulation with
analyse_instr() input. It utilizes the FPCONV/UPDATE properties exported by
analyse_instr() and invokes kvmppc_handle_load(s)/kvmppc_handle_store()
accordingly.

For FP store MMIO emulation, the FP regs need to be flushed firstly so
that the right FP reg vals can be read from vcpu->arch.fpr, which will
be stored into MMIO data.

Suggested-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/emulate_loadstore.c | 201 ++++++++---------------------------
 1 file changed, 44 insertions(+), 157 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index af7c71a0ae6f..82f13c17e055 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -138,6 +138,26 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
 			break;
 		}
+#ifdef CONFIG_PPC_FPU
+		case LOAD_FP:
+			if (kvmppc_check_fp_disabled(vcpu))
+				return EMULATE_DONE;
+
+			if (op.type & FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.type & SIGNEXT)
+				emulated = kvmppc_handle_loads(run, vcpu,
+					     KVM_MMIO_REG_FPR|op.reg, size, 1);
+			else
+				emulated = kvmppc_handle_load(run, vcpu,
+					     KVM_MMIO_REG_FPR|op.reg, size, 1);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+#endif
 		case STORE:
 			/* if need byte reverse, op.val has been reversed by
 			 * analyse_instr().
@@ -149,6 +169,30 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
 
 			break;
+#ifdef CONFIG_PPC_FPU
+		case STORE_FP:
+			if (kvmppc_check_fp_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* The FP registers need to be flushed so that
+			 * kvmppc_handle_store() can read actual FP vals
+			 * from vcpu->arch.
+			 */
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_FP);
+
+			if (op.type & FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			emulated = kvmppc_handle_store(run, vcpu,
+					VCPU_FPR(vcpu, op.reg), size, 1);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+#endif
 		case CACHEOP:
 			/* Do nothing. The guest is performing dcbi because
 			 * hardware DMA is not snooped by the dcache, but
@@ -170,93 +214,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	switch (get_op(inst)) {
 	case 31:
 		switch (get_xop(inst)) {
-#ifdef CONFIG_PPC_FPU
-		case OP_31_XOP_LFSX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_load(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 4, 1);
-			break;
-
-		case OP_31_XOP_LFSUX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_load(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 4, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LFDX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_load(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 8, 1);
-			break;
-
-		case OP_31_XOP_LFDUX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_load(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_LFIWAX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_loads(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 4, 1);
-			break;
-
-		case OP_31_XOP_LFIWZX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_load(run, vcpu,
-				KVM_MMIO_REG_FPR|rt, 4, 1);
-			break;
-
-		case OP_31_XOP_STFSX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_store(run, vcpu,
-				VCPU_FPR(vcpu, rs), 4, 1);
-			break;
-
-		case OP_31_XOP_STFSUX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_store(run, vcpu,
-				VCPU_FPR(vcpu, rs), 4, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_STFDX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_store(run, vcpu,
-				VCPU_FPR(vcpu, rs), 8, 1);
-			break;
-
-		case OP_31_XOP_STFDUX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_store(run, vcpu,
-				VCPU_FPR(vcpu, rs), 8, 1);
-			kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-			break;
-
-		case OP_31_XOP_STFIWX:
-			if (kvmppc_check_fp_disabled(vcpu))
-				return EMULATE_DONE;
-			emulated = kvmppc_handle_store(run, vcpu,
-				VCPU_FPR(vcpu, rs), 4, 1);
-			break;
-#endif
-
 #ifdef CONFIG_VSX
 		case OP_31_XOP_LXSDX:
 			if (kvmppc_check_vsx_disabled(vcpu))
@@ -421,76 +378,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 		}
 		break;
 
-#ifdef CONFIG_PPC_FPU
-	case OP_STFS:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		vcpu->arch.mmio_sp64_extend = 1;
-		emulated = kvmppc_handle_store(run, vcpu,
-			VCPU_FPR(vcpu, rs),
-			4, 1);
-		break;
-
-	case OP_STFSU:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		vcpu->arch.mmio_sp64_extend = 1;
-		emulated = kvmppc_handle_store(run, vcpu,
-			VCPU_FPR(vcpu, rs),
-			4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_STFD:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		emulated = kvmppc_handle_store(run, vcpu,
-			VCPU_FPR(vcpu, rs),
-	                               8, 1);
-		break;
-
-	case OP_STFDU:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		emulated = kvmppc_handle_store(run, vcpu,
-			VCPU_FPR(vcpu, rs),
-	                               8, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LFS:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		vcpu->arch.mmio_sp64_extend = 1;
-		emulated = kvmppc_handle_load(run, vcpu,
-			KVM_MMIO_REG_FPR|rt, 4, 1);
-		break;
-
-	case OP_LFSU:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		vcpu->arch.mmio_sp64_extend = 1;
-		emulated = kvmppc_handle_load(run, vcpu,
-			KVM_MMIO_REG_FPR|rt, 4, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-
-	case OP_LFD:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		emulated = kvmppc_handle_load(run, vcpu,
-			KVM_MMIO_REG_FPR|rt, 8, 1);
-		break;
-
-	case OP_LFDU:
-		if (kvmppc_check_fp_disabled(vcpu))
-			return EMULATE_DONE;
-		emulated = kvmppc_handle_load(run, vcpu,
-			KVM_MMIO_REG_FPR|rt, 8, 1);
-		kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
-		break;
-#endif
-
 	default:
 		emulated = EMULATE_FAIL;
 		break;
-- 
cgit v1.2.3


From b01c78c297da45500c18adb99bcc1e08d96768d5 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:24 +0800
Subject: KVM: PPC: Reimplement LOAD_VSX/STORE_VSX instruction mmio emulation
 with analyse_instr() input

This patch reimplements LOAD_VSX/STORE_VSX instruction MMIO emulation with
analyse_instr() input. It utilizes VSX_FPCONV/VSX_SPLAT/SIGNEXT exported
by analyse_instr() and handle accordingly.

When emulating VSX store, the VSX reg will need to be flushed so that
the right reg val can be retrieved before writing to IO MEM.

[paulus@ozlabs.org - mask the register number to 5 bits.]

Suggested-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/emulate_loadstore.c | 227 ++++++++++++++---------------------
 1 file changed, 91 insertions(+), 136 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 82f13c17e055..6dcec74ab249 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -157,6 +157,54 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
 
 			break;
+#endif
+#ifdef CONFIG_VSX
+		case LOAD_VSX: {
+			int io_size_each;
+
+			if (op.vsx_flags & VSX_CHECK_VEC) {
+				if (kvmppc_check_altivec_disabled(vcpu))
+					return EMULATE_DONE;
+			} else {
+				if (kvmppc_check_vsx_disabled(vcpu))
+					return EMULATE_DONE;
+			}
+
+			if (op.vsx_flags & VSX_FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.element_size == 8)  {
+				if (op.vsx_flags & VSX_SPLAT)
+					vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
+				else
+					vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_DWORD;
+			} else if (op.element_size == 4) {
+				if (op.vsx_flags & VSX_SPLAT)
+					vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_WORD_LOAD_DUMP;
+				else
+					vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_WORD;
+			} else
+				break;
+
+			if (size < op.element_size) {
+				/* precision convert case: lxsspx, etc */
+				vcpu->arch.mmio_vsx_copy_nums = 1;
+				io_size_each = size;
+			} else { /* lxvw4x, lxvd2x, etc */
+				vcpu->arch.mmio_vsx_copy_nums =
+					size/op.element_size;
+				io_size_each = op.element_size;
+			}
+
+			emulated = kvmppc_handle_vsx_load(run, vcpu,
+					KVM_MMIO_REG_VSX | (op.reg & 0x1f),
+					io_size_each, 1, op.type & SIGNEXT);
+			break;
+		}
 #endif
 		case STORE:
 			/* if need byte reverse, op.val has been reversed by
@@ -192,6 +240,49 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
 
 			break;
+#endif
+#ifdef CONFIG_VSX
+		case STORE_VSX: {
+			int io_size_each;
+
+			if (op.vsx_flags & VSX_CHECK_VEC) {
+				if (kvmppc_check_altivec_disabled(vcpu))
+					return EMULATE_DONE;
+			} else {
+				if (kvmppc_check_vsx_disabled(vcpu))
+					return EMULATE_DONE;
+			}
+
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_VSX);
+
+			if (op.vsx_flags & VSX_FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.element_size == 8)
+				vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_DWORD;
+			else if (op.element_size == 4)
+				vcpu->arch.mmio_vsx_copy_type =
+						KVMPPC_VSX_COPY_WORD;
+			else
+				break;
+
+			if (size < op.element_size) {
+				/* precise conversion case, like stxsspx */
+				vcpu->arch.mmio_vsx_copy_nums = 1;
+				io_size_each = size;
+			} else { /* stxvw4x, stxvd2x, etc */
+				vcpu->arch.mmio_vsx_copy_nums =
+						size/op.element_size;
+				io_size_each = op.element_size;
+			}
+
+			emulated = kvmppc_handle_vsx_store(run, vcpu,
+					op.reg & 0x1f, io_size_each, 1);
+			break;
+		}
 #endif
 		case CACHEOP:
 			/* Do nothing. The guest is performing dcbi because
@@ -214,142 +305,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	switch (get_op(inst)) {
 	case 31:
 		switch (get_xop(inst)) {
-#ifdef CONFIG_VSX
-		case OP_31_XOP_LXSDX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 8, 1, 0);
-			break;
-
-		case OP_31_XOP_LXSSPX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 4, 1, 0);
-			break;
-
-		case OP_31_XOP_LXSIWAX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 4, 1, 1);
-			break;
-
-		case OP_31_XOP_LXSIWZX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 4, 1, 0);
-			break;
-
-		case OP_31_XOP_LXVD2X:
-		/*
-		 * In this case, the official load/store process is like this:
-		 * Step1, exit from vm by page fault isr, then kvm save vsr.
-		 * Please see guest_exit_cont->store_fp_state->SAVE_32VSRS
-		 * as reference.
-		 *
-		 * Step2, copy data between memory and VCPU
-		 * Notice: for LXVD2X/STXVD2X/LXVW4X/STXVW4X, we use
-		 * 2copies*8bytes or 4copies*4bytes
-		 * to simulate one copy of 16bytes.
-		 * Also there is an endian issue here, we should notice the
-		 * layout of memory.
-		 * Please see MARCO of LXVD2X_ROT/STXVD2X_ROT as more reference.
-		 * If host is little-endian, kvm will call XXSWAPD for
-		 * LXVD2X_ROT/STXVD2X_ROT.
-		 * So, if host is little-endian,
-		 * the postion of memeory should be swapped.
-		 *
-		 * Step3, return to guest, kvm reset register.
-		 * Please see kvmppc_hv_entry->load_fp_state->REST_32VSRS
-		 * as reference.
-		 */
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 2;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 8, 1, 0);
-			break;
-
-		case OP_31_XOP_LXVW4X:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 4;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 4, 1, 0);
-			break;
-
-		case OP_31_XOP_LXVDSX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type =
-				 KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
-			emulated = kvmppc_handle_vsx_load(run, vcpu,
-				KVM_MMIO_REG_VSX|rt, 8, 1, 0);
-			break;
-
-		case OP_31_XOP_STXSDX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_store(run, vcpu,
-						 rs, 8, 1);
-			break;
-
-		case OP_31_XOP_STXSSPX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			vcpu->arch.mmio_sp64_extend = 1;
-			emulated = kvmppc_handle_vsx_store(run, vcpu,
-						 rs, 4, 1);
-			break;
-
-		case OP_31_XOP_STXSIWX:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_offset = 1;
-			vcpu->arch.mmio_vsx_copy_nums = 1;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
-			emulated = kvmppc_handle_vsx_store(run, vcpu,
-							 rs, 4, 1);
-			break;
-
-		case OP_31_XOP_STXVD2X:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 2;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
-			emulated = kvmppc_handle_vsx_store(run, vcpu,
-							 rs, 8, 1);
-			break;
-
-		case OP_31_XOP_STXVW4X:
-			if (kvmppc_check_vsx_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.mmio_vsx_copy_nums = 4;
-			vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
-			emulated = kvmppc_handle_vsx_store(run, vcpu,
-							 rs, 4, 1);
-			break;
-#endif /* CONFIG_VSX */
-
 #ifdef CONFIG_ALTIVEC
 		case OP_31_XOP_LVX:
 			if (kvmppc_check_altivec_disabled(vcpu))
-- 
cgit v1.2.3


From da2a32b876e979d74f84746ae8d066e1d54b568f Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:25 +0800
Subject: KVM: PPC: Expand mmio_vsx_copy_type to cover VMX load/store element
 types

VSX MMIO emulation uses mmio_vsx_copy_type to represent VSX emulated
element size/type, such as KVMPPC_VSX_COPY_DWORD_LOAD, etc. This
patch expands mmio_vsx_copy_type to cover VMX copy type, such as
KVMPPC_VMX_COPY_BYTE(stvebx/lvebx), etc. As a result,
mmio_vsx_copy_type is also renamed to mmio_copy_type.

It is a preparation for reimplementing VMX MMIO emulation.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h  |  9 +++++++--
 arch/powerpc/kvm/emulate_loadstore.c | 14 +++++++-------
 arch/powerpc/kvm/powerpc.c           | 10 +++++-----
 3 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 4bade292892f..fe506c86404a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -455,6 +455,11 @@ struct mmio_hpte_cache {
 #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP	3
 #define KVMPPC_VSX_COPY_WORD_LOAD_DUMP	4
 
+#define KVMPPC_VMX_COPY_BYTE		8
+#define KVMPPC_VMX_COPY_HWORD		9
+#define KVMPPC_VMX_COPY_WORD		10
+#define KVMPPC_VMX_COPY_DWORD		11
+
 struct openpic;
 
 /* W0 and W1 of a XIVE thread management context */
@@ -677,16 +682,16 @@ struct kvm_vcpu_arch {
 	 * Number of simulations for vsx.
 	 * If we use 2*8bytes to simulate 1*16bytes,
 	 * then the number should be 2 and
-	 * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD.
+	 * mmio_copy_type=KVMPPC_VSX_COPY_DWORD.
 	 * If we use 4*4bytes to simulate 1*16bytes,
 	 * the number should be 4 and
 	 * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD.
 	 */
 	u8 mmio_vsx_copy_nums;
 	u8 mmio_vsx_offset;
-	u8 mmio_vsx_copy_type;
 	u8 mmio_vsx_tx_sx_enabled;
 	u8 mmio_vmx_copy_nums;
+	u8 mmio_copy_type;
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 6dcec74ab249..324cfbfa6fa2 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -109,7 +109,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
 	vcpu->arch.mmio_vsx_copy_nums = 0;
 	vcpu->arch.mmio_vsx_offset = 0;
-	vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
+	vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
 	vcpu->arch.mmio_sp64_extend = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 	vcpu->arch.mmio_vmx_copy_nums = 0;
@@ -175,17 +175,17 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
 			if (op.element_size == 8)  {
 				if (op.vsx_flags & VSX_SPLAT)
-					vcpu->arch.mmio_vsx_copy_type =
+					vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
 				else
-					vcpu->arch.mmio_vsx_copy_type =
+					vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_DWORD;
 			} else if (op.element_size == 4) {
 				if (op.vsx_flags & VSX_SPLAT)
-					vcpu->arch.mmio_vsx_copy_type =
+					vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_WORD_LOAD_DUMP;
 				else
-					vcpu->arch.mmio_vsx_copy_type =
+					vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_WORD;
 			} else
 				break;
@@ -261,10 +261,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 				vcpu->arch.mmio_sp64_extend = 1;
 
 			if (op.element_size == 8)
-				vcpu->arch.mmio_vsx_copy_type =
+				vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_DWORD;
 			else if (op.element_size == 4)
-				vcpu->arch.mmio_vsx_copy_type =
+				vcpu->arch.mmio_copy_type =
 						KVMPPC_VSX_COPY_WORD;
 			else
 				break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8ce9e7ba5fed..1580bd24bc74 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1080,14 +1080,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
 			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX);
 
-		if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD)
+		if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD)
 			kvmppc_set_vsr_dword(vcpu, gpr);
-		else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD)
+		else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD)
 			kvmppc_set_vsr_word(vcpu, gpr);
-		else if (vcpu->arch.mmio_vsx_copy_type ==
+		else if (vcpu->arch.mmio_copy_type ==
 				KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
 			kvmppc_set_vsr_dword_dump(vcpu, gpr);
-		else if (vcpu->arch.mmio_vsx_copy_type ==
+		else if (vcpu->arch.mmio_copy_type ==
 				KVMPPC_VSX_COPY_WORD_LOAD_DUMP)
 			kvmppc_set_vsr_word_dump(vcpu, gpr);
 		break;
@@ -1260,7 +1260,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
 	u32 dword_offset, word_offset;
 	union kvmppc_one_reg reg;
 	int vsx_offset = 0;
-	int copy_type = vcpu->arch.mmio_vsx_copy_type;
+	int copy_type = vcpu->arch.mmio_copy_type;
 	int result = 0;
 
 	switch (copy_type) {
-- 
cgit v1.2.3


From acc9eb9305fecd958e2877c4e6cd3284d01c2e82 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Mon, 21 May 2018 13:24:26 +0800
Subject: KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction mmio emulation
 with analyse_instr() input

This patch reimplements LOAD_VMX/STORE_VMX MMIO emulation with
analyse_instr() input. When emulating the store, the VMX reg will need to
be flushed so that the right reg val can be retrieved before writing to
IO MEM.

This patch also adds support for lvebx/lvehx/lvewx/stvebx/stvehx/stvewx
MMIO emulation. To meet the requirement of handling different element
sizes, kvmppc_handle_load128_by2x64()/kvmppc_handle_store128_by2x64()
were replaced with kvmppc_handle_vmx_load()/kvmppc_handle_vmx_store().

The framework used is similar to VSX instruction MMIO emulation.

Suggested-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_host.h  |   1 +
 arch/powerpc/include/asm/kvm_ppc.h   |  10 +-
 arch/powerpc/kvm/emulate_loadstore.c | 124 +++++++++++------
 arch/powerpc/kvm/powerpc.c           | 259 ++++++++++++++++++++++++++++-------
 4 files changed, 302 insertions(+), 92 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fe506c86404a..8dc5e439b387 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -691,6 +691,7 @@ struct kvm_vcpu_arch {
 	u8 mmio_vsx_offset;
 	u8 mmio_vsx_tx_sx_enabled;
 	u8 mmio_vmx_copy_nums;
+	u8 mmio_vmx_offset;
 	u8 mmio_copy_type;
 	u8 osi_needed;
 	u8 osi_enabled;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1f087c4cb386..e991821dd7fa 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -81,10 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned int rt, unsigned int bytes,
 			int is_default_endian, int mmio_sign_extend);
-extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
-		struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
-extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
-		struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
+extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		unsigned int rt, unsigned int bytes, int is_default_endian);
+extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		unsigned int rs, unsigned int bytes, int is_default_endian);
 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			       u64 val, unsigned int bytes,
 			       int is_default_endian);
@@ -265,6 +265,8 @@ union kvmppc_one_reg {
 	vector128 vval;
 	u64	vsxval[2];
 	u32	vsx32val[4];
+	u16	vsx16val[8];
+	u8	vsx8val[16];
 	struct {
 		u64	addr;
 		u64	length;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 324cfbfa6fa2..afde788be141 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -113,6 +113,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmio_sp64_extend = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 	vcpu->arch.mmio_vmx_copy_nums = 0;
+	vcpu->arch.mmio_vmx_offset = 0;
 	vcpu->arch.mmio_host_swabbed = 0;
 
 	emulated = EMULATE_FAIL;
@@ -158,6 +159,46 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
 			break;
 #endif
+#ifdef CONFIG_ALTIVEC
+		case LOAD_VMX:
+			if (kvmppc_check_altivec_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* Hardware enforces alignment of VMX accesses */
+			vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+			vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+			if (size == 16) { /* lvx */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_DWORD;
+			} else if (size == 4) { /* lvewx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_WORD;
+			} else if (size == 2) { /* lvehx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_HWORD;
+			} else if (size == 1) { /* lvebx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_BYTE;
+			} else
+				break;
+
+			vcpu->arch.mmio_vmx_offset =
+				(vcpu->arch.vaddr_accessed & 0xf)/size;
+
+			if (size == 16) {
+				vcpu->arch.mmio_vmx_copy_nums = 2;
+				emulated = kvmppc_handle_vmx_load(run,
+						vcpu, KVM_MMIO_REG_VMX|op.reg,
+						8, 1);
+			} else {
+				vcpu->arch.mmio_vmx_copy_nums = 1;
+				emulated = kvmppc_handle_vmx_load(run, vcpu,
+						KVM_MMIO_REG_VMX|op.reg,
+						size, 1);
+			}
+			break;
+#endif
 #ifdef CONFIG_VSX
 		case LOAD_VSX: {
 			int io_size_each;
@@ -241,6 +282,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
 			break;
 #endif
+#ifdef CONFIG_ALTIVEC
+		case STORE_VMX:
+			if (kvmppc_check_altivec_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* Hardware enforces alignment of VMX accesses. */
+			vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+			vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_VEC);
+			if (size == 16) { /* stvx */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_DWORD;
+			} else if (size == 4) { /* stvewx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_WORD;
+			} else if (size == 2) { /* stvehx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_HWORD;
+			} else if (size == 1) { /* stvebx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_BYTE;
+			} else
+				break;
+
+			vcpu->arch.mmio_vmx_offset =
+				(vcpu->arch.vaddr_accessed & 0xf)/size;
+
+			if (size == 16) {
+				vcpu->arch.mmio_vmx_copy_nums = 2;
+				emulated = kvmppc_handle_vmx_store(run,
+						vcpu, op.reg, 8, 1);
+			} else {
+				vcpu->arch.mmio_vmx_copy_nums = 1;
+				emulated = kvmppc_handle_vmx_store(run,
+						vcpu, op.reg, size, 1);
+			}
+
+			break;
+#endif
 #ifdef CONFIG_VSX
 		case STORE_VSX: {
 			int io_size_each;
@@ -298,47 +381,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 		}
 	}
 
-
-	if ((emulated == EMULATE_DONE) || (emulated == EMULATE_DO_MMIO))
-		goto out;
-
-	switch (get_op(inst)) {
-	case 31:
-		switch (get_xop(inst)) {
-#ifdef CONFIG_ALTIVEC
-		case OP_31_XOP_LVX:
-			if (kvmppc_check_altivec_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.vaddr_accessed &= ~0xFULL;
-			vcpu->arch.paddr_accessed &= ~0xFULL;
-			vcpu->arch.mmio_vmx_copy_nums = 2;
-			emulated = kvmppc_handle_load128_by2x64(run, vcpu,
-					KVM_MMIO_REG_VMX|rt, 1);
-			break;
-
-		case OP_31_XOP_STVX:
-			if (kvmppc_check_altivec_disabled(vcpu))
-				return EMULATE_DONE;
-			vcpu->arch.vaddr_accessed &= ~0xFULL;
-			vcpu->arch.paddr_accessed &= ~0xFULL;
-			vcpu->arch.mmio_vmx_copy_nums = 2;
-			emulated = kvmppc_handle_store128_by2x64(run, vcpu,
-					rs, 1);
-			break;
-#endif /* CONFIG_ALTIVEC */
-
-		default:
-			emulated = EMULATE_FAIL;
-			break;
-		}
-		break;
-
-	default:
-		emulated = EMULATE_FAIL;
-		break;
-	}
-
-out:
 	if (emulated == EMULATE_FAIL) {
 		advance = 0;
 		kvmppc_core_queue_program(vcpu, 0);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1580bd24bc74..05eccdc10fdd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -953,30 +953,110 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_ALTIVEC
+static inline int kvmppc_get_vmx_offset_generic(struct kvm_vcpu *vcpu,
+		int index, int element_size)
+{
+	int offset;
+	int elts = sizeof(vector128)/element_size;
+
+	if ((index < 0) || (index >= elts))
+		return -1;
+
+	if (kvmppc_need_byteswap(vcpu))
+		offset = elts - index - 1;
+	else
+		offset = index;
+
+	return offset;
+}
+
+static inline int kvmppc_get_vmx_dword_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 8);
+}
+
+static inline int kvmppc_get_vmx_word_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 4);
+}
+
+static inline int kvmppc_get_vmx_hword_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 2);
+}
+
+static inline int kvmppc_get_vmx_byte_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 1);
+}
+
+
 static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
-		u64 gpr)
+	u64 gpr)
 {
+	union kvmppc_one_reg val;
+	int offset = kvmppc_get_vmx_dword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
 	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
-	u32 hi, lo;
-	u32 di;
 
-#ifdef __BIG_ENDIAN
-	hi = gpr >> 32;
-	lo = gpr & 0xffffffff;
-#else
-	lo = gpr >> 32;
-	hi = gpr & 0xffffffff;
-#endif
+	if (offset == -1)
+		return;
+
+	val.vval = VCPU_VSX_VR(vcpu, index);
+	val.vsxval[offset] = gpr;
+	VCPU_VSX_VR(vcpu, index) = val.vval;
+}
+
+static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu,
+	u32 gpr32)
+{
+	union kvmppc_one_reg val;
+	int offset = kvmppc_get_vmx_word_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (offset == -1)
+		return;
+
+	val.vval = VCPU_VSX_VR(vcpu, index);
+	val.vsx32val[offset] = gpr32;
+	VCPU_VSX_VR(vcpu, index) = val.vval;
+}
 
-	di = 2 - vcpu->arch.mmio_vmx_copy_nums;		/* doubleword index */
-	if (di > 1)
+static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu,
+	u16 gpr16)
+{
+	union kvmppc_one_reg val;
+	int offset = kvmppc_get_vmx_hword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (offset == -1)
 		return;
 
-	if (vcpu->arch.mmio_host_swabbed)
-		di = 1 - di;
+	val.vval = VCPU_VSX_VR(vcpu, index);
+	val.vsx16val[offset] = gpr16;
+	VCPU_VSX_VR(vcpu, index) = val.vval;
+}
+
+static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu,
+	u8 gpr8)
+{
+	union kvmppc_one_reg val;
+	int offset = kvmppc_get_vmx_byte_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
 
-	VCPU_VSX_VR(vcpu, index).u[di * 2] = hi;
-	VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo;
+	if (offset == -1)
+		return;
+
+	val.vval = VCPU_VSX_VR(vcpu, index);
+	val.vsx8val[offset] = gpr8;
+	VCPU_VSX_VR(vcpu, index) = val.vval;
 }
 #endif /* CONFIG_ALTIVEC */
 
@@ -1097,7 +1177,16 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
 		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
 			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC);
 
-		kvmppc_set_vmx_dword(vcpu, gpr);
+		if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_DWORD)
+			kvmppc_set_vmx_dword(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_WORD)
+			kvmppc_set_vmx_word(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VMX_COPY_HWORD)
+			kvmppc_set_vmx_hword(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VMX_COPY_BYTE)
+			kvmppc_set_vmx_byte(vcpu, gpr);
 		break;
 #endif
 	default:
@@ -1376,14 +1465,16 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_ALTIVEC
-/* handle quadword load access in two halves */
-int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
-		unsigned int rt, int is_default_endian)
+int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		unsigned int rt, unsigned int bytes, int is_default_endian)
 {
 	enum emulation_result emulated = EMULATE_DONE;
 
+	if (vcpu->arch.mmio_vsx_copy_nums > 2)
+		return EMULATE_FAIL;
+
 	while (vcpu->arch.mmio_vmx_copy_nums) {
-		emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
+		emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
 				is_default_endian, 0);
 
 		if (emulated != EMULATE_DONE)
@@ -1391,55 +1482,127 @@ int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		vcpu->arch.paddr_accessed += run->mmio.len;
 		vcpu->arch.mmio_vmx_copy_nums--;
+		vcpu->arch.mmio_vmx_offset++;
 	}
 
 	return emulated;
 }
 
-static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
 {
-	vector128 vrs = VCPU_VSX_VR(vcpu, rs);
-	u32 di;
-	u64 w0, w1;
+	union kvmppc_one_reg reg;
+	int vmx_offset = 0;
+	int result = 0;
+
+	vmx_offset =
+		kvmppc_get_vmx_dword_offset(vcpu, vcpu->arch.mmio_vmx_offset);
 
-	di = 2 - vcpu->arch.mmio_vmx_copy_nums;		/* doubleword index */
-	if (di > 1)
+	if (vmx_offset == -1)
 		return -1;
 
-	if (kvmppc_need_byteswap(vcpu))
-		di = 1 - di;
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsxval[vmx_offset];
 
-	w0 = vrs.u[di * 2];
-	w1 = vrs.u[di * 2 + 1];
+	return result;
+}
 
-#ifdef __BIG_ENDIAN
-	*val = (w0 << 32) | w1;
-#else
-	*val = (w1 << 32) | w0;
-#endif
-	return 0;
+int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int vmx_offset = 0;
+	int result = 0;
+
+	vmx_offset =
+		kvmppc_get_vmx_word_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+	if (vmx_offset == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx32val[vmx_offset];
+
+	return result;
+}
+
+int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int vmx_offset = 0;
+	int result = 0;
+
+	vmx_offset =
+		kvmppc_get_vmx_hword_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+	if (vmx_offset == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx16val[vmx_offset];
+
+	return result;
+}
+
+int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int vmx_offset = 0;
+	int result = 0;
+
+	vmx_offset =
+		kvmppc_get_vmx_byte_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+	if (vmx_offset == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx8val[vmx_offset];
+
+	return result;
 }
 
-/* handle quadword store in two halves */
-int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
-		unsigned int rs, int is_default_endian)
+int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		unsigned int rs, unsigned int bytes, int is_default_endian)
 {
 	u64 val = 0;
+	unsigned int index = rs & KVM_MMIO_REG_MASK;
 	enum emulation_result emulated = EMULATE_DONE;
 
+	if (vcpu->arch.mmio_vsx_copy_nums > 2)
+		return EMULATE_FAIL;
+
 	vcpu->arch.io_gpr = rs;
 
 	while (vcpu->arch.mmio_vmx_copy_nums) {
-		if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1)
+		switch (vcpu->arch.mmio_copy_type) {
+		case KVMPPC_VMX_COPY_DWORD:
+			if (kvmppc_get_vmx_dword(vcpu, index, &val) == -1)
+				return EMULATE_FAIL;
+
+			break;
+		case KVMPPC_VMX_COPY_WORD:
+			if (kvmppc_get_vmx_word(vcpu, index, &val) == -1)
+				return EMULATE_FAIL;
+			break;
+		case KVMPPC_VMX_COPY_HWORD:
+			if (kvmppc_get_vmx_hword(vcpu, index, &val) == -1)
+				return EMULATE_FAIL;
+			break;
+		case KVMPPC_VMX_COPY_BYTE:
+			if (kvmppc_get_vmx_byte(vcpu, index, &val) == -1)
+				return EMULATE_FAIL;
+			break;
+		default:
 			return EMULATE_FAIL;
+		}
 
-		emulated = kvmppc_handle_store(run, vcpu, val, 8,
+		emulated = kvmppc_handle_store(run, vcpu, val, bytes,
 				is_default_endian);
 		if (emulated != EMULATE_DONE)
 			break;
 
 		vcpu->arch.paddr_accessed += run->mmio.len;
 		vcpu->arch.mmio_vmx_copy_nums--;
+		vcpu->arch.mmio_vmx_offset++;
 	}
 
 	return emulated;
@@ -1454,11 +1617,11 @@ static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
 	vcpu->arch.paddr_accessed += run->mmio.len;
 
 	if (!vcpu->mmio_is_write) {
-		emulated = kvmppc_handle_load128_by2x64(run, vcpu,
-				vcpu->arch.io_gpr, 1);
+		emulated = kvmppc_handle_vmx_load(run, vcpu,
+				vcpu->arch.io_gpr, run->mmio.len, 1);
 	} else {
-		emulated = kvmppc_handle_store128_by2x64(run, vcpu,
-				vcpu->arch.io_gpr, 1);
+		emulated = kvmppc_handle_vmx_store(run, vcpu,
+				vcpu->arch.io_gpr, run->mmio.len, 1);
 	}
 
 	switch (emulated) {
@@ -1602,8 +1765,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		}
 #endif
 #ifdef CONFIG_ALTIVEC
-		if (vcpu->arch.mmio_vmx_copy_nums > 0)
+		if (vcpu->arch.mmio_vmx_copy_nums > 0) {
 			vcpu->arch.mmio_vmx_copy_nums--;
+			vcpu->arch.mmio_vmx_offset++;
+		}
 
 		if (vcpu->arch.mmio_vmx_copy_nums > 0) {
 			r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
-- 
cgit v1.2.3


From 6514dc380d35570ef8b0cf107d388fe3169cca11 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 26 Apr 2018 16:09:12 -0700
Subject: kvm: nVMX: Use nested_run_pending rather than from_vmentry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When saving a vCPU's nested state, the vmcs02 is discarded. Only the
shadow vmcs12 is saved. The shadow vmcs12 contains all of the
information needed to reconstruct an equivalent vmcs02 on restore, but
we have to be able to deal with two contexts:

1. The nested state was saved immediately after an emulated VM-entry,
   before the vmcs02 was ever launched.

2. The nested state was saved some time after the first successful
   launch of the vmcs02.

Though it's an implementation detail rather than an architected bit,
vmx->nested_run_pending serves to distinguish between these two
cases. Hence, we save it as part of the vCPU's nested state. (Yes,
this is ugly.)

Even when restoring from a checkpoint, it may be necessary to build
the vmcs02 as if prepare_vmcs02 was called from nested_vmx_run. So,
the 'from_vmentry' argument should be dropped, and
vmx->nested_run_pending should be consulted instead. The nested state
restoration code then has to set vmx->nested_run_pending prior to
calling prepare_vmcs02. It's important that the restoration code set
vmx->nested_run_pending anyway, since the flag impacts things like
interrupt delivery as well.

Fixes: cf8b84f48a59 ("kvm: nVMX: Prepare for checkpointing L2 state")
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ea098131dcce..95c05581a06f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10882,8 +10882,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 	return 0;
 }
 
-static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-			       bool from_vmentry)
+static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -11017,13 +11016,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
  * is assigned to entry_failure_code on failure.
  */
 static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-			  bool from_vmentry, u32 *entry_failure_code)
+			  u32 *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control, vmcs12_exec_ctrl;
 
 	if (vmx->nested.dirty_vmcs12) {
-		prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+		prepare_vmcs02_full(vcpu, vmcs12);
 		vmx->nested.dirty_vmcs12 = false;
 	}
 
@@ -11043,7 +11042,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	 * HOST_FS_BASE, HOST_GS_BASE.
 	 */
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
 		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
@@ -11051,7 +11050,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
 	}
-	if (from_vmentry) {
+	if (vmx->nested.nested_run_pending) {
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			     vmcs12->vm_entry_intr_info_field);
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -11183,7 +11182,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
 		vcpu->arch.pat = vmcs12->guest_ia32_pat;
@@ -11251,7 +11250,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
 	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
 
-	if (from_vmentry &&
+	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
 		vcpu->arch.efer = vmcs12->guest_ia32_efer;
 	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@ -11429,7 +11428,7 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	return 0;
 }
 
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -11449,7 +11448,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
 
 	r = EXIT_REASON_INVALID_STATE;
-	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+	if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
 		goto fail;
 
 	nested_get_vmcs12_pages(vcpu, vmcs12);
@@ -11551,20 +11550,22 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 * the nested entry.
 	 */
 
-	ret = enter_vmx_non_root_mode(vcpu, true);
-	if (ret)
+	vmx->nested.nested_run_pending = 1;
+	ret = enter_vmx_non_root_mode(vcpu);
+	if (ret) {
+		vmx->nested.nested_run_pending = 0;
 		return ret;
+	}
 
 	/*
 	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
 	 * by event injection, halt vcpu.
 	 */
 	if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
+	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
+		vmx->nested.nested_run_pending = 0;
 		return kvm_vcpu_halt(vcpu);
-
-	vmx->nested.nested_run_pending = 1;
-
+	}
 	return 1;
 
 out:
@@ -12625,7 +12626,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
 
 	if (vmx->nested.smm.guest_mode) {
 		vcpu->arch.hflags &= ~HF_SMM_MASK;
-		ret = enter_vmx_non_root_mode(vcpu, false);
+		ret = enter_vmx_non_root_mode(vcpu);
 		vcpu->arch.hflags |= HF_SMM_MASK;
 		if (ret)
 			return ret;
-- 
cgit v1.2.3


From 899a31f509ee1c6f7008c3265d1f625bfcb23311 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Apr 2018 10:04:26 +0200
Subject: KVM: x86: use timespec64 for KVM_HC_CLOCK_PAIRING
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hypercall was added using a struct timespec based implementation,
but we should not use timespec in new code.

This changes it to timespec64. There is no functional change
here since the implementation is only used in 64-bit kernels
that use the same definition for timespec and timespec64.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 182693f8bd71..ba55be9b5c27 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1764,7 +1764,7 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
 	return mode;
 }
 
-static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
+static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
 {
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	unsigned long seq;
@@ -1797,7 +1797,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
 }
 
 /* returns true if host is using TSC based clocksource */
-static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
 					   u64 *tsc_timestamp)
 {
 	/* checked again under seqlock below */
@@ -6626,7 +6626,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 			        unsigned long clock_type)
 {
 	struct kvm_clock_pairing clock_pairing;
-	struct timespec ts;
+	struct timespec64 ts;
 	u64 cycle;
 	int ret;
 
-- 
cgit v1.2.3


From b348e7933c41b973dd953c4265ad1a60222c8ccf Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 1 May 2018 15:40:27 -0700
Subject: KVM: nVMX: Restore the VMCS12 offsets for v4.0 fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changing the VMCS12 layout will break save/restore compatibility with
older kvm releases once the KVM_{GET,SET}_NESTED_STATE ioctls are
accepted upstream. Google has already been using these ioctls for some
time, and we implore the community not to disturb the existing layout.

Move the four most recently added fields to preserve the offsets of
the previously defined fields and reserve locations for the vmread and
vmwrite bitmaps, which will be used in the virtualization of VMCS
shadowing (to improve the performance of double-nesting).

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[Kept the SDM order in vmcs_field_to_offset_table. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/vmx.h |  2 ++
 arch/x86/kvm/vmx.c         | 25 ++++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 5db8b0b10766..425e6b8b9547 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -207,7 +207,9 @@ enum vmcs_field {
 	EPTP_LIST_ADDRESS               = 0x00002024,
 	EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
 	VMREAD_BITMAP                   = 0x00002026,
+	VMREAD_BITMAP_HIGH              = 0x00002027,
 	VMWRITE_BITMAP                  = 0x00002028,
+	VMWRITE_BITMAP_HIGH             = 0x00002029,
 	XSS_EXIT_BITMAP                 = 0x0000202C,
 	XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
 	TSC_MULTIPLIER                  = 0x00002032,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 95c05581a06f..722026c9d478 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -242,7 +242,11 @@ struct shared_msr_entry {
  * underlying hardware which will be used to run L2.
  * This structure is packed to ensure that its layout is identical across
  * machines (necessary for live migration).
- * If there are changes in this struct, VMCS12_REVISION must be changed.
+ *
+ * IMPORTANT: Changing the layout of existing fields in this structure
+ * will break save/restore compatibility with older kvm releases. When
+ * adding new fields, either use space in the reserved padding* arrays
+ * or add the new fields to the end of the structure.
  */
 typedef u64 natural_width;
 struct __packed vmcs12 {
@@ -265,17 +269,14 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 posted_intr_desc_addr;
-	u64 vm_function_control;
 	u64 ept_pointer;
 	u64 eoi_exit_bitmap0;
 	u64 eoi_exit_bitmap1;
 	u64 eoi_exit_bitmap2;
 	u64 eoi_exit_bitmap3;
-	u64 eptp_list_address;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
-	u64 pml_address;
 	u64 guest_ia32_debugctl;
 	u64 guest_ia32_pat;
 	u64 guest_ia32_efer;
@@ -288,7 +289,12 @@ struct __packed vmcs12 {
 	u64 host_ia32_pat;
 	u64 host_ia32_efer;
 	u64 host_ia32_perf_global_ctrl;
-	u64 padding64[8]; /* room for future expansion */
+	u64 vmread_bitmap;
+	u64 vmwrite_bitmap;
+	u64 vm_function_control;
+	u64 eptp_list_address;
+	u64 pml_address;
+	u64 padding64[3]; /* room for future expansion */
 	/*
 	 * To allow migration of L1 (complete with its L2 guests) between
 	 * machines of different natural widths (32 or 64 bit), we cannot have
@@ -397,7 +403,6 @@ struct __packed vmcs12 {
 	u16 guest_ldtr_selector;
 	u16 guest_tr_selector;
 	u16 guest_intr_status;
-	u16 guest_pml_index;
 	u16 host_es_selector;
 	u16 host_cs_selector;
 	u16 host_ss_selector;
@@ -405,12 +410,16 @@ struct __packed vmcs12 {
 	u16 host_fs_selector;
 	u16 host_gs_selector;
 	u16 host_tr_selector;
+	u16 guest_pml_index;
 };
 
 /*
  * VMCS12_REVISION is an arbitrary id that should be changed if the content or
  * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
  * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
+ *
+ * IMPORTANT: Changing this value will break save/restore compatibility with
+ * older kvm releases.
  */
 #define VMCS12_REVISION 0x11e57ed0
 
@@ -762,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
 	FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
 	FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
+	FIELD64(PML_ADDRESS, pml_address),
 	FIELD64(TSC_OFFSET, tsc_offset),
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
@@ -773,10 +783,11 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
 	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
 	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
+	FIELD64(VMREAD_BITMAP, vmread_bitmap),
+	FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
-	FIELD64(PML_ADDRESS, pml_address),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
 	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
 	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
-- 
cgit v1.2.3


From 21ebf53b2cb74ea78299f8238a4c51b97dff421e Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 1 May 2018 15:40:28 -0700
Subject: KVM: nVMX: Ensure that VMCS12 field offsets do not change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enforce the invariant that existing VMCS12 field offsets must not
change. Experience has shown that without strict enforcement, this
invariant will not be maintained.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[Changed the code to use BUILD_BUG_ON_MSG instead of better, but GCC 4.6
 requiring _Static_assert. - Radim.]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 722026c9d478..c34437ad5d9c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -413,6 +413,162 @@ struct __packed vmcs12 {
 	u16 guest_pml_index;
 };
 
+/*
+ * For save/restore compatibility, the vmcs12 field offsets must not change.
+ */
+#define CHECK_OFFSET(field, loc)				\
+	BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),	\
+		"Offset of " #field " in struct vmcs12 has changed.")
+
+static inline void vmx_check_vmcs12_offsets(void) {
+	CHECK_OFFSET(revision_id, 0);
+	CHECK_OFFSET(abort, 4);
+	CHECK_OFFSET(launch_state, 8);
+	CHECK_OFFSET(io_bitmap_a, 40);
+	CHECK_OFFSET(io_bitmap_b, 48);
+	CHECK_OFFSET(msr_bitmap, 56);
+	CHECK_OFFSET(vm_exit_msr_store_addr, 64);
+	CHECK_OFFSET(vm_exit_msr_load_addr, 72);
+	CHECK_OFFSET(vm_entry_msr_load_addr, 80);
+	CHECK_OFFSET(tsc_offset, 88);
+	CHECK_OFFSET(virtual_apic_page_addr, 96);
+	CHECK_OFFSET(apic_access_addr, 104);
+	CHECK_OFFSET(posted_intr_desc_addr, 112);
+	CHECK_OFFSET(ept_pointer, 120);
+	CHECK_OFFSET(eoi_exit_bitmap0, 128);
+	CHECK_OFFSET(eoi_exit_bitmap1, 136);
+	CHECK_OFFSET(eoi_exit_bitmap2, 144);
+	CHECK_OFFSET(eoi_exit_bitmap3, 152);
+	CHECK_OFFSET(xss_exit_bitmap, 160);
+	CHECK_OFFSET(guest_physical_address, 168);
+	CHECK_OFFSET(vmcs_link_pointer, 176);
+	CHECK_OFFSET(guest_ia32_debugctl, 184);
+	CHECK_OFFSET(guest_ia32_pat, 192);
+	CHECK_OFFSET(guest_ia32_efer, 200);
+	CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
+	CHECK_OFFSET(guest_pdptr0, 216);
+	CHECK_OFFSET(guest_pdptr1, 224);
+	CHECK_OFFSET(guest_pdptr2, 232);
+	CHECK_OFFSET(guest_pdptr3, 240);
+	CHECK_OFFSET(guest_bndcfgs, 248);
+	CHECK_OFFSET(host_ia32_pat, 256);
+	CHECK_OFFSET(host_ia32_efer, 264);
+	CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
+	CHECK_OFFSET(vmread_bitmap, 280);
+	CHECK_OFFSET(vmwrite_bitmap, 288);
+	CHECK_OFFSET(vm_function_control, 296);
+	CHECK_OFFSET(eptp_list_address, 304);
+	CHECK_OFFSET(pml_address, 312);
+	CHECK_OFFSET(cr0_guest_host_mask, 344);
+	CHECK_OFFSET(cr4_guest_host_mask, 352);
+	CHECK_OFFSET(cr0_read_shadow, 360);
+	CHECK_OFFSET(cr4_read_shadow, 368);
+	CHECK_OFFSET(cr3_target_value0, 376);
+	CHECK_OFFSET(cr3_target_value1, 384);
+	CHECK_OFFSET(cr3_target_value2, 392);
+	CHECK_OFFSET(cr3_target_value3, 400);
+	CHECK_OFFSET(exit_qualification, 408);
+	CHECK_OFFSET(guest_linear_address, 416);
+	CHECK_OFFSET(guest_cr0, 424);
+	CHECK_OFFSET(guest_cr3, 432);
+	CHECK_OFFSET(guest_cr4, 440);
+	CHECK_OFFSET(guest_es_base, 448);
+	CHECK_OFFSET(guest_cs_base, 456);
+	CHECK_OFFSET(guest_ss_base, 464);
+	CHECK_OFFSET(guest_ds_base, 472);
+	CHECK_OFFSET(guest_fs_base, 480);
+	CHECK_OFFSET(guest_gs_base, 488);
+	CHECK_OFFSET(guest_ldtr_base, 496);
+	CHECK_OFFSET(guest_tr_base, 504);
+	CHECK_OFFSET(guest_gdtr_base, 512);
+	CHECK_OFFSET(guest_idtr_base, 520);
+	CHECK_OFFSET(guest_dr7, 528);
+	CHECK_OFFSET(guest_rsp, 536);
+	CHECK_OFFSET(guest_rip, 544);
+	CHECK_OFFSET(guest_rflags, 552);
+	CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
+	CHECK_OFFSET(guest_sysenter_esp, 568);
+	CHECK_OFFSET(guest_sysenter_eip, 576);
+	CHECK_OFFSET(host_cr0, 584);
+	CHECK_OFFSET(host_cr3, 592);
+	CHECK_OFFSET(host_cr4, 600);
+	CHECK_OFFSET(host_fs_base, 608);
+	CHECK_OFFSET(host_gs_base, 616);
+	CHECK_OFFSET(host_tr_base, 624);
+	CHECK_OFFSET(host_gdtr_base, 632);
+	CHECK_OFFSET(host_idtr_base, 640);
+	CHECK_OFFSET(host_ia32_sysenter_esp, 648);
+	CHECK_OFFSET(host_ia32_sysenter_eip, 656);
+	CHECK_OFFSET(host_rsp, 664);
+	CHECK_OFFSET(host_rip, 672);
+	CHECK_OFFSET(pin_based_vm_exec_control, 744);
+	CHECK_OFFSET(cpu_based_vm_exec_control, 748);
+	CHECK_OFFSET(exception_bitmap, 752);
+	CHECK_OFFSET(page_fault_error_code_mask, 756);
+	CHECK_OFFSET(page_fault_error_code_match, 760);
+	CHECK_OFFSET(cr3_target_count, 764);
+	CHECK_OFFSET(vm_exit_controls, 768);
+	CHECK_OFFSET(vm_exit_msr_store_count, 772);
+	CHECK_OFFSET(vm_exit_msr_load_count, 776);
+	CHECK_OFFSET(vm_entry_controls, 780);
+	CHECK_OFFSET(vm_entry_msr_load_count, 784);
+	CHECK_OFFSET(vm_entry_intr_info_field, 788);
+	CHECK_OFFSET(vm_entry_exception_error_code, 792);
+	CHECK_OFFSET(vm_entry_instruction_len, 796);
+	CHECK_OFFSET(tpr_threshold, 800);
+	CHECK_OFFSET(secondary_vm_exec_control, 804);
+	CHECK_OFFSET(vm_instruction_error, 808);
+	CHECK_OFFSET(vm_exit_reason, 812);
+	CHECK_OFFSET(vm_exit_intr_info, 816);
+	CHECK_OFFSET(vm_exit_intr_error_code, 820);
+	CHECK_OFFSET(idt_vectoring_info_field, 824);
+	CHECK_OFFSET(idt_vectoring_error_code, 828);
+	CHECK_OFFSET(vm_exit_instruction_len, 832);
+	CHECK_OFFSET(vmx_instruction_info, 836);
+	CHECK_OFFSET(guest_es_limit, 840);
+	CHECK_OFFSET(guest_cs_limit, 844);
+	CHECK_OFFSET(guest_ss_limit, 848);
+	CHECK_OFFSET(guest_ds_limit, 852);
+	CHECK_OFFSET(guest_fs_limit, 856);
+	CHECK_OFFSET(guest_gs_limit, 860);
+	CHECK_OFFSET(guest_ldtr_limit, 864);
+	CHECK_OFFSET(guest_tr_limit, 868);
+	CHECK_OFFSET(guest_gdtr_limit, 872);
+	CHECK_OFFSET(guest_idtr_limit, 876);
+	CHECK_OFFSET(guest_es_ar_bytes, 880);
+	CHECK_OFFSET(guest_cs_ar_bytes, 884);
+	CHECK_OFFSET(guest_ss_ar_bytes, 888);
+	CHECK_OFFSET(guest_ds_ar_bytes, 892);
+	CHECK_OFFSET(guest_fs_ar_bytes, 896);
+	CHECK_OFFSET(guest_gs_ar_bytes, 900);
+	CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
+	CHECK_OFFSET(guest_tr_ar_bytes, 908);
+	CHECK_OFFSET(guest_interruptibility_info, 912);
+	CHECK_OFFSET(guest_activity_state, 916);
+	CHECK_OFFSET(guest_sysenter_cs, 920);
+	CHECK_OFFSET(host_ia32_sysenter_cs, 924);
+	CHECK_OFFSET(vmx_preemption_timer_value, 928);
+	CHECK_OFFSET(virtual_processor_id, 960);
+	CHECK_OFFSET(posted_intr_nv, 962);
+	CHECK_OFFSET(guest_es_selector, 964);
+	CHECK_OFFSET(guest_cs_selector, 966);
+	CHECK_OFFSET(guest_ss_selector, 968);
+	CHECK_OFFSET(guest_ds_selector, 970);
+	CHECK_OFFSET(guest_fs_selector, 972);
+	CHECK_OFFSET(guest_gs_selector, 974);
+	CHECK_OFFSET(guest_ldtr_selector, 976);
+	CHECK_OFFSET(guest_tr_selector, 978);
+	CHECK_OFFSET(guest_intr_status, 980);
+	CHECK_OFFSET(host_es_selector, 982);
+	CHECK_OFFSET(host_cs_selector, 984);
+	CHECK_OFFSET(host_ss_selector, 986);
+	CHECK_OFFSET(host_ds_selector, 988);
+	CHECK_OFFSET(host_fs_selector, 990);
+	CHECK_OFFSET(host_gs_selector, 992);
+	CHECK_OFFSET(host_tr_selector, 994);
+	CHECK_OFFSET(guest_pml_index, 996);
+}
+
 /*
  * VMCS12_REVISION is an arbitrary id that should be changed if the content or
  * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
@@ -12834,6 +12990,7 @@ static int __init vmx_init(void)
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
+	vmx_check_vmcs12_offsets();
 
 	return 0;
 }
-- 
cgit v1.2.3


From d1157b1074343fa3e21c083277fc4eae162ff1e4 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:03:09 +0100
Subject: MIPS: ptrace: Make FPU context layout comments match reality

Correct comments across ptrace(2) handlers about an FPU register context
layout discrepancy between MIPS I and later ISAs, which was fixed with
`linux-mips.org' (LMO) commit 42533948caac ("Major pile of FP emulator
changes."), the fix corrected with LMO commit 849fa7a50dff ("R3k FPU
ptrace() handling fixes."), and then broken and fixed over and over
again, until last time fixed with commit 80cbfad79096 ("MIPS: Correct
MIPS I FP context layout").

NB running the GDB test suite for the relevant ABI/ISA and watching out
for regressions is advisable when poking around ptrace(2).

Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/19326/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/kernel/ptrace.c   | 4 ++--
 arch/mips/kernel/ptrace32.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0b23b1ad99e6..1098ca8b50e9 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -797,7 +797,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
-				 * registers - unless we're using r2k_switch.S.
+				 * registers.
 				 */
 				tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
 						addr & 1);
@@ -892,7 +892,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
-				 * registers - unless we're using r2k_switch.S.
+				 * registers.
 				 */
 				set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
 					  addr & 1, data);
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 2b9260f92ccd..c6fc496430e9 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -103,7 +103,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
-				 * registers - unless we're using r2k_switch.S.
+				 * registers.
 				 */
 				tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE],
 						addr & 1);
@@ -216,7 +216,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
-				 * registers - unless we're using r2k_switch.S.
+				 * registers.
 				 */
 				set_fpr32(&fregs[(addr & ~1) - FPR_BASE],
 					  addr & 1, data);
-- 
cgit v1.2.3


From a1d588e951afdf24689d905d3d83beb753f6c614 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 29 Mar 2018 15:04:01 -0700
Subject: KVM: x86: remove obsolete EXPORT... of handle_mmio_page_fault
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

handle_mmio_page_fault() was recently moved to be an internal-only
MMU function, i.e. it's static and no longer defined in kvm_host.h.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/mmu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8af8c8f88bd7..f440d43c8d5a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3715,7 +3715,6 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	 */
 	return RET_PF_RETRY;
 }
-EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
 
 static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
 					 u32 error_code, gfn_t gfn)
-- 
cgit v1.2.3


From 86bf20cb57b9570262338752c9df580328bc5632 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 May 2018 09:01:36 +0300
Subject: KVM: x86: prevent integer overflows in KVM_MEMORY_ENCRYPT_REG_REGION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a fix from reviewing the code, but it looks like it might be
able to lead to an Oops.  It affects 32bit systems.

The KVM_MEMORY_ENCRYPT_REG_REGION ioctl uses a u64 for range->addr and
range->size but the high 32 bits would be truncated away on a 32 bit
system.  This is harmless but it's also harmless to prevent it.

Then in sev_pin_memory() the "uaddr + ulen" calculation can wrap around.
The wrap around can happen on 32 bit or 64 bit systems, but I was only
able to figure out a problem for 32 bit systems.  We would pick a number
which results in "npages" being zero.  The sev_pin_memory() would then
return ZERO_SIZE_PTR without allocating anything.

I made it illegal to call sev_pin_memory() with "ulen" set to zero.
Hopefully, that doesn't cause any problems.  I also changed the type of
"first" and "last" to long, just for cosmetic reasons.  Otherwise on a
64 bit system you're saving "uaddr >> 12" in an int and it truncates the
high 20 bits away.  The math works in the current code so far as I can
see but it's just weird.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[Brijesh noted that the code is only reachable on X86_64.]
Reviewed-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/svm.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 220e5a89465a..de21d5c5168b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1762,7 +1762,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 	unsigned long npages, npinned, size;
 	unsigned long locked, lock_limit;
 	struct page **pages;
-	int first, last;
+	unsigned long first, last;
+
+	if (ulen == 0 || uaddr + ulen < uaddr)
+		return NULL;
 
 	/* Calculate number of pages. */
 	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
@@ -6925,6 +6928,9 @@ static int svm_register_enc_region(struct kvm *kvm,
 	if (!sev_guest(kvm))
 		return -ENOTTY;
 
+	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
+		return -EINVAL;
+
 	region = kzalloc(sizeof(*region), GFP_KERNEL);
 	if (!region)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 6bce30c7d92734e2214c9d894c1229648806f567 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Tue, 22 May 2018 17:16:12 +0300
Subject: KVM: nVMX: Use vmx local var for referencing vpid02
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c34437ad5d9c..c4845bcea96a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11374,7 +11374,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
 			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-				__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
+				__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
 			}
 		} else {
 			vmx_flush_tlb(vcpu, true);
-- 
cgit v1.2.3


From 6f1e03bcabcdbb199940dab0a60b1371cf95f6f9 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Tue, 22 May 2018 17:16:14 +0300
Subject: KVM: nVMX: Don't flush TLB when vmcs12 uses VPID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 5c614b3583e7 ("KVM: nVMX: nested VPID emulation"),
vmcs01 and vmcs02 don't share the same VPID. vmcs01 uses vmx->vpid
while vmcs02 uses vmx->nested.vpid02. This was done such that TLB
flush could be avoided when switching between L1 and L2.

However, the above mentioned commit only changed L2 VMEntry logic to
not flush TLB when switching from L1 to L2. It forgot to also remove
the TLB flush which is done when simulating a VMExit from L2 to L1.

To fix this issue, on VMExit from L2 to L1 we flush TLB only in case
vmcs01 enables VPID and vmcs01->vpid==vmcs02->vpid. This happens when
vmcs01 enables VPID and vmcs12 does not.

Fixes: 5c614b3583e7 ("KVM: nVMX: nested VPID emulation")

Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4845bcea96a..98f05e2b0ecc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -12104,12 +12104,20 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 
 	load_vmcs12_mmu_host_state(vcpu, vmcs12);
 
-	if (enable_vpid) {
-		/*
-		 * Trivially support vpid by letting L2s share their parent
-		 * L1's vpid. TODO: move to a more elaborate solution, giving
-		 * each L2 its own vpid and exposing the vpid feature to L1.
-		 */
+	/*
+	 * If vmcs01 don't use VPID, CPU flushes TLB on every
+	 * VMEntry/VMExit. Thus, no need to flush TLB.
+	 *
+	 * If vmcs12 uses VPID, TLB entries populated by L2 are
+	 * tagged with vmx->nested.vpid02 while L1 entries are tagged
+	 * with vmx->vpid. Thus, no need to flush TLB.
+	 *
+	 * Therefore, flush TLB only in case vmcs01 uses VPID and
+	 * vmcs12 don't use VPID as in this case L1 & L2 TLB entries
+	 * are both tagged with vmx->vpid.
+	 */
+	if (enable_vpid &&
+	    !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
 		vmx_flush_tlb(vcpu, true);
 	}
 
-- 
cgit v1.2.3


From cd9a491f6ef731cdee07dd0a2fe003389424a393 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Tue, 22 May 2018 17:16:15 +0300
Subject: KVM: nVMX: Emulate L1 individual-address invvpid by L0
 individual-address invvpid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When vmcs12 uses VPID, all TLB entries populated by L2 are tagged with
vmx->nested.vpid02. Currently, INVVPID executed by L1 is emulated by L0
by using INVVPID single/global-context to flush all TLB entries
tagged with vmx->nested.vpid02 regardless of INVVPID type executed by
L1.

However, we can easily optimize the case of L1 INVVPID on an
individual-address. Just INVVPID given individual-address tagged with
vmx->nested.vpid02.

Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
[Squashed with a preparatory patch that added the !operand.vpid line.]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 98f05e2b0ecc..e50beb76d846 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1572,6 +1572,11 @@ static inline bool cpu_has_vmx_invept_global(void)
 	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
 }
 
+static inline bool cpu_has_vmx_invvpid_individual_addr(void)
+{
+	return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
+}
+
 static inline bool cpu_has_vmx_invvpid_single(void)
 {
 	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
@@ -8513,12 +8518,19 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 
 	switch (type) {
 	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
-		if (is_noncanonical_address(operand.gla, vcpu)) {
+		if (!operand.vpid ||
+		    is_noncanonical_address(operand.gla, vcpu)) {
 			nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 			return kvm_skip_emulated_instruction(vcpu);
 		}
-		/* fall through */
+		if (cpu_has_vmx_invvpid_individual_addr() &&
+		    vmx->nested.vpid02) {
+			__invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
+				vmx->nested.vpid02, operand.gla);
+		} else
+			__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+		break;
 	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
 	case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
 		if (!operand.vpid) {
@@ -8526,15 +8538,16 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 			return kvm_skip_emulated_instruction(vcpu);
 		}
+		__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
 		break;
 	case VMX_VPID_EXTENT_ALL_CONTEXT:
+		__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	__vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
 	nested_vmx_succeed(vcpu);
 
 	return kvm_skip_emulated_instruction(vcpu);
-- 
cgit v1.2.3


From 0ea3286e2df74c9ec2fadbf91170cd3edd14e3e5 Mon Sep 17 00:00:00 2001
From: Jingqi Liu <jingqi.liu@intel.com>
Date: Tue, 22 May 2018 17:01:27 +0800
Subject: KVM: x86: Expose CLDEMOTE CPU feature to guest VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CLDEMOTE instruction hints to hardware that the cache line that
contains the linear address should be moved("demoted") from
the cache(s) closest to the processor core to a level more distant
from the processor core. This may accelerate subsequent accesses
to the line by other cores in the same coherence domain,
especially if the line was written by the core that demotes the line.

This patch exposes the cldemote feature to the guest.

The release document ref below link:
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf
This patch has a dependency on https://lkml.org/lkml/2018/4/23/928

Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
Reviewed-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/cpuid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 82055b90a8b3..72d8c492d71d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -403,7 +403,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_cpuid_7_0_ecx_x86_features =
 		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
+		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+		F(CLDEMOTE);
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
-- 
cgit v1.2.3


From d8ad71fa38a96128ebb0462539fee6cb9391d17b Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Mon, 21 May 2018 18:25:43 +0100
Subject: arm64: fpsimd: Fix TIF_FOREIGN_FPSTATE after invalidating cpu regs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fpsimd_last_state.st is set to NULL as a way of indicating that
current's FPSIMD registers are no longer loaded in the cpu.  In
particular, this is done when the kernel temporarily uses or
clobbers the FPSIMD registers for its own purposes, as in CPU PM or
kernel-mode NEON, resulting in them being populated with garbage
data not belonging to a task.

Commit 17eed27b02da ("arm64/sve: KVM: Prevent guests from using
SVE") factors this operation out as a new helper
fpsimd_flush_cpu_state() to make it clearer what is being done
here, and on SVE systems this helper is now used, via
kvm_fpsimd_flush_cpu_state(), to invalidate the registers after KVM
has run a vcpu.  The reason for this is that KVM does not yet
understand how to restore the full host SVE registers itself after
loading the guest FPSIMD context into them.

This exposes a particular problem: if fpsimd_last_state.st is set
to NULL without also setting TIF_FOREIGN_FPSTATE, the kernel may
continue to think that current's FPSIMD registers are live even
though they have actually been clobbered.

Prior to the aforementioned commit, the only path where
fpsimd_last_state.st is set to NULL without setting
TIF_FOREIGN_FPSTATE is when kernel_neon_begin() is called by a
kernel thread (where current->mm can be NULL).  This does not
matter, because the only harm is that at context-switch time
fpsimd_thread_switch() may unnecessarily save the FPSIMD registers
back to current's thread_struct (even though kernel threads are not
considered to have any FPSIMD context of their own and the
registers will never be reloaded).

Note that although CPU_PM_ENTER lacks the TIF_FOREIGN_FPSTATE
setting, every CPU passing through that path must subsequently pass
through CPU_PM_EXIT before it can re-enter the kernel proper.
CPU_PM_EXIT sets the flag.

The sve_flush_cpu_state() function added by commit 17eed27b02da
also lacks the proper maintenance of TIF_FOREIGN_FPSTATE.  This may
cause the bits of a host task's SVE registers that do not alias the
FPSIMD register file to spontaneously appear zeroed if a KVM vcpu
runs in the same task in the meantime.  Although this effect is
hidden by the fact that the non-FPSIMD bits of the SVE registers
are zeroed by a syscall anyway, it is doubtless a bad idea to rely
on these different code paths interacting correctly under future
maintenance.

This patch makes TIF_FOREIGN_FPSTATE an unconditional side-effect
of fpsimd_flush_cpu_state(), and removes the set_thread_flag()
calls that become redundant as a result.  This ensures that
TIF_FOREIGN_FPSTATE cannot remain clear if the FPSIMD state in the
FPSIMD registers is invalid.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a35364e750..12e1c967c7b5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1067,6 +1067,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
 static inline void fpsimd_flush_cpu_state(void)
 {
 	__this_cpu_write(fpsimd_last_state.st, NULL);
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
 /*
@@ -1121,10 +1122,8 @@ void kernel_neon_begin(void)
 	__this_cpu_write(kernel_neon_busy, true);
 
 	/* Save unsaved task fpsimd state, if any: */
-	if (current->mm) {
+	if (current->mm)
 		task_fpsimd_save();
-		set_thread_flag(TIF_FOREIGN_FPSTATE);
-	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
@@ -1251,8 +1250,6 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 		fpsimd_flush_cpu_state();
 		break;
 	case CPU_PM_EXIT:
-		if (current->mm)
-			set_thread_flag(TIF_FOREIGN_FPSTATE);
 		break;
 	case CPU_PM_ENTER_FAILED:
 	default:
-- 
cgit v1.2.3


From 09d1223a62798846d223c25debeb55bf3d0d4905 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 11 Apr 2018 17:59:06 +0100
Subject: arm64: Use update{,_tsk}_thread_flag()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch uses the new update_thread_flag() helpers to simplify a
couple of if () set; else clear; constructs.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 12e1c967c7b5..9d853732f9f4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -618,10 +618,8 @@ int sve_set_vector_length(struct task_struct *task,
 	task->thread.sve_vl = vl;
 
 out:
-	if (flags & PR_SVE_VL_INHERIT)
-		set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
-	else
-		clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+	update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+			       flags & PR_SVE_VL_INHERIT);
 
 	return 0;
 }
@@ -910,12 +908,12 @@ void fpsimd_thread_switch(struct task_struct *next)
 		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
 		 * upon the next return to userland.
 		 */
-		if (__this_cpu_read(fpsimd_last_state.st) ==
-			&next->thread.uw.fpsimd_state
-		    && next->thread.fpsimd_cpu == smp_processor_id())
-			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-		else
-			set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+		bool wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+					&next->thread.uw.fpsimd_state;
+		bool wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+
+		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+				       wrong_task || wrong_cpu);
 	}
 }
 
-- 
cgit v1.2.3


From ceda9fff70e8b5939fa8882d1c497e55472a727f Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 16 Feb 2018 16:35:32 +0000
Subject: KVM: arm64: Convert lazy FPSIMD context switch trap to C
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make the lazy FPSIMD context switch trap code easier to hack on,
this patch converts it to C.

This is not amazingly efficient, but the trap should typically only
be taken once per host context switch.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
 arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
 2 files changed, 46 insertions(+), 35 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e41a161d313a..40f349bc1079 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore)
 	// x1: vcpu
 	// x2-x29,lr: vcpu regs
 	// vcpu x0-x1 on the stack
-	stp	x2, x3, [sp, #-16]!
-	stp	x4, lr, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x2, cptr_el2
-	bic	x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-alternative_else
-	mrs	x2, cpacr_el1
-	orr	x2, x2, #CPACR_EL1_FPEN
-	msr	cpacr_el1, x2
-alternative_endif
-	isb
-
-	mov	x3, x1
-
-	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x0
-	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	bl	__fpsimd_save_state
-
-	add	x2, x3, #VCPU_CONTEXT
-	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	bl	__fpsimd_restore_state
-
-	// Skip restoring fpexc32 for AArch64 guests
-	mrs	x1, hcr_el2
-	tbnz	x1, #HCR_RW_SHIFT, 1f
-	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
-	msr	fpexc32_el2, x4
-1:
-	ldp	x4, lr, [sp], #16
-	ldp	x2, x3, [sp], #16
-	ldp	x0, x1, [sp], #16
-
+	stp	x2, x3, [sp, #-144]!
+	stp	x4, x5, [sp, #16]
+	stp	x6, x7, [sp, #32]
+	stp	x8, x9, [sp, #48]
+	stp	x10, x11, [sp, #64]
+	stp	x12, x13, [sp, #80]
+	stp	x14, x15, [sp, #96]
+	stp	x16, x17, [sp, #112]
+	stp	x18, lr, [sp, #128]
+
+	bl	__hyp_switch_fpsimd
+
+	ldp	x4, x5, [sp, #16]
+	ldp	x6, x7, [sp, #32]
+	ldp	x8, x9, [sp, #48]
+	ldp	x10, x11, [sp, #64]
+	ldp	x12, x13, [sp, #80]
+	ldp	x14, x15, [sp, #96]
+	ldp	x16, x17, [sp, #112]
+	ldp	x18, lr, [sp, #128]
+	ldp	x0, x1, [sp, #144]
+	ldp	x2, x3, [sp], #160
 	eret
 ENDPROC(__fpsimd_guest_restore)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d9645236e474..c0796c4d93a5 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 }
 
+void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
+				    struct kvm_vcpu *vcpu)
+{
+	kvm_cpu_context_t *host_ctxt;
+
+	if (has_vhe())
+		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+			     cpacr_el1);
+	else
+		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+			     cptr_el2);
+
+	isb();
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	__fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
+	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+
+	/* Skip restoring fpexc32 for AArch64 guests */
+	if (!(read_sysreg(hcr_el2) & HCR_RW))
+		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+			     fpexc32_el2);
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the
-- 
cgit v1.2.3


From d179761519d9fe57ece975eaf8eec131547b9da3 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 6 Apr 2018 14:55:59 +0100
Subject: arm64: fpsimd: Generalise context saving for non-task contexts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for allowing non-task (i.e., KVM vcpu) FPSIMD
contexts to be handled by the fpsimd common code, this patch adapts
task_fpsimd_save() to save back the currently loaded context,
removing the explicit dependency on current.

The relevant storage to write back to in memory is now found by
examining the fpsimd_last_state percpu struct.

fpsimd_save() does nothing unless TIF_FOREIGN_FPSTATE is clear, and
fpsimd_last_state is updated under local_bh_disable() or
local_irq_disable() everywhere that TIF_FOREIGN_FPSTATE is cleared:
thus, fpsimd_save() will write back to the correct storage for the
loaded context.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 9d853732f9f4..2d9a9e8ed826 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -270,13 +270,16 @@ static void task_fpsimd_load(void)
 }
 
 /*
- * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
- * with respect to the CPU registers.
+ * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
+ * date with respect to the CPU registers.
  *
  * Softirqs (and preemption) must be disabled.
  */
-static void task_fpsimd_save(void)
+static void fpsimd_save(void)
 {
+	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+	/* set by fpsimd_bind_to_cpu() */
+
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -291,10 +294,9 @@ static void task_fpsimd_save(void)
 				return;
 			}
 
-			sve_save_state(sve_pffr(current),
-				       &current->thread.uw.fpsimd_state.fpsr);
+			sve_save_state(sve_pffr(current), &st->fpsr);
 		} else
-			fpsimd_save_state(&current->thread.uw.fpsimd_state);
+			fpsimd_save_state(st);
 	}
 }
 
@@ -598,7 +600,7 @@ int sve_set_vector_length(struct task_struct *task,
 	if (task == current) {
 		local_bh_disable();
 
-		task_fpsimd_save();
+		fpsimd_save();
 		set_thread_flag(TIF_FOREIGN_FPSTATE);
 	}
 
@@ -837,7 +839,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 
 	local_bh_disable();
 
-	task_fpsimd_save();
+	fpsimd_save();
 	fpsimd_to_sve(current);
 
 	/* Force ret_to_user to reload the registers: */
@@ -898,7 +900,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 	 * 'current'.
 	 */
 	if (current->mm)
-		task_fpsimd_save();
+		fpsimd_save();
 
 	if (next->mm) {
 		/*
@@ -980,7 +982,7 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	local_bh_disable();
-	task_fpsimd_save();
+	fpsimd_save();
 	local_bh_enable();
 }
 
@@ -1121,7 +1123,7 @@ void kernel_neon_begin(void)
 
 	/* Save unsaved task fpsimd state, if any: */
 	if (current->mm)
-		task_fpsimd_save();
+		fpsimd_save();
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
@@ -1244,7 +1246,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 	switch (cmd) {
 	case CPU_PM_ENTER:
 		if (current->mm)
-			task_fpsimd_save();
+			fpsimd_save();
 		fpsimd_flush_cpu_state();
 		break;
 	case CPU_PM_EXIT:
-- 
cgit v1.2.3


From 66e48a0d29bdedc574c8fc0af7a5d112b594ced6 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 24 May 2018 15:54:30 +0100
Subject: arm64: fpsimd: Avoid FPSIMD context leakage for the init task
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The init task is started with thread_flags equal to 0, which means
that TIF_FOREIGN_FPSTATE is initially clear.

It is theoretically possible (if unlikely) that the init task could
reach userspace without ever being scheduled out.  If this occurs,
data left in the FPSIMD registers by the kernel could be exposed.

This patch fixes this anomaly by ensuring that the init task's
initial TIF_FOREIGN_FPSTATE is set.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Fixes: 005f78cd8849 ("arm64: defer reloading a task's FPSIMD state to userland resume")
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/thread_info.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 740aa03c5f0d..af271f9a6c9f 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -45,12 +45,6 @@ struct thread_info {
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 };
 
-#define INIT_THREAD_INFO(tsk)						\
-{									\
-	.preempt_count	= INIT_PREEMPT_COUNT,				\
-	.addr_limit	= KERNEL_DS,					\
-}
-
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)	\
@@ -117,5 +111,12 @@ void arch_release_task_struct(struct task_struct *tsk);
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
 				 _TIF_NOHZ)
 
+#define INIT_THREAD_INFO(tsk)						\
+{									\
+	.flags		= _TIF_FOREIGN_FPSTATE,				\
+	.preempt_count	= INIT_PREEMPT_COUNT,				\
+	.addr_limit	= KERNEL_DS,					\
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
-- 
cgit v1.2.3


From df3fb96820455ef70a51630d1be336d4f2602111 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Mon, 21 May 2018 19:08:15 +0100
Subject: arm64: fpsimd: Eliminate task->mm checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the FPSIMD handling code uses the condition task->mm ==
NULL as a hint that task has no FPSIMD register context.

The ->mm check is only there to filter out tasks that cannot
possibly have FPSIMD context loaded, for optimisation purposes.
Also, TIF_FOREIGN_FPSTATE must always be checked anyway before
saving FPSIMD context back to memory.  For these reasons, the ->mm
checks are not useful, providing that TIF_FOREIGN_FPSTATE is
maintained in a consistent way for all threads.

The context switch logic is already deliberately optimised to defer
reloads of the regs until ret_to_user (or sigreturn as a special
case), and save them only if they have been previously loaded.
These paths are the only places where the wrong_task and wrong_cpu
conditions can be made false, by calling fpsimd_bind_task_to_cpu().
Kernel threads by definition never reach these paths.  As a result,
the wrong_task and wrong_cpu tests in fpsimd_thread_switch() will
always yield true for kernel threads.

This patch removes the redundant checks and special-case code,
ensuring that TIF_FOREIGN_FPSTATE is set whenever a kernel thread
is scheduled in, and ensures that this flag is set for the init
task.  The fpsimd_flush_task_state() call already present in
copy_thread() ensures the same for any new task.

With TIF_FOREIGN_FPSTATE always set for kernel threads, this patch
ensures that no extra context save work is added for kernel
threads, and eliminates the redundant context saving that may
currently occur for kernel threads that have acquired an mm via
use_mm().

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/processor.h |  4 +++-
 arch/arm64/kernel/fpsimd.c         | 40 +++++++++++++++-----------------------
 2 files changed, 19 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 767598932549..36d64f83cdfb 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -156,7 +156,9 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 /* Sync TPIDR_EL0 back to thread_struct for current */
 void tls_preserve_current_state(void);
 
-#define INIT_THREAD  {	}
+#define INIT_THREAD {				\
+	.fpsimd_cpu = NR_CPUS,			\
+}
 
 static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
 {
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2d9a9e8ed826..d736b6c412ef 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -892,31 +892,25 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 
 void fpsimd_thread_switch(struct task_struct *next)
 {
+	bool wrong_task, wrong_cpu;
+
 	if (!system_supports_fpsimd())
 		return;
+
+	/* Save unsaved fpsimd state, if any: */
+	fpsimd_save();
+
 	/*
-	 * Save the current FPSIMD state to memory, but only if whatever is in
-	 * the registers is in fact the most recent userland FPSIMD state of
-	 * 'current'.
+	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+	 * state.  For kernel threads, FPSIMD registers are never loaded
+	 * and wrong_task and wrong_cpu will always be true.
 	 */
-	if (current->mm)
-		fpsimd_save();
-
-	if (next->mm) {
-		/*
-		 * If we are switching to a task whose most recent userland
-		 * FPSIMD state is already in the registers of *this* cpu,
-		 * we can skip loading the state from memory. Otherwise, set
-		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
-		 * upon the next return to userland.
-		 */
-		bool wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
 					&next->thread.uw.fpsimd_state;
-		bool wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
 
-		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
-				       wrong_task || wrong_cpu);
-	}
+	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+			       wrong_task || wrong_cpu);
 }
 
 void fpsimd_flush_thread(void)
@@ -1121,9 +1115,8 @@ void kernel_neon_begin(void)
 
 	__this_cpu_write(kernel_neon_busy, true);
 
-	/* Save unsaved task fpsimd state, if any: */
-	if (current->mm)
-		fpsimd_save();
+	/* Save unsaved fpsimd state, if any: */
+	fpsimd_save();
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
@@ -1245,8 +1238,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 {
 	switch (cmd) {
 	case CPU_PM_ENTER:
-		if (current->mm)
-			fpsimd_save();
+		fpsimd_save();
 		fpsimd_flush_cpu_state();
 		break;
 	case CPU_PM_EXIT:
-- 
cgit v1.2.3


From 0cff8e776f8f58f494ed16b28baf209c1b73b079 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 9 May 2018 14:27:41 +0100
Subject: arm64/sve: Refactor user SVE trap maintenance for external use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for optimising the way KVM manages switching the
guest and host FPSIMD state, it is necessary to provide a means for
code outside arch/arm64/kernel/fpsimd.c to restore the user trap
configuration for SVE correctly for the current task.

Rather than requiring external code to duplicate the maintenance
explicitly, this patch moves the trap maintenenace to
fpsimd_bind_to_cpu(), since it is logically part of the work of
associating the current task with the cpu.

Because fpsimd_bind_to_cpu() is rather a cryptic name to publish
alongside fpsimd_bind_state_to_cpu(), the former function is
renamed to fpsimd_bind_task_to_cpu() to make its purpose more
explicit.

This patch makes appropriate changes to ensure that
fpsimd_bind_task_to_cpu() is always called alongside
task_fpsimd_load(), so that the trap maintenance continues to be
done in every situation where it was done prior to this patch.

As a side-effect, the metadata updates done by
fpsimd_bind_task_to_cpu() now change from conditional to
unconditional in the "already bound" case of sigreturn.  This is
harmless, and a couple of extra stores on this slow path will not
impact performance.  I consider this a reasonable price to pay for
a slightly cleaner interface.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d736b6c412ef..d5f659f476a8 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -257,16 +257,6 @@ static void task_fpsimd_load(void)
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
 	else
 		fpsimd_load_state(&current->thread.uw.fpsimd_state);
-
-	if (system_supports_sve()) {
-		/* Toggle SVE trapping for userspace if needed */
-		if (test_thread_flag(TIF_SVE))
-			sve_user_enable();
-		else
-			sve_user_disable();
-
-		/* Serialised by exception return to user */
-	}
 }
 
 /*
@@ -278,7 +268,7 @@ static void task_fpsimd_load(void)
 static void fpsimd_save(void)
 {
 	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
-	/* set by fpsimd_bind_to_cpu() */
+	/* set by fpsimd_bind_task_to_cpu() */
 
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
@@ -996,7 +986,7 @@ void fpsimd_signal_preserve_current_state(void)
  * Associate current's FPSIMD context with this cpu
  * Preemption must be disabled when calling this function.
  */
-static void fpsimd_bind_to_cpu(void)
+static void fpsimd_bind_task_to_cpu(void)
 {
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
@@ -1004,6 +994,16 @@ static void fpsimd_bind_to_cpu(void)
 	last->st = &current->thread.uw.fpsimd_state;
 	last->sve_in_use = test_thread_flag(TIF_SVE);
 	current->thread.fpsimd_cpu = smp_processor_id();
+
+	if (system_supports_sve()) {
+		/* Toggle SVE trapping for userspace if needed */
+		if (test_thread_flag(TIF_SVE))
+			sve_user_enable();
+		else
+			sve_user_disable();
+
+		/* Serialised by exception return to user */
+	}
 }
 
 /*
@@ -1020,7 +1020,7 @@ void fpsimd_restore_current_state(void)
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		task_fpsimd_load();
-		fpsimd_bind_to_cpu();
+		fpsimd_bind_task_to_cpu();
 	}
 
 	local_bh_enable();
@@ -1043,9 +1043,9 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 		fpsimd_to_sve(current);
 
 	task_fpsimd_load();
+	fpsimd_bind_task_to_cpu();
 
-	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_bind_to_cpu();
+	clear_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
 }
-- 
cgit v1.2.3


From fa89d31c53061139bd66f9de6e55340ac7bd5480 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Tue, 8 May 2018 14:47:23 +0100
Subject: KVM: arm64: Repurpose vcpu_arch.debug_flags for general-purpose flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In struct vcpu_arch, the debug_flags field is used to store
debug-related flags about the vcpu state.

Since we are about to add some more flags related to FPSIMD and
SVE, it makes sense to add them to the existing flags field rather
than adding new fields.  Since there is only one debug_flags flag
defined so far, there is plenty of free space for expansion.

In preparation for adding more flags, this patch renames the
debug_flags field to simply "flags", and updates comments
appropriately.

The flag definitions are also moved to <asm/kvm_host.h>, since
their presence in <asm/kvm_asm.h> was for purely historical
reasons:  these definitions are not used from asm any more, and not
very likely to be as more Hyp asm is migrated to C.

KVM_ARM64_DEBUG_DIRTY_SHIFT has not been used since commit
1ea66d27e7b0 ("arm64: KVM: Move away from the assembly version of
the world switch"), so this patch gets rid of that too.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
[maz: fixed minor conflict]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_asm.h  | 3 ---
 arch/arm64/include/asm/kvm_host.h | 7 +++++--
 arch/arm64/kvm/debug.c            | 8 ++++----
 arch/arm64/kvm/hyp/debug-sr.c     | 6 +++---
 arch/arm64/kvm/hyp/sysreg-sr.c    | 4 ++--
 arch/arm64/kvm/sys_regs.c         | 9 ++++-----
 6 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index a9ceeec5a76f..821a7032c0f7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,9 +30,6 @@
 /* The hyp-stub will return this for any kvm_call_hyp() call */
 #define ARM_EXCEPTION_HYP_GONE	  HVC_STUB_ERR
 
-#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
-#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
-
 #ifndef __ASSEMBLY__
 
 #include <linux/mm.h>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 469de8acd06f..146c16794d32 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -216,8 +216,8 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
-	/* Guest debug state */
-	u64 debug_flags;
+	/* Miscellaneous vcpu state flags */
+	u64 flags;
 
 	/*
 	 * We maintain more than a single set of debug registers to support
@@ -293,6 +293,9 @@ struct kvm_vcpu_arch {
 	bool sysregs_loaded_on_cpu;
 };
 
+/* vcpu_arch flags field values: */
+#define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
+
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 
 /*
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index a1f4ebdfe6d3..00d422336a45 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -103,7 +103,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
  *
  * Additionally, KVM only traps guest accesses to the debug registers if
  * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
- * flag on vcpu->arch.debug_flags).  Since the guest must not interfere
+ * flag on vcpu->arch.flags).  Since the guest must not interfere
  * with the hardware state when debugging the guest, we must ensure that
  * trapping is enabled whenever we are debugging the guest using the
  * debug registers.
@@ -111,7 +111,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
 
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 {
-	bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+	bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
 	unsigned long mdscr;
 
 	trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -184,7 +184,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
 
 			vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
-			vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+			vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 			trap_debug = true;
 
 			trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
@@ -206,7 +206,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 
 	/* If KDE or MDE are set, perform a full save/restore cycle. */
 	if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
-		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 
 	trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
 	trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 3e717f66f011..50009766e5e5 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -163,7 +163,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 	if (!has_vhe())
 		__debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
 
-	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+	if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
 		return;
 
 	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
@@ -185,7 +185,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
 	if (!has_vhe())
 		__debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
 
-	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+	if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
 		return;
 
 	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
@@ -196,7 +196,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
 	__debug_save_state(vcpu, guest_dbg, guest_ctxt);
 	__debug_restore_state(vcpu, host_dbg, host_ctxt);
 
-	vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+	vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
 }
 
 u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index b3894df6bf1a..35bc16832efe 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -196,7 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
 	sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
 	sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
 
-	if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
 		sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
 }
 
@@ -218,7 +218,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 	write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
 	write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
 
-	if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
 		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
 }
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6e3b969391fd..a4363735d3f8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -31,7 +31,6 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
@@ -338,7 +337,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
 {
 	if (p->is_write) {
 		vcpu_write_sys_reg(vcpu, p->regval, r->reg);
-		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
 		p->regval = vcpu_read_sys_reg(vcpu, r->reg);
 	}
@@ -369,7 +368,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
 	}
 
 	*dbg_reg = val;
-	vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 }
 
 static void dbg_to_reg(struct kvm_vcpu *vcpu,
@@ -1441,7 +1440,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
 {
 	if (p->is_write) {
 		vcpu_cp14(vcpu, r->reg) = p->regval;
-		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
 		p->regval = vcpu_cp14(vcpu, r->reg);
 	}
@@ -1473,7 +1472,7 @@ static bool trap_xvr(struct kvm_vcpu *vcpu,
 		val |= p->regval << 32;
 		*dbg_reg = val;
 
-		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
 		p->regval = *dbg_reg >> 32;
 	}
-- 
cgit v1.2.3


From e6b673b741ea0d7cd275ad40748bfc225accc423 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 6 Apr 2018 14:55:59 +0100
Subject: KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch refactors KVM to align the host and guest FPSIMD
save/restore logic with each other for arm64.  This reduces the
number of redundant save/restore operations that must occur, and
reduces the common-case IRQ blackout time during guest exit storms
by saving the host state lazily and optimising away the need to
restore the host state before returning to the run loop.

Four hooks are defined in order to enable this:

 * kvm_arch_vcpu_run_map_fp():
   Called on PID change to map necessary bits of current to Hyp.

 * kvm_arch_vcpu_load_fp():
   Set up FP/SIMD for entering the KVM run loop (parse as
   "vcpu_load fp").

 * kvm_arch_vcpu_ctxsync_fp():
   Get FP/SIMD into a safe state for re-enabling interrupts after a
   guest exit back to the run loop.

   For arm64 specifically, this involves updating the host kernel's
   FPSIMD context tracking metadata so that kernel-mode NEON use
   will cause the vcpu's FPSIMD state to be saved back correctly
   into the vcpu struct.  This must be done before re-enabling
   interrupts because kernel-mode NEON may be used by softirqs.

 * kvm_arch_vcpu_put_fp():
   Save guest FP/SIMD state back to memory and dissociate from the
   CPU ("vcpu_put fp").

Also, the arm64 FPSIMD context switch code is updated to enable it
to save back FPSIMD state for a vcpu, not just current.  A few
helpers drive this:

 * fpsimd_bind_state_to_cpu(struct user_fpsimd_state *fp):
   mark this CPU as having context fp (which may belong to a vcpu)
   currently loaded in its registers.  This is the non-task
   equivalent of the static function fpsimd_bind_to_cpu() in
   fpsimd.c.

 * task_fpsimd_save():
   exported to allow KVM to save the guest's FPSIMD state back to
   memory on exit from the run loop.

 * fpsimd_flush_state():
   invalidate any context's FPSIMD state that is currently loaded.
   Used to disassociate the vcpu from the CPU regs on run loop exit.

These changes allow the run loop to enable interrupts (and thus
softirqs that may use kernel-mode NEON) without having to save the
guest's FPSIMD state eagerly.

Some new vcpu_arch fields are added to make all this work.  Because
host FPSIMD state can now be saved back directly into current's
thread_struct as appropriate, host_cpu_context is no longer used
for preserving the FPSIMD state.  However, it is still needed for
preserving other things such as the host's system registers.  To
avoid ABI churn, the redundant storage space in host_cpu_context is
not removed for now.

arch/arm is not addressed by this patch and continues to use its
current save/restore logic.  It could provide implementations of
the helpers later if desired.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h   |   8 +++
 arch/arm64/include/asm/fpsimd.h   |   6 +++
 arch/arm64/include/asm/kvm_host.h |  21 ++++++++
 arch/arm64/kernel/fpsimd.c        |  19 +++++--
 arch/arm64/kvm/Kconfig            |   1 +
 arch/arm64/kvm/Makefile           |   2 +-
 arch/arm64/kvm/fpsimd.c           | 111 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/switch.c       |  51 +++++++++---------
 8 files changed, 188 insertions(+), 31 deletions(-)
 create mode 100644 arch/arm64/kvm/fpsimd.c

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c7c28c885a19..ac870b2cd5d1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -303,6 +303,14 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
+/*
+ * VFP/NEON switching is all done by the hyp switch code, so no need to
+ * coordinate with host context handling for this state:
+ */
+static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
+
 /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
 static inline void kvm_fpsimd_flush_cpu_state(void) {}
 
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index aa7162ae93e3..3e00f701cb9c 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,6 +41,8 @@ struct task_struct;
 extern void fpsimd_save_state(struct user_fpsimd_state *state);
 extern void fpsimd_load_state(struct user_fpsimd_state *state);
 
+extern void fpsimd_save(void);
+
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
@@ -49,7 +51,11 @@ extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
 
+extern void fpsimd_bind_task_to_cpu(void);
+extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
+
 extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_flush_cpu_state(void);
 extern void sve_flush_cpu_state(void);
 
 /* Maximum VL that SVE VL-agnostic software can transparently support */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 146c16794d32..b3fe7301bdbe 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
+#include <asm/thread_info.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
@@ -238,6 +239,10 @@ struct kvm_vcpu_arch {
 
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
+
+	struct thread_info *host_thread_info;	/* hyp VA */
+	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */
+
 	struct {
 		/* {Break,watch}point registers */
 		struct kvm_guest_debug_arch regs;
@@ -295,6 +300,9 @@ struct kvm_vcpu_arch {
 
 /* vcpu_arch flags field values: */
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
+#define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
+#define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
+#define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 
@@ -423,6 +431,19 @@ static inline void __cpu_init_stage2(void)
 		  "PARange is %d bits, unsupported configuration!", parange);
 }
 
+/* Guest/host FPSIMD coordination helpers */
+int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+
+#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
+static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
+{
+	return kvm_arch_vcpu_run_map_fp(vcpu);
+}
+#endif
+
 /*
  * All host FP/SIMD state is restored on guest exit, so nothing needs
  * doing here except in the SVE case:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d5f659f476a8..794dd990da82 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -265,10 +265,10 @@ static void task_fpsimd_load(void)
  *
  * Softirqs (and preemption) must be disabled.
  */
-static void fpsimd_save(void)
+void fpsimd_save(void)
 {
 	struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
-	/* set by fpsimd_bind_task_to_cpu() */
+	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
 
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
@@ -986,7 +986,7 @@ void fpsimd_signal_preserve_current_state(void)
  * Associate current's FPSIMD context with this cpu
  * Preemption must be disabled when calling this function.
  */
-static void fpsimd_bind_task_to_cpu(void)
+void fpsimd_bind_task_to_cpu(void)
 {
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
@@ -1006,6 +1006,17 @@ static void fpsimd_bind_task_to_cpu(void)
 	}
 }
 
+void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+{
+	struct fpsimd_last_state_struct *last =
+		this_cpu_ptr(&fpsimd_last_state);
+
+	WARN_ON(!in_softirq() && !irqs_disabled());
+
+	last->st = st;
+	last->sve_in_use = false;
+}
+
 /*
  * Load the userland FPSIMD state of 'current' from memory, but only if the
  * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
@@ -1058,7 +1069,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
 	t->thread.fpsimd_cpu = NR_CPUS;
 }
 
-static inline void fpsimd_flush_cpu_state(void)
+void fpsimd_flush_cpu_state(void)
 {
 	__this_cpu_write(fpsimd_last_state.st, NULL);
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a2e3a5af1113..47b23bf617c7 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,7 @@ config KVM
 	select HAVE_KVM_IRQ_ROUTING
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
+	select HAVE_KVM_VCPU_RUN_PID_CHANGE
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 93afff91cb7c..0f2a135ba15b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,7 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
new file mode 100644
index 000000000000..365933a98a7c
--- /dev/null
+++ b/arch/arm64/kvm/fpsimd.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/arm64/kvm/fpsimd.c: Guest/host FPSIMD context coordination helpers
+ *
+ * Copyright 2018 Arm Limited
+ * Author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <linux/bottom_half.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * Called on entry to KVM_RUN unless this vcpu previously ran at least
+ * once and the most recent prior KVM_RUN for this vcpu was called from
+ * the same task as current (highly likely).
+ *
+ * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
+ * such that on entering hyp the relevant parts of current are already
+ * mapped.
+ */
+int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	struct thread_info *ti = &current->thread_info;
+	struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+
+	/*
+	 * Make sure the host task thread flags and fpsimd state are
+	 * visible to hyp:
+	 */
+	ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
+	if (ret)
+		goto error;
+
+	ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+	if (ret)
+		goto error;
+
+	vcpu->arch.host_thread_info = kern_hyp_va(ti);
+	vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
+error:
+	return ret;
+}
+
+/*
+ * Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
+ * The actual loading is done by the FPSIMD access trap taken to hyp.
+ *
+ * Here, we just set the correct metadata to indicate that the FPSIMD
+ * state in the cpu regs (if any) belongs to current on the host.
+ *
+ * TIF_SVE is backed up here, since it may get clobbered with guest state.
+ * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
+ */
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(system_supports_sve());
+	BUG_ON(!current->mm);
+
+	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+	vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+	if (test_thread_flag(TIF_SVE))
+		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+}
+
+/*
+ * If the guest FPSIMD state was loaded, update the host's context
+ * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
+ * so that they will be written back if the kernel clobbers them due to
+ * kernel-mode NEON before re-entry into the guest.
+ */
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs);
+		clear_thread_flag(TIF_FOREIGN_FPSTATE);
+		clear_thread_flag(TIF_SVE);
+	}
+}
+
+/*
+ * Write back the vcpu FPSIMD regs if they are dirty, and invalidate the
+ * cpu FPSIMD regs so that they can't be spuriously reused if this vcpu
+ * disappears and another task or vcpu appears that recycles the same
+ * struct fpsimd_state.
+ */
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
+{
+	local_bh_disable();
+
+	update_thread_flag(TIF_SVE,
+			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+		/* Clean guest FP state to memory and invalidate cpu view */
+		fpsimd_save();
+		fpsimd_flush_cpu_state();
+	} else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		/* Ensure user trap controls are correctly restored */
+		fpsimd_bind_task_to_cpu();
+	}
+
+	local_bh_enable();
+}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index c0796c4d93a5..118f3002b9ce 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -23,19 +23,21 @@
 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
+#include <asm/thread_info.h>
 
-static bool __hyp_text __fpsimd_enabled_nvhe(void)
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
 {
-	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
-}
+	if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+				      KVM_ARM64_FP_HOST);
 
-static bool fpsimd_enabled_vhe(void)
-{
-	return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
 }
 
 /* Save the 32-bit only FPSIMD system register state */
@@ -92,7 +94,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
-	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+	val &= ~CPACR_EL1_ZEN;
+	if (!update_fp_enabled(vcpu))
+		val &= ~CPACR_EL1_FPEN;
+
 	write_sysreg(val, cpacr_el1);
 
 	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
@@ -105,7 +110,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 	__activate_traps_common(vcpu);
 
 	val = CPTR_EL2_DEFAULT;
-	val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+	val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+	if (!update_fp_enabled(vcpu))
+		val |= CPTR_EL2_TFP;
+
 	write_sysreg(val, cptr_el2);
 }
 
@@ -321,8 +329,6 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
 				    struct kvm_vcpu *vcpu)
 {
-	kvm_cpu_context_t *host_ctxt;
-
 	if (has_vhe())
 		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
 			     cpacr_el1);
@@ -332,14 +338,19 @@ void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
 
 	isb();
 
-	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
-	__fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
+	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+	}
+
 	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
 
 	/* Skip restoring fpexc32 for AArch64 guests */
 	if (!(read_sysreg(hcr_el2) & HCR_RW))
 		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
 			     fpexc32_el2);
+
+	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
 }
 
 /*
@@ -418,7 +429,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *host_ctxt;
 	struct kvm_cpu_context *guest_ctxt;
-	bool fp_enabled;
 	u64 exit_code;
 
 	host_ctxt = vcpu->arch.host_cpu_context;
@@ -440,19 +450,14 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 		/* And we're baaack! */
 	} while (fixup_guest_exit(vcpu, &exit_code));
 
-	fp_enabled = fpsimd_enabled_vhe();
-
 	sysreg_save_guest_state_vhe(guest_ctxt);
 
 	__deactivate_traps(vcpu);
 
 	sysreg_restore_host_state_vhe(host_ctxt);
 
-	if (fp_enabled) {
-		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
 		__fpsimd_save_fpexc32(vcpu);
-	}
 
 	__debug_switch_to_host(vcpu);
 
@@ -464,7 +469,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *host_ctxt;
 	struct kvm_cpu_context *guest_ctxt;
-	bool fp_enabled;
 	u64 exit_code;
 
 	vcpu = kern_hyp_va(vcpu);
@@ -496,8 +500,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 		/* And we're baaack! */
 	} while (fixup_guest_exit(vcpu, &exit_code));
 
-	fp_enabled = __fpsimd_enabled_nvhe();
-
 	__sysreg_save_state_nvhe(guest_ctxt);
 	__sysreg32_save_state(vcpu);
 	__timer_disable_traps(vcpu);
@@ -508,11 +510,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 
 	__sysreg_restore_state_nvhe(host_ctxt);
 
-	if (fp_enabled) {
-		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
 		__fpsimd_save_fpexc32(vcpu);
-	}
 
 	/*
 	 * This must come after restoring the host sysregs, since a non-VHE
-- 
cgit v1.2.3


From 31dc52b3c8faf47bf3ff5ced661488a20e5d1811 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 12 Apr 2018 16:47:20 +0100
Subject: arm64/sve: Move read_zcr_features() out of cpufeature.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Having read_zcr_features() inline in cpufeature.h results in that
header requiring #includes which make it hard to include
<asm/fpsimd.h> elsewhere without triggering header inclusion
cycles.

This is not a hot-path function and arguably should not be in
cpufeature.h in the first place, so this patch moves it to
fpsimd.c, compiled conditionally if CONFIG_ARM64_SVE=y.

This allows some SVE-related #includes to be dropped from
cpufeature.h, which will ease future maintenance.

A couple of missing #includes of <asm/fpsimd.h> are exposed by this
change under arch/arm64/.  This patch adds the missing #includes as
necessary.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 29 -----------------------------
 arch/arm64/include/asm/fpsimd.h     |  2 ++
 arch/arm64/include/asm/processor.h  |  1 +
 arch/arm64/kernel/fpsimd.c          | 28 ++++++++++++++++++++++++++++
 arch/arm64/kernel/ptrace.c          |  1 +
 5 files changed, 32 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 09b0f2a80c8f..0a6b7133195e 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -11,9 +11,7 @@
 
 #include <asm/cpucaps.h>
 #include <asm/cputype.h>
-#include <asm/fpsimd.h>
 #include <asm/hwcap.h>
-#include <asm/sigcontext.h>
 #include <asm/sysreg.h>
 
 /*
@@ -510,33 +508,6 @@ static inline bool system_supports_sve(void)
 		cpus_have_const_cap(ARM64_SVE);
 }
 
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-static inline u64 read_zcr_features(void)
-{
-	u64 zcr;
-	unsigned int vq_max;
-
-	/*
-	 * Set the maximum possible VL, and write zeroes to all other
-	 * bits to see if they stick.
-	 */
-	sve_kernel_enable(NULL);
-	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
-	zcr = read_sysreg_s(SYS_ZCR_EL1);
-	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
-	vq_max = sve_vq_from_vl(sve_get_vl());
-	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
-	return zcr;
-}
-
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 3e00f701cb9c..fb60b22b8bbf 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,6 +69,8 @@ extern unsigned int sve_get_vl(void);
 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
+extern u64 read_zcr_features(void);
+
 extern int __ro_after_init sve_max_vl;
 
 #ifdef CONFIG_ARM64_SVE
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 36d64f83cdfb..9231b8762ca6 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -40,6 +40,7 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 794dd990da82..6c01ee2062c4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -37,6 +37,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/stddef.h>
 #include <linux/sysctl.h>
 
 #include <asm/esr.h>
@@ -755,6 +756,33 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
 	isb();
 }
 
+/*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+u64 read_zcr_features(void)
+{
+	u64 zcr;
+	unsigned int vq_max;
+
+	/*
+	 * Set the maximum possible VL, and write zeroes to all other
+	 * bits to see if they stick.
+	 */
+	sve_kernel_enable(NULL);
+	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+	zcr = read_sysreg_s(SYS_ZCR_EL1);
+	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+	vq_max = sve_vq_from_vl(sve_get_vl());
+	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+	return zcr;
+}
+
 void __init sve_setup(void)
 {
 	u64 zcr;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7ff81fed46e1..78889c4546d7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -44,6 +44,7 @@
 #include <asm/compat.h>
 #include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
+#include <asm/fpsimd.h>
 #include <asm/pgtable.h>
 #include <asm/stacktrace.h>
 #include <asm/syscall.h>
-- 
cgit v1.2.3


From 2cf97d46dafbbbbc9a9a3dc5eca01020c6be22d8 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 12 Apr 2018 17:04:39 +0100
Subject: arm64/sve: Switch sve_pffr() argument from task to thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sve_pffr(), which is used to derive the base address used for
low-level SVE save/restore routines, currently takes the relevant
task_struct as an argument.

The only accessed fields are actually part of thread_struct, so
this patch changes the argument type accordingly.  This is done in
preparation for moving this function to a header, where we do not
want to have to include <linux/sched.h> due to the consequent
circular #include problems.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 6c01ee2062c4..842b2ad08bec 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -44,6 +44,7 @@
 #include <asm/fpsimd.h>
 #include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/processor.h>
 #include <asm/simd.h>
 #include <asm/sigcontext.h>
 #include <asm/sysreg.h>
@@ -167,10 +168,9 @@ static size_t sve_ffr_offset(int vl)
 	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
 }
 
-static void *sve_pffr(struct task_struct *task)
+static void *sve_pffr(struct thread_struct *thread)
 {
-	return (char *)task->thread.sve_state +
-		sve_ffr_offset(task->thread.sve_vl);
+	return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
 }
 
 static void change_cpacr(u64 val, u64 mask)
@@ -253,7 +253,7 @@ static void task_fpsimd_load(void)
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
-		sve_load_state(sve_pffr(current),
+		sve_load_state(sve_pffr(&current->thread),
 			       &current->thread.uw.fpsimd_state.fpsr,
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
 	else
@@ -285,7 +285,7 @@ void fpsimd_save(void)
 				return;
 			}
 
-			sve_save_state(sve_pffr(current), &st->fpsr);
+			sve_save_state(sve_pffr(&current->thread), &st->fpsr);
 		} else
 			fpsimd_save_state(st);
 	}
-- 
cgit v1.2.3


From 9a6e594869b29ccec4f99db83c071e4f2dbfc11f Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 12 Apr 2018 17:32:35 +0100
Subject: arm64/sve: Move sve_pffr() to fpsimd.h and make inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to make sve_save_state()/sve_load_state() more easily
reusable and to get rid of a potential branch on context switch
critical paths, this patch makes sve_pffr() inline and moves it to
fpsimd.h.

<asm/processor.h> must be included in fpsimd.h in order to make
this work, and this creates an #include cycle that is tricky to
avoid without modifying core code, due to the way the PR_SVE_*()
prctl helpers are included in the core prctl implementation.

Instead of breaking the cycle, this patch defers inclusion of
<asm/fpsimd.h> in <asm/processor.h> until the point where it is
actually needed: i.e., immediately before the prctl definitions.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/fpsimd.h    | 13 +++++++++++++
 arch/arm64/include/asm/processor.h | 12 +++++++++++-
 arch/arm64/kernel/fpsimd.c         | 12 ------------
 3 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index fb60b22b8bbf..fa92747a49c8 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -18,6 +18,8 @@
 
 #include <asm/ptrace.h>
 #include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
 
 #ifndef __ASSEMBLY__
 
@@ -61,6 +63,17 @@ extern void sve_flush_cpu_state(void);
 /* Maximum VL that SVE VL-agnostic software can transparently support */
 #define SVE_VL_ARCH_MAX 0x100
 
+/* Offset of FFR in the SVE register dump */
+static inline size_t sve_ffr_offset(int vl)
+{
+	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+}
+
+static inline void *sve_pffr(struct thread_struct *thread)
+{
+	return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
+}
+
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9231b8762ca6..c99e657fdd57 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -40,7 +40,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
@@ -247,6 +246,17 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
 void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
 void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
 
+/*
+ * Not at the top of the file due to a direct #include cycle between
+ * <asm/fpsimd.h> and <asm/processor.h>.  Deferring this #include
+ * ensures that contents of processor.h are visible to fpsimd.h even if
+ * processor.h is included first.
+ *
+ * These prctl helpers are the only things in this file that require
+ * fpsimd.h.  The core code expects them to be in this header.
+ */
+#include <asm/fpsimd.h>
+
 /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
 #define SVE_SET_VL(arg)	sve_set_current_vl(arg)
 #define SVE_GET_VL()	sve_get_current_vl()
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 842b2ad08bec..e60c3a28380f 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -161,18 +161,6 @@ static void sve_free(struct task_struct *task)
 	__sve_free(task);
 }
 
-
-/* Offset of FFR in the SVE register dump */
-static size_t sve_ffr_offset(int vl)
-{
-	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
-}
-
-static void *sve_pffr(struct thread_struct *thread)
-{
-	return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
-}
-
 static void change_cpacr(u64 val, u64 mask)
 {
 	u64 cpacr = read_sysreg(CPACR_EL1);
-- 
cgit v1.2.3


From 85acda3b4a27ee3e20c54783a44f307b51912c2b Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 20 Apr 2018 16:20:43 +0100
Subject: KVM: arm64: Save host SVE context as appropriate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds SVE context saving to the hyp FPSIMD context switch
path.  This means that it is no longer necessary to save the host
SVE state in advance of entering the guest, when in use.

In order to avoid adding pointless complexity to the code, VHE is
assumed if SVE is in use.  VHE is an architectural prerequisite for
SVE, so there is no good reason to turn CONFIG_ARM64_VHE off in
kernels that support both SVE and KVM.

Historically, software models exist that can expose the
architecturally invalid configuration of SVE without VHE, so if
this situation is detected at kvm_init() time then KVM will be
disabled.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h   |  1 +
 arch/arm64/Kconfig                |  7 +++++++
 arch/arm64/include/asm/kvm_host.h | 13 +++++++++++++
 arch/arm64/kvm/fpsimd.c           |  1 -
 arch/arm64/kvm/hyp/switch.c       | 20 +++++++++++++++++++-
 5 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ac870b2cd5d1..3b85bbb4b23e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -280,6 +280,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
+static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..b0d3820081c8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1130,6 +1130,7 @@ endmenu
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
+	depends on !KVM || ARM64_VHE
 	help
 	  The Scalable Vector Extension (SVE) is an extension to the AArch64
 	  execution state which complements and extends the SIMD functionality
@@ -1155,6 +1156,12 @@ config ARM64_SVE
 	  booting the kernel.  If unsure and you are not observing these
 	  symptoms, you should assume that it is safe to say Y.
 
+	  CPUs that support SVE are architecturally required to support the
+	  Virtualization Host Extensions (VHE), so the kernel makes no
+	  provision for supporting SVE alongside KVM without VHE enabled.
+	  Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
+	  KVM in the same kernel image.
+
 config ARM64_MODULE_PLTS
 	bool
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b3fe7301bdbe..fda9289f3b9c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -405,6 +405,19 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
 }
 
+static inline bool kvm_arch_check_sve_has_vhe(void)
+{
+	/*
+	 * The Arm architecture specifies that implementation of SVE
+	 * requires VHE also to be implemented.  The KVM code for arm64
+	 * relies on this when SVE is present:
+	 */
+	if (system_supports_sve())
+		return has_vhe();
+	else
+		return true;
+}
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 365933a98a7c..dc6ecfa5a2d2 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -59,7 +59,6 @@ error:
  */
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
-	BUG_ON(system_supports_sve());
 	BUG_ON(!current->mm);
 
 	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 118f3002b9ce..a6a8c7d9157d 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -21,6 +21,7 @@
 
 #include <kvm/arm_psci.h>
 
+#include <asm/cpufeature.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
@@ -28,6 +29,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
+#include <asm/processor.h>
 #include <asm/thread_info.h>
 
 /* Check whether the FP regs were dirtied while in the host-side run loop: */
@@ -329,6 +331,8 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
 				    struct kvm_vcpu *vcpu)
 {
+	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+
 	if (has_vhe())
 		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
 			     cpacr_el1);
@@ -339,7 +343,21 @@ void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
 	isb();
 
 	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
-		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+		/*
+		 * In the SVE case, VHE is assumed: it is enforced by
+		 * Kconfig and kvm_arch_init().
+		 */
+		if (system_supports_sve() &&
+		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+			struct thread_struct *thread = container_of(
+				host_fpsimd,
+				struct thread_struct, uw.fpsimd_state);
+
+			sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+		} else {
+			__fpsimd_save_state(host_fpsimd);
+		}
+
 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
 	}
 
-- 
cgit v1.2.3


From 21cdd7fd76e3259b06d78c909e9caeb084c04b65 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 20 Apr 2018 17:39:16 +0100
Subject: KVM: arm64: Remove eager host SVE state saving
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the host SVE context can be saved on demand from Hyp,
there is no longer any need to save this state in advance before
entering the guest.

This patch removes the relevant call to
kvm_fpsimd_flush_cpu_state().

Since the problem that function was intended to solve now no longer
exists, the function and its dependencies are also deleted.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h   |  3 ---
 arch/arm64/include/asm/kvm_host.h | 10 ----------
 arch/arm64/kernel/fpsimd.c        | 21 ---------------------
 3 files changed, 34 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3b85bbb4b23e..f079a2039c8a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -312,9 +312,6 @@ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
 
-/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
-static inline void kvm_fpsimd_flush_cpu_state(void) {}
-
 static inline void kvm_arm_vhe_guest_enter(void) {}
 static inline void kvm_arm_vhe_guest_exit(void) {}
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fda9289f3b9c..a4ca202ff3f2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -457,16 +457,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 }
 #endif
 
-/*
- * All host FP/SIMD state is restored on guest exit, so nothing needs
- * doing here except in the SVE case:
-*/
-static inline void kvm_fpsimd_flush_cpu_state(void)
-{
-	if (system_supports_sve())
-		sve_flush_cpu_state();
-}
-
 static inline void kvm_arm_vhe_guest_enter(void)
 {
 	local_daif_mask();
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e60c3a28380f..7074c4cd0e0e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -120,7 +120,6 @@
  */
 struct fpsimd_last_state_struct {
 	struct user_fpsimd_state *st;
-	bool sve_in_use;
 };
 
 static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -1008,7 +1007,6 @@ void fpsimd_bind_task_to_cpu(void)
 		this_cpu_ptr(&fpsimd_last_state);
 
 	last->st = &current->thread.uw.fpsimd_state;
-	last->sve_in_use = test_thread_flag(TIF_SVE);
 	current->thread.fpsimd_cpu = smp_processor_id();
 
 	if (system_supports_sve()) {
@@ -1030,7 +1028,6 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
 	WARN_ON(!in_softirq() && !irqs_disabled());
 
 	last->st = st;
-	last->sve_in_use = false;
 }
 
 /*
@@ -1091,24 +1088,6 @@ void fpsimd_flush_cpu_state(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
-/*
- * Invalidate any task SVE state currently held in this CPU's regs.
- *
- * This is used to prevent the kernel from trying to reuse SVE register data
- * that is detroyed by KVM guest enter/exit.  This function should go away when
- * KVM SVE support is implemented.  Don't use it for anything else.
- */
-#ifdef CONFIG_ARM64_SVE
-void sve_flush_cpu_state(void)
-{
-	struct fpsimd_last_state_struct const *last =
-		this_cpu_ptr(&fpsimd_last_state);
-
-	if (last->st && last->sve_in_use)
-		fpsimd_flush_cpu_state();
-}
-#endif /* CONFIG_ARM64_SVE */
-
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 DEFINE_PER_CPU(bool, kernel_neon_busy);
-- 
cgit v1.2.3


From ba4f4cb0e661ed4c68057d4dd831f54b99770b09 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 2 May 2018 13:23:07 +0100
Subject: KVM: arm64: Remove redundant *exit_code changes in
 fpsimd_guest_exit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In fixup_guest_exit(), there are a couple of cases where after
checking what the exit code was, we assign it explicitly with the
value it already had.

Assuming this is not indicative of a bug, these assignments are not
needed.

This patch removes the redundant assignments, and simplifies some
if-nesting that becomes trivial as a result.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/switch.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index a6a8c7d9157d..18d0faa8c806 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -403,12 +403,8 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 		if (valid) {
 			int ret = __vgic_v2_perform_cpuif_access(vcpu);
 
-			if (ret == 1) {
-				if (__skip_instr(vcpu))
-					return true;
-				else
-					*exit_code = ARM_EXCEPTION_TRAP;
-			}
+			if (ret ==  1 && __skip_instr(vcpu))
+				return true;
 
 			if (ret == -1) {
 				/* Promote an illegal access to an
@@ -430,12 +426,8 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
-		if (ret == 1) {
-			if (__skip_instr(vcpu))
-				return true;
-			else
-				*exit_code = ARM_EXCEPTION_TRAP;
-		}
+		if (ret == 1 && __skip_instr(vcpu))
+			return true;
 	}
 
 	/* Return to the host kernel and handle the exit */
-- 
cgit v1.2.3


From 7846b3119e24fe8d726535d6aa7489253797700c Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 2 May 2018 13:36:48 +0100
Subject: KVM: arm64: Fold redundant exit code checks out of fixup_guest_exit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The entire tail of fixup_guest_exit() is contained in if statements
of the form if (x && *exit_code == ARM_EXCEPTION_TRAP).  As a result,
we can check just once and bail out of the function early, allowing
the remaining if conditions to be simplified.

The only awkward case is where *exit_code is changed to
ARM_EXCEPTION_EL1_SERROR in the case of an illegal GICv2 CPU
interface access: in that case, the GICv3 trap handling code is
skipped using a goto.  This avoids pointlessly evaluating the
static branch check for the GICv3 case, even though we can't have
vgic_v2_cpuif_trap and vgic_v3_cpuif_trap true simultaneously
unless we have a GICv3 and GICv2 on the host: that sounds stupid,
but I haven't satisfied myself that it can't happen.

No functional change.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/switch.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 18d0faa8c806..4fbee9502162 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -387,11 +387,13 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	 * same PC once the SError has been injected, and replay the
 	 * trapping instruction.
 	 */
-	if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+	if (*exit_code != ARM_EXCEPTION_TRAP)
+		goto exit;
+
+	if (!__populate_fault_info(vcpu))
 		return true;
 
-	if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
-	    *exit_code == ARM_EXCEPTION_TRAP) {
+	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
 		bool valid;
 
 		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -417,11 +419,12 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 					*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
 				*exit_code = ARM_EXCEPTION_EL1_SERROR;
 			}
+
+			goto exit;
 		}
 	}
 
 	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-	    *exit_code == ARM_EXCEPTION_TRAP &&
 	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
 	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
@@ -430,6 +433,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 			return true;
 	}
 
+exit:
 	/* Return to the host kernel and handle the exit */
 	return false;
 }
-- 
cgit v1.2.3


From cf412b0070221032c02c4564cd11dc83238b2ad2 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 2 May 2018 14:18:02 +0100
Subject: KVM: arm64: Invoke FPSIMD context switch trap from C
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion of the FPSIMD context switch trap code to C has added
some overhead to calling it, due to the need to save registers that
the procedure call standard defines as caller-saved.

So, perhaps it is no longer worth invoking this trap handler quite
so early.

Instead, we can invoke it from fixup_guest_exit(), with little
likelihood of increasing the overhead much further.

As a convenience, this patch gives __hyp_switch_fpsimd() the same
return semantics fixup_guest_exit().  For now there is no
possibility of a spurious FPSIMD trap, so the function always
returns true, but this allows it to be tail-called with a single
return statement.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/entry.S     | 30 ------------------------------
 arch/arm64/kvm/hyp/hyp-entry.S | 19 -------------------
 arch/arm64/kvm/hyp/switch.c    | 15 +++++++++++++--
 3 files changed, 13 insertions(+), 51 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 40f349bc1079..fad1e164fe48 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -166,33 +166,3 @@ abort_guest_exit_end:
 	orr	x0, x0, x5
 1:	ret
 ENDPROC(__guest_exit)
-
-ENTRY(__fpsimd_guest_restore)
-	// x0: esr
-	// x1: vcpu
-	// x2-x29,lr: vcpu regs
-	// vcpu x0-x1 on the stack
-	stp	x2, x3, [sp, #-144]!
-	stp	x4, x5, [sp, #16]
-	stp	x6, x7, [sp, #32]
-	stp	x8, x9, [sp, #48]
-	stp	x10, x11, [sp, #64]
-	stp	x12, x13, [sp, #80]
-	stp	x14, x15, [sp, #96]
-	stp	x16, x17, [sp, #112]
-	stp	x18, lr, [sp, #128]
-
-	bl	__hyp_switch_fpsimd
-
-	ldp	x4, x5, [sp, #16]
-	ldp	x6, x7, [sp, #32]
-	ldp	x8, x9, [sp, #48]
-	ldp	x10, x11, [sp, #64]
-	ldp	x12, x13, [sp, #80]
-	ldp	x14, x15, [sp, #96]
-	ldp	x16, x17, [sp, #112]
-	ldp	x18, lr, [sp, #128]
-	ldp	x0, x1, [sp, #144]
-	ldp	x2, x3, [sp], #160
-	eret
-ENDPROC(__fpsimd_guest_restore)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index bffece27b5c1..753b9d213651 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -113,25 +113,6 @@ el1_hvc_guest:
 
 el1_trap:
 	get_vcpu_ptr	x1, x0
-
-	mrs		x0, esr_el2
-	lsr		x0, x0, #ESR_ELx_EC_SHIFT
-	/*
-	 * x0: ESR_EC
-	 * x1: vcpu pointer
-	 */
-
-	/*
-	 * We trap the first access to the FP/SIMD to save the host context
-	 * and restore the guest context lazily.
-	 * If FP/SIMD is not implemented, handle the trap and inject an
-	 * undefined instruction exception to the guest.
-	 */
-alternative_if_not ARM64_HAS_NO_FPSIMD
-	cmp	x0, #ESR_ELx_EC_FP_ASIMD
-	b.eq	__fpsimd_guest_restore
-alternative_else_nop_endif
-
 	mov	x0, #ARM_EXCEPTION_TRAP
 	b	__guest_exit
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 4fbee9502162..2d45bd719a5d 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -328,8 +328,7 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 }
 
-void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
-				    struct kvm_vcpu *vcpu)
+static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 {
 	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
 
@@ -369,6 +368,8 @@ void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
 			     fpexc32_el2);
 
 	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+	return true;
 }
 
 /*
@@ -390,6 +391,16 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (*exit_code != ARM_EXCEPTION_TRAP)
 		goto exit;
 
+	/*
+	 * We trap the first access to the FP/SIMD to save the host context
+	 * and restore the guest context lazily.
+	 * If FP/SIMD is not implemented, handle the trap and inject an
+	 * undefined instruction exception to the guest.
+	 */
+	if (system_supports_fpsimd() &&
+	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+		return __hyp_switch_fpsimd(vcpu);
+
 	if (!__populate_fault_info(vcpu))
 		return true;
 
-- 
cgit v1.2.3


From 6e4076735d5eb45fc00e8ad0338f7974ef7147a4 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 22 May 2018 09:55:16 +0200
Subject: KVM: arm/arm64: Add KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION

This new attribute allows the userspace to set the base address
of a reditributor region, relaxing the constraint of having all
consecutive redistibutor frames contiguous.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/uapi/asm/kvm.h   | 1 +
 arch/arm64/include/uapi/asm/kvm.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index caae4843cb70..16e006f708ca 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 04b3256f8e6d..4e76630dd655 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
-- 
cgit v1.2.3


From c9c92bee533073e2c3999200cfcff2a606ac8534 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:24 +0200
Subject: x86/hyper-v: move struct hv_flush_pcpu{,ex} definitions to common
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hyper-V TLB flush hypercalls definitions will be required for KVM so move
them hyperv-tlfs.h. Structures also need to be renamed as '_pcpu' suffix is
irrelevant for a general-purpose definition.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/hyperv/mmu.c              | 28 ++++++----------------------
 arch/x86/include/asm/hyperv-tlfs.h | 16 ++++++++++++++++
 arch/x86/include/asm/mshyperv.h    |  2 +-
 3 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 5f053d7d1bd9..de27615c51ea 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -13,22 +13,6 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/hyperv.h>
 
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_flush_pcpu {
-	u64 address_space;
-	u64 flags;
-	u64 processor_mask;
-	u64 gva_list[];
-};
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_flush_pcpu_ex {
-	u64 address_space;
-	u64 flags;
-	struct hv_vpset hv_vp_set;
-	u64 gva_list[];
-};
-
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 
@@ -67,8 +51,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 				    const struct flush_tlb_info *info)
 {
 	int cpu, vcpu, gva_n, max_gvas;
-	struct hv_flush_pcpu **flush_pcpu;
-	struct hv_flush_pcpu *flush;
+	struct hv_tlb_flush **flush_pcpu;
+	struct hv_tlb_flush *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
 
@@ -82,7 +66,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush_pcpu = (struct hv_flush_pcpu **)
+	flush_pcpu = (struct hv_tlb_flush **)
 		     this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	flush = *flush_pcpu;
@@ -152,8 +136,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 				       const struct flush_tlb_info *info)
 {
 	int nr_bank = 0, max_gvas, gva_n;
-	struct hv_flush_pcpu_ex **flush_pcpu;
-	struct hv_flush_pcpu_ex *flush;
+	struct hv_tlb_flush_ex **flush_pcpu;
+	struct hv_tlb_flush_ex *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
 
@@ -167,7 +151,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush_pcpu = (struct hv_flush_pcpu_ex **)
+	flush_pcpu = (struct hv_tlb_flush_ex **)
 		     this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	flush = *flush_pcpu;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index ff3f56bdd5d7..b8c89265baf0 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -741,4 +741,20 @@ struct ipi_arg_ex {
 	struct hv_vpset vp_set;
 };
 
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+	u64 address_space;
+	u64 flags;
+	u64 processor_mask;
+	u64 gva_list[];
+};
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+	u64 address_space;
+	u64 flags;
+	struct hv_vpset hv_vp_set;
+	u64 gva_list[];
+};
+
 #endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 997192131b7b..3cd14311edfa 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -269,7 +269,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
 		return 0;
 
 	/*
-	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+	 * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
 	 * structs are not cleared between calls, we risk flushing unneeded
 	 * vCPUs otherwise.
 	 */
-- 
cgit v1.2.3


From 142c95da92e847312f4d32cc8870719fe335d121 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:26 +0200
Subject: KVM: x86: hyperv: use defines when parsing hypercall parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid open-coding offsets for hypercall input parameters, we already
have defines for them.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/hyperv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5708e951a5c6..dcfeae2deafa 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1341,9 +1341,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 #endif
 
 	code = param & 0xffff;
-	fast = (param >> 16) & 0x1;
-	rep_cnt = (param >> 32) & 0xfff;
-	rep_idx = (param >> 48) & 0xfff;
+	fast = !!(param & HV_HYPERCALL_FAST_BIT);
+	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
 
 	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
 
-- 
cgit v1.2.3


From 56b9ae78303a963dc7ea85b20e99379efb346cd8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:27 +0200
Subject: KVM: x86: hyperv: do rep check for each hypercall separately
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prepare to support TLB flush hypercalls, some of which are REP hypercalls.
Also, return HV_STATUS_INVALID_HYPERCALL_INPUT as it seems more
appropriate.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/hyperv.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index dcfeae2deafa..edb1ac44d628 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1311,7 +1311,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
 	uint16_t code, rep_idx, rep_cnt;
-	bool fast, longmode;
+	bool fast, longmode, rep;
 
 	/*
 	 * hypercall generates UD from non zero cpl and real mode
@@ -1344,28 +1344,31 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	fast = !!(param & HV_HYPERCALL_FAST_BIT);
 	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
 	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+	rep = !!(rep_cnt || rep_idx);
 
 	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
 
-	/* Hypercall continuation is not supported yet */
-	if (rep_cnt || rep_idx) {
-		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-		goto set_result;
-	}
-
 	switch (code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+		if (unlikely(rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
 		kvm_vcpu_on_spin(vcpu, true);
 		break;
 	case HVCALL_SIGNAL_EVENT:
+		if (unlikely(rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
 		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
 		if (ret != HV_STATUS_INVALID_PORT_ID)
 			break;
 		/* maybe userspace knows this conn_id: fall through */
 	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
-		if (!vcpu_to_synic(vcpu)->active) {
-			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
-- 
cgit v1.2.3


From e2f11f42824bf2d906468a94888718ae24bf0270 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:29 +0200
Subject: KVM: x86: hyperv: simplistic
 HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement HvFlushVirtualAddress{List,Space} hypercalls in a simplistic way:
do full TLB flush with KVM_REQ_TLB_FLUSH and kick vCPUs which are currently
IN_GUEST_MODE.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/hyperv.c           | 58 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/trace.h            | 24 +++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b27de80f5870..0ebe659f2802 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -477,6 +477,7 @@ struct kvm_vcpu_hv {
 	struct kvm_hyperv_exit exit;
 	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+	cpumask_t tlb_lush;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index edb1ac44d628..0d916606519d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1242,6 +1242,49 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return kvm_hv_get_msr(vcpu, msr, pdata);
 }
 
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+			    u16 rep_cnt)
+{
+	struct kvm *kvm = current_vcpu->kvm;
+	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+	struct hv_tlb_flush flush;
+	struct kvm_vcpu *vcpu;
+	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+	int i;
+
+	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space,
+			       flush.flags);
+
+	cpumask_clear(&hv_current->tlb_lush);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+		if (!(flush.flags & HV_FLUSH_ALL_PROCESSORS) &&
+		    (hv->vp_index >= 64 ||
+		    !(flush.processor_mask & BIT_ULL(hv->vp_index))))
+			continue;
+
+		/*
+		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+		 * can't analyze it here, flush TLB regardless of the specified
+		 * address space.
+		 */
+		__set_bit(i, vcpu_bitmap);
+	}
+
+	kvm_make_vcpus_request_mask(kvm,
+				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+				    vcpu_bitmap, &hv_current->tlb_lush);
+
+	/* We always do full TLB flush, set rep_done = rep_cnt. */
+	return (u64)HV_STATUS_SUCCESS |
+		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+}
+
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
 	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1379,12 +1422,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		vcpu->arch.complete_userspace_io =
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+		if (unlikely(fast || !rep_cnt || rep_idx)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
+		break;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+		if (unlikely(fast || rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
+		break;
 	default:
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 		break;
 	}
 
-set_result:
 	kvm_hv_hypercall_set_result(vcpu, ret);
 	return 1;
 }
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 9807c314c478..47a4fd758743 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1367,6 +1367,30 @@ TRACE_EVENT(kvm_hv_timer_state,
 			__entry->vcpu_id,
 			__entry->hv_timer_in_use)
 );
+
+/*
+ * Tracepoint for kvm_hv_flush_tlb.
+ */
+TRACE_EVENT(kvm_hv_flush_tlb,
+	TP_PROTO(u64 processor_mask, u64 address_space, u64 flags),
+	TP_ARGS(processor_mask, address_space, flags),
+
+	TP_STRUCT__entry(
+		__field(u64, processor_mask)
+		__field(u64, address_space)
+		__field(u64, flags)
+	),
+
+	TP_fast_assign(
+		__entry->processor_mask = processor_mask;
+		__entry->address_space = address_space;
+		__entry->flags = flags;
+	),
+
+	TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx",
+		  __entry->processor_mask, __entry->address_space,
+		  __entry->flags)
+);
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From c70126764bf09c5dd95527808b647ec347b8a822 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:30 +0200
Subject: KVM: x86: hyperv: simplistic
 HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement HvFlushVirtualAddress{List,Space}Ex hypercalls in the same way
we've implemented non-EX counterparts.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
[Initialized valid_bank_mask to silence misguided GCC warnigs. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/hyperv.c | 110 ++++++++++++++++++++++++++++++++++++++++++++------
 arch/x86/kvm/trace.h  |  27 +++++++++++++
 2 files changed, 125 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0d916606519d..14e0d0ae4e0a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1242,31 +1242,102 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return kvm_hv_get_msr(vcpu, msr, pdata);
 }
 
+static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+{
+	int i = 0, j;
+
+	if (!(valid_bank_mask & BIT_ULL(bank_no)))
+		return -1;
+
+	for (j = 0; j < bank_no; j++)
+		if (valid_bank_mask & BIT_ULL(j))
+			i++;
+
+	return i;
+}
+
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
-			    u16 rep_cnt)
+			    u16 rep_cnt, bool ex)
 {
 	struct kvm *kvm = current_vcpu->kvm;
 	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
 	struct kvm_vcpu *vcpu;
 	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
-	int i;
+	unsigned long valid_bank_mask = 0;
+	u64 sparse_banks[64];
+	int sparse_banks_len, i;
+	bool all_cpus;
 
-	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
-		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	if (!ex) {
+		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-	trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space,
-			       flush.flags);
+		trace_kvm_hv_flush_tlb(flush.processor_mask,
+				       flush.address_space, flush.flags);
+
+		sparse_banks[0] = flush.processor_mask;
+		all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+	} else {
+		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+					    sizeof(flush_ex))))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
+					  flush_ex.hv_vp_set.format,
+					  flush_ex.address_space,
+					  flush_ex.flags);
+
+		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
+		all_cpus = flush_ex.hv_vp_set.format !=
+			HV_GENERIC_SET_SPARSE_4K;
+
+		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+			sizeof(sparse_banks[0]);
+
+		if (!sparse_banks_len && !all_cpus)
+			goto ret_success;
+
+		if (!all_cpus &&
+		    kvm_read_guest(kvm,
+				   ingpa + offsetof(struct hv_tlb_flush_ex,
+						    hv_vp_set.bank_contents),
+				   sparse_banks,
+				   sparse_banks_len))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	}
 
 	cpumask_clear(&hv_current->tlb_lush);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+		int bank = hv->vp_index / 64, sbank = 0;
+
+		if (!all_cpus) {
+			/* Banks >64 can't be represented */
+			if (bank >= 64)
+				continue;
+
+			/* Non-ex hypercalls can only address first 64 vCPUs */
+			if (!ex && bank)
+				continue;
+
+			if (ex) {
+				/*
+				 * Check is the bank of this vCPU is in sparse
+				 * set and get the sparse bank number.
+				 */
+				sbank = get_sparse_bank_no(valid_bank_mask,
+							   bank);
+
+				if (sbank < 0)
+					continue;
+			}
 
-		if (!(flush.flags & HV_FLUSH_ALL_PROCESSORS) &&
-		    (hv->vp_index >= 64 ||
-		    !(flush.processor_mask & BIT_ULL(hv->vp_index))))
-			continue;
+			if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
+				continue;
+		}
 
 		/*
 		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
@@ -1280,6 +1351,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
 				    vcpu_bitmap, &hv_current->tlb_lush);
 
+ret_success:
 	/* We always do full TLB flush, set rep_done = rep_cnt. */
 	return (u64)HV_STATUS_SUCCESS |
 		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
@@ -1427,14 +1499,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
 		if (unlikely(fast || rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+		break;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+		if (unlikely(fast || !rep_cnt || rep_idx)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+		break;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+		if (unlikely(fast || rep)) {
+			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		}
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
 		break;
 	default:
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 47a4fd758743..0f997683404f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1391,6 +1391,33 @@ TRACE_EVENT(kvm_hv_flush_tlb,
 		  __entry->processor_mask, __entry->address_space,
 		  __entry->flags)
 );
+
+/*
+ * Tracepoint for kvm_hv_flush_tlb_ex.
+ */
+TRACE_EVENT(kvm_hv_flush_tlb_ex,
+	TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags),
+	TP_ARGS(valid_bank_mask, format, address_space, flags),
+
+	TP_STRUCT__entry(
+		__field(u64, valid_bank_mask)
+		__field(u64, format)
+		__field(u64, address_space)
+		__field(u64, flags)
+	),
+
+	TP_fast_assign(
+		__entry->valid_bank_mask = valid_bank_mask;
+		__entry->format = format;
+		__entry->address_space = address_space;
+		__entry->flags = flags;
+	),
+
+	TP_printk("valid_bank_mask 0x%llx format 0x%llx "
+		  "address_space 0x%llx flags 0x%llx",
+		  __entry->valid_bank_mask, __entry->format,
+		  __entry->address_space, __entry->flags)
+);
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From c1aea9196ef4f6b64a8ef7e62a933f7e4164aed9 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 16 May 2018 17:21:31 +0200
Subject: KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need a new capability to indicate support for the newly added
HvFlushVirtualAddress{List,Space}{,Ex} hypercalls. Upon seeing this
capability, userspace is supposed to announce PV TLB flush features
by setting the appropriate CPUID bits (if needed).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b7bf9ac9b6d1..22bd20fedd6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2871,6 +2871,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_SYNIC2:
 	case KVM_CAP_HYPERV_VP_INDEX:
 	case KVM_CAP_HYPERV_EVENTFD:
+	case KVM_CAP_HYPERV_TLBFLUSH:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
-- 
cgit v1.2.3


From 9617a0b33569e79567300765d659c88c1f556c5d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Wed, 30 May 2018 15:47:17 +1000
Subject: KVM: PPC: Book3S PR: Allow KVM_PPC_CONFIGURE_V3_MMU to succeed

Currently, PR KVM does not implement the configure_mmu operation, and
so the KVM_PPC_CONFIGURE_V3_MMU ioctl always fails with an EINVAL
error.  This causes recent kernels to fail to boot as a PR KVM guest
on POWER9, since recent kernels booted in HPT mode do the
H_REGISTER_PROC_TBL hypercall, which causes userspace (QEMU) to do
KVM_PPC_CONFIGURE_V3_MMU, which fails.

This implements a minimal configure_mmu operation for PR KVM.  It
succeeds only if the MMU is being configured for HPT mode and no
process table is being registered.  This is enough to get recent
kernels to boot as a PR KVM guest.

Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c74a8885427d..b1aff9f83ed0 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1687,6 +1687,17 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
 
 	return 0;
 }
+
+static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+	/* Require flags and process table base and size to all be zero. */
+	if (cfg->flags || cfg->process_table)
+		return -EINVAL;
+	return 0;
+}
+
 #else
 static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
 					 struct kvm_ppc_smmu_info *info)
@@ -1788,6 +1799,7 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
 #ifdef CONFIG_PPC_BOOK3S_64
 	.hcall_implemented = kvmppc_hcall_impl_pr,
+	.configure_mmu = kvm_configure_mmu_pr,
 #endif
 	.giveup_ext = kvmppc_giveup_ext,
 };
-- 
cgit v1.2.3


From 7b0e827c6970e8ca77c60ae87592204c39e41245 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Wed, 30 May 2018 20:07:52 +1000
Subject: KVM: PPC: Book3S HV: Factor fake-suspend handling out of
 kvmppc_save/restore_tm

This splits out the handling of "fake suspend" mode, part of the
hypervisor TM assist code for POWER9, and puts almost all of it in
new kvmppc_save_tm_hv and kvmppc_restore_tm_hv functions.  The new
functions branch to kvmppc_save/restore_tm if the CPU does not
require hypervisor TM assistance.

With this, it will be more straightforward to move kvmppc_save_tm and
kvmppc_restore_tm to another file and use them for transactional
memory support in PR KVM.  Additionally, it also makes the code a
bit clearer and reduces the number of feature sections.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 195 +++++++++++++++++++++-----------
 1 file changed, 126 insertions(+), 69 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5e6e493e065e..bfca999695f1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -795,7 +795,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
-	bl	kvmppc_restore_tm
+	bl	kvmppc_restore_tm_hv
 91:
 #endif
 
@@ -1779,7 +1779,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
-	bl	kvmppc_save_tm
+	bl	kvmppc_save_tm_hv
 91:
 #endif
 
@@ -2683,7 +2683,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	bl	kvmppc_save_tm
+	bl	kvmppc_save_tm_hv
 91:
 #endif
 
@@ -2801,7 +2801,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
-	bl	kvmppc_restore_tm
+	bl	kvmppc_restore_tm_hv
 91:
 #endif
 
@@ -3126,7 +3126,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  * This can modify all checkpointed registers, but
  * restores r1, r2 and r9 (vcpu pointer) before exit.
  */
-kvmppc_save_tm:
+kvmppc_save_tm_hv:
+	/* See if we need to handle fake suspend mode */
+BEGIN_FTR_SECTION
+	b	kvmppc_save_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+
+	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+	cmpwi	r0, 0
+	beq	kvmppc_save_tm
+
+	/* The following code handles the fake_suspend = 1 case */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
 	stdu	r1, -PPC_MIN_STKFRM(r1)
@@ -3137,59 +3147,37 @@ kvmppc_save_tm:
 	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
 	mtmsrd	r8
 
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	1f	/* TM not active in guest. */
-
-	std	r1, HSTATE_HOST_R1(r13)
-	li	r3, TM_CAUSE_KVM_RESCHED
-
-BEGIN_FTR_SECTION
-	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
-	cmpwi	r0, 0
-	beq	3f
 	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
 	beq	4f
-BEGIN_FTR_SECTION_NESTED(96)
+BEGIN_FTR_SECTION
 	bl	pnv_power9_force_smt4_catch
-END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
 	nop
-	b	6f
-3:
-	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
-	mfspr	r6, SPRN_TEXASR
-	std	r6, VCPU_ORIG_TEXASR(r9)
-6:
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	std	r1, HSTATE_HOST_R1(r13)
+
+	/* Clear the MSR RI since r1, r13 may be foobar. */
 	li	r5, 0
 	mtmsrd	r5, 1
 
-	/* All GPRs are volatile at this point. */
+	/* We have to treclaim here because that's the only way to do S->N */
+	li	r3, TM_CAUSE_KVM_RESCHED
 	TRECLAIM(R3)
 
-	/* Temporarily store r13 and r9 so we have some regs to play with */
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9, PACATMSCRATCH(r13)
-
-	/* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
-BEGIN_FTR_SECTION
-	lbz	r9, HSTATE_FAKE_SUSPEND(r13)
-	cmpwi	r9, 0
-	beq	2f
 	/*
 	 * We were in fake suspend, so we are not going to save the
 	 * register state as the guest checkpointed state (since
 	 * we already have it), therefore we can now use any volatile GPR.
 	 */
-	/* Reload stack pointer and TOC. */
+	/* Reload PACA pointer, stack pointer and TOC. */
+	GET_PACA(r13)
 	ld	r1, HSTATE_HOST_R1(r13)
 	ld	r2, PACATOC(r13)
+
 	/* Set MSR RI now we have r1 and r13 back. */
 	li	r5, MSR_RI
 	mtmsrd	r5, 1
+
 	HMT_MEDIUM
 	ld	r6, HSTATE_DSCR(r13)
 	mtspr	SPRN_DSCR, r6
@@ -3204,12 +3192,53 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
 	li	r0, PSSCR_FAKE_SUSPEND
 	andc	r3, r3, r0
 	mtspr	SPRN_PSSCR, r3
-	ld	r9, HSTATE_KVM_VCPU(r13)
+
 	/* Don't save TEXASR, use value from last exit in real suspend state */
-	b	11f
-2:
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+
+	addi	r1, r1, PPC_MIN_STKFRM
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+kvmppc_save_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	ld	r5, VCPU_MSR(r9)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beq	1f	/* TM not active in guest. */
+
+	std	r1, HSTATE_HOST_R1(r13)
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+BEGIN_FTR_SECTION
+	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
+	mfspr	r6, SPRN_TEXASR
+	std	r6, VCPU_ORIG_TEXASR(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
 	ld	r9, HSTATE_KVM_VCPU(r13)
 
 	/* Get a few more GPRs free. */
@@ -3288,7 +3317,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	std	r5, VCPU_TFHAR(r9)
 	std	r6, VCPU_TFIAR(r9)
 
-	addi	r1, r1, PPC_MIN_STKFRM
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
@@ -3299,6 +3327,61 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
  * This potentially modifies all checkpointed registers.
  * It restores r1, r2, r4 from the PACA.
  */
+kvmppc_restore_tm_hv:
+	/*
+	 * If we are doing TM emulation for the guest on a POWER9 DD2,
+	 * then we don't actually do a trechkpt -- we either set up
+	 * fake-suspend mode, or emulate a TM rollback.
+	 */
+BEGIN_FTR_SECTION
+	b	kvmppc_restore_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	li	r0, 0
+	stb	r0, HSTATE_FAKE_SUSPEND(r13)
+
+	/* Turn on TM so we can restore TM SPRs */
+	mfmsr	r5
+	li	r0, 1
+	rldimi	r5, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	ld	r5, VCPU_MSR(r4)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beqlr		/* TM not active in guest */
+
+	/* Make sure the failure summary is set */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	cmpwi	r5, 1		/* check for suspended state */
+	bgt	10f
+	stb	r5, HSTATE_FAKE_SUSPEND(r13)
+	b	9f		/* and return */
+10:	stdu	r1, -PPC_MIN_STKFRM(r1)
+	/* guest is in transactional state, so simulate rollback */
+	mr	r3, r4
+	bl	kvmhv_emulate_tm_rollback
+	nop
+	ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+	addi	r1, r1, PPC_MIN_STKFRM
+9:	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
 kvmppc_restore_tm:
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
@@ -3323,8 +3406,6 @@ kvmppc_restore_tm:
 	mtspr	SPRN_TFIAR, r6
 	mtspr	SPRN_TEXASR, r7
 
-	li	r0, 0
-	stb	r0, HSTATE_FAKE_SUSPEND(r13)
 	ld	r5, VCPU_MSR(r4)
 	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
 	beqlr		/* TM not active in guest */
@@ -3338,15 +3419,6 @@ kvmppc_restore_tm:
 	oris	r7, r7, (TEXASR_FS)@h
 	mtspr	SPRN_TEXASR, r7
 
-	/*
-	 * If we are doing TM emulation for the guest on a POWER9 DD2,
-	 * then we don't actually do a trechkpt -- we either set up
-	 * fake-suspend mode, or emulate a TM rollback.
-	 */
-BEGIN_FTR_SECTION
-	b	.Ldo_tm_fake_load
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
-
 	/*
 	 * We need to load up the checkpointed state for the guest.
 	 * We need to do this early as it will blow away any GPRs, VSRs and
@@ -3419,25 +3491,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	/* Set the MSR RI since we have our registers back. */
 	li	r5, MSR_RI
 	mtmsrd	r5, 1
-9:
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
-
-.Ldo_tm_fake_load:
-	cmpwi	r5, 1		/* check for suspended state */
-	bgt	10f
-	stb	r5, HSTATE_FAKE_SUSPEND(r13)
-	b	9b		/* and return */
-10:	stdu	r1, -PPC_MIN_STKFRM(r1)
-	/* guest is in transactional state, so simulate rollback */
-	mr	r3, r4
-	bl	kvmhv_emulate_tm_rollback
-	nop
-	ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
-	addi	r1, r1, PPC_MIN_STKFRM
-	b	9b
-#endif
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 /*
  * We come here if we get any exception or interrupt while we are
-- 
cgit v1.2.3


From 009c872a8bc4d38f487a9bd62423d019e4322517 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:47 +0800
Subject: KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to
 separate file

It is a simple patch just for moving kvmppc_save_tm/kvmppc_restore_tm()
functionalities to tm.S. There is no logic change. The reconstruct of
those APIs will be done in later patches to improve readability.

It is for preparation of reusing those APIs on both HV/PR PPC KVM.

Some slight change during move the functions includes:
- surrounds some HV KVM specific code with CONFIG_KVM_BOOK3S_HV_POSSIBLE
for compilation.
- use _GLOBAL() to define kvmppc_save_tm/kvmppc_restore_tm()

[paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV:
 Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)]

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/Makefile               |   3 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 231 --------------------------
 arch/powerpc/kvm/tm.S                   | 279 ++++++++++++++++++++++++++++++++
 3 files changed, 282 insertions(+), 231 deletions(-)
 create mode 100644 arch/powerpc/kvm/tm.S

(limited to 'arch')

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4b19da8c87ae..f872c04bb5b1 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -63,6 +63,9 @@ kvm-pr-y := \
 	book3s_64_mmu.o \
 	book3s_32_mmu.o
 
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	tm.o
+
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_rmhandlers.o
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bfca999695f1..8e016598692e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -39,8 +39,6 @@ BEGIN_FTR_SECTION;				\
 	extsw	reg, reg;			\
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
-#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
-
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
@@ -3205,122 +3203,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
 	mtlr	r0
 	blr
 
-kvmppc_save_tm:
-	mflr	r0
-	std	r0, PPC_LR_STKOFF(r1)
-
-	/* Turn on TM. */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	1f	/* TM not active in guest. */
-
-	std	r1, HSTATE_HOST_R1(r13)
-	li	r3, TM_CAUSE_KVM_RESCHED
-
-BEGIN_FTR_SECTION
-	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
-	mfspr	r6, SPRN_TEXASR
-	std	r6, VCPU_ORIG_TEXASR(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* All GPRs are volatile at this point. */
-	TRECLAIM(R3)
-
-	/* Temporarily store r13 and r9 so we have some regs to play with */
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9, PACATMSCRATCH(r13)
-	ld	r9, HSTATE_KVM_VCPU(r13)
-
-	/* Get a few more GPRs free. */
-	std	r29, VCPU_GPRS_TM(29)(r9)
-	std	r30, VCPU_GPRS_TM(30)(r9)
-	std	r31, VCPU_GPRS_TM(31)(r9)
-
-	/* Save away PPR and DSCR soon so don't run with user values. */
-	mfspr	r31, SPRN_PPR
-	HMT_MEDIUM
-	mfspr	r30, SPRN_DSCR
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-
-	/* Save all but r9, r13 & r29-r31 */
-	reg = 0
-	.rept	29
-	.if (reg != 9) && (reg != 13)
-	std	reg, VCPU_GPRS_TM(reg)(r9)
-	.endif
-	reg = reg + 1
-	.endr
-	/* ... now save r13 */
-	GET_SCRATCH0(r4)
-	std	r4, VCPU_GPRS_TM(13)(r9)
-	/* ... and save r9 */
-	ld	r4, PACATMSCRATCH(r13)
-	std	r4, VCPU_GPRS_TM(9)(r9)
-
-	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
-
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-
-	/* Save away checkpinted SPRs. */
-	std	r31, VCPU_PPR_TM(r9)
-	std	r30, VCPU_DSCR_TM(r9)
-	mflr	r5
-	mfcr	r6
-	mfctr	r7
-	mfspr	r8, SPRN_AMR
-	mfspr	r10, SPRN_TAR
-	mfxer	r11
-	std	r5, VCPU_LR_TM(r9)
-	stw	r6, VCPU_CR_TM(r9)
-	std	r7, VCPU_CTR_TM(r9)
-	std	r8, VCPU_AMR_TM(r9)
-	std	r10, VCPU_TAR_TM(r9)
-	std	r11, VCPU_XER_TM(r9)
-
-	/* Restore r12 as trap number. */
-	lwz	r12, VCPU_TRAP(r9)
-
-	/* Save FP/VSX. */
-	addi	r3, r9, VCPU_FPRS_TM
-	bl	store_fp_state
-	addi	r3, r9, VCPU_VRS_TM
-	bl	store_vr_state
-	mfspr	r6, SPRN_VRSAVE
-	stw	r6, VCPU_VRSAVE_TM(r9)
-1:
-	/*
-	 * We need to save these SPRs after the treclaim so that the software
-	 * error code is recorded correctly in the TEXASR.  Also the user may
-	 * change these outside of a transaction, so they must always be
-	 * context switched.
-	 */
-	mfspr	r7, SPRN_TEXASR
-	std	r7, VCPU_TEXASR(r9)
-11:
-	mfspr	r5, SPRN_TFHAR
-	mfspr	r6, SPRN_TFIAR
-	std	r5, VCPU_TFHAR(r9)
-	std	r6, VCPU_TFIAR(r9)
-
-	ld	r0, PPC_LR_STKOFF(r1)
-	mtlr	r0
-	blr
-
 /*
  * Restore transactional state and TM-related registers.
  * Called with r4 pointing to the vcpu struct.
@@ -3381,119 +3263,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
 9:	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
-
-kvmppc_restore_tm:
-	mflr	r0
-	std	r0, PPC_LR_STKOFF(r1)
-
-	/* Turn on TM/FP/VSX/VMX so we can restore them. */
-	mfmsr	r5
-	li	r6, MSR_TM >> 32
-	sldi	r6, r6, 32
-	or	r5, r5, r6
-	ori	r5, r5, MSR_FP
-	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
-	mtmsrd	r5
-
-	/*
-	 * The user may change these outside of a transaction, so they must
-	 * always be context switched.
-	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
-	mtspr	SPRN_TFHAR, r5
-	mtspr	SPRN_TFIAR, r6
-	mtspr	SPRN_TEXASR, r7
-
-	ld	r5, VCPU_MSR(r4)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beqlr		/* TM not active in guest */
-	std	r1, HSTATE_HOST_R1(r13)
-
-	/* Make sure the failure summary is set, otherwise we'll program check
-	 * when we trechkpt.  It's possible that this might have been not set
-	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
-	 * host.
-	 */
-	oris	r7, r7, (TEXASR_FS)@h
-	mtspr	SPRN_TEXASR, r7
-
-	/*
-	 * We need to load up the checkpointed state for the guest.
-	 * We need to do this early as it will blow away any GPRs, VSRs and
-	 * some SPRs.
-	 */
-
-	mr	r31, r4
-	addi	r3, r31, VCPU_FPRS_TM
-	bl	load_fp_state
-	addi	r3, r31, VCPU_VRS_TM
-	bl	load_vr_state
-	mr	r4, r31
-	lwz	r7, VCPU_VRSAVE_TM(r4)
-	mtspr	SPRN_VRSAVE, r7
-
-	ld	r5, VCPU_LR_TM(r4)
-	lwz	r6, VCPU_CR_TM(r4)
-	ld	r7, VCPU_CTR_TM(r4)
-	ld	r8, VCPU_AMR_TM(r4)
-	ld	r9, VCPU_TAR_TM(r4)
-	ld	r10, VCPU_XER_TM(r4)
-	mtlr	r5
-	mtcr	r6
-	mtctr	r7
-	mtspr	SPRN_AMR, r8
-	mtspr	SPRN_TAR, r9
-	mtxer	r10
-
-	/*
-	 * Load up PPR and DSCR values but don't put them in the actual SPRs
-	 * till the last moment to avoid running with userspace PPR and DSCR for
-	 * too long.
-	 */
-	ld	r29, VCPU_DSCR_TM(r4)
-	ld	r30, VCPU_PPR_TM(r4)
-
-	std	r2, PACATMSCRATCH(r13) /* Save TOC */
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* Load GPRs r0-r28 */
-	reg = 0
-	.rept	29
-	ld	reg, VCPU_GPRS_TM(reg)(r31)
-	reg = reg + 1
-	.endr
-
-	mtspr	SPRN_DSCR, r29
-	mtspr	SPRN_PPR, r30
-
-	/* Load final GPRs */
-	ld	29, VCPU_GPRS_TM(29)(r31)
-	ld	30, VCPU_GPRS_TM(30)(r31)
-	ld	31, VCPU_GPRS_TM(31)(r31)
-
-	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
-	TRECHKPT
-
-	/* Now let's get back the state we need. */
-	HMT_MEDIUM
-	GET_PACA(r13)
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATMSCRATCH(r13)
-
-	/* Set the MSR RI since we have our registers back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-	ld	r0, PPC_LR_STKOFF(r1)
-	mtlr	r0
-	blr
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 /*
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
new file mode 100644
index 000000000000..ba97789c41ca
--- /dev/null
+++ b/arch/powerpc/kvm/tm.S
@@ -0,0 +1,279 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Derived from book3s_hv_rmhandlers.S, which is:
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ */
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/export.h>
+#include <asm/tm.h>
+#include <asm/cputable.h>
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r9 pointing to the vcpu struct.
+ * This can modify all checkpointed registers, but
+ * restores r1, r2 and r9 (vcpu pointer) before exit.
+ */
+_GLOBAL(kvmppc_save_tm)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r5, VCPU_MSR(r9)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beq	1f	/* TM not active in guest. */
+#endif
+
+	std	r1, HSTATE_HOST_R1(r13)
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
+	mfspr	r6, SPRN_TEXASR
+	std	r6, VCPU_ORIG_TEXASR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+#endif
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r9, HSTATE_KVM_VCPU(r13)
+#endif
+
+	/* Get a few more GPRs free. */
+	std	r29, VCPU_GPRS_TM(29)(r9)
+	std	r30, VCPU_GPRS_TM(30)(r9)
+	std	r31, VCPU_GPRS_TM(31)(r9)
+
+	/* Save away PPR and DSCR soon so don't run with user values. */
+	mfspr	r31, SPRN_PPR
+	HMT_MEDIUM
+	mfspr	r30, SPRN_DSCR
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+#endif
+
+	/* Save all but r9, r13 & r29-r31 */
+	reg = 0
+	.rept	29
+	.if (reg != 9) && (reg != 13)
+	std	reg, VCPU_GPRS_TM(reg)(r9)
+	.endif
+	reg = reg + 1
+	.endr
+	/* ... now save r13 */
+	GET_SCRATCH0(r4)
+	std	r4, VCPU_GPRS_TM(13)(r9)
+	/* ... and save r9 */
+	ld	r4, PACATMSCRATCH(r13)
+	std	r4, VCPU_GPRS_TM(9)(r9)
+
+	/* Reload stack pointer and TOC. */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	/* Save away checkpinted SPRs. */
+	std	r31, VCPU_PPR_TM(r9)
+	std	r30, VCPU_DSCR_TM(r9)
+	mflr	r5
+	mfcr	r6
+	mfctr	r7
+	mfspr	r8, SPRN_AMR
+	mfspr	r10, SPRN_TAR
+	mfxer	r11
+	std	r5, VCPU_LR_TM(r9)
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_CTR_TM(r9)
+	std	r8, VCPU_AMR_TM(r9)
+	std	r10, VCPU_TAR_TM(r9)
+	std	r11, VCPU_XER_TM(r9)
+
+	/* Restore r12 as trap number. */
+	lwz	r12, VCPU_TRAP(r9)
+
+	/* Save FP/VSX. */
+	addi	r3, r9, VCPU_FPRS_TM
+	bl	store_fp_state
+	addi	r3, r9, VCPU_VRS_TM
+	bl	store_vr_state
+	mfspr	r6, SPRN_VRSAVE
+	stw	r6, VCPU_VRSAVE_TM(r9)
+1:
+	/*
+	 * We need to save these SPRs after the treclaim so that the software
+	 * error code is recorded correctly in the TEXASR.  Also the user may
+	 * change these outside of a transaction, so they must always be
+	 * context switched.
+	 */
+	mfspr	r7, SPRN_TEXASR
+	std	r7, VCPU_TEXASR(r9)
+11:
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r4 pointing to the vcpu struct.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1, r2, r4 from the PACA.
+ */
+_GLOBAL(kvmppc_restore_tm)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM/FP/VSX/VMX so we can restore them. */
+	mfmsr	r5
+	li	r6, MSR_TM >> 32
+	sldi	r6, r6, 32
+	or	r5, r5, r6
+	ori	r5, r5, MSR_FP
+	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r5, VCPU_MSR(r4)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beqlr		/* TM not active in guest */
+#endif
+	std	r1, HSTATE_HOST_R1(r13)
+
+	/* Make sure the failure summary is set, otherwise we'll program check
+	 * when we trechkpt.  It's possible that this might have been not set
+	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+	 * host.
+	 */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	/*
+	 * We need to load up the checkpointed state for the guest.
+	 * We need to do this early as it will blow away any GPRs, VSRs and
+	 * some SPRs.
+	 */
+
+	mr	r31, r4
+	addi	r3, r31, VCPU_FPRS_TM
+	bl	load_fp_state
+	addi	r3, r31, VCPU_VRS_TM
+	bl	load_vr_state
+	mr	r4, r31
+	lwz	r7, VCPU_VRSAVE_TM(r4)
+	mtspr	SPRN_VRSAVE, r7
+
+	ld	r5, VCPU_LR_TM(r4)
+	lwz	r6, VCPU_CR_TM(r4)
+	ld	r7, VCPU_CTR_TM(r4)
+	ld	r8, VCPU_AMR_TM(r4)
+	ld	r9, VCPU_TAR_TM(r4)
+	ld	r10, VCPU_XER_TM(r4)
+	mtlr	r5
+	mtcr	r6
+	mtctr	r7
+	mtspr	SPRN_AMR, r8
+	mtspr	SPRN_TAR, r9
+	mtxer	r10
+
+	/*
+	 * Load up PPR and DSCR values but don't put them in the actual SPRs
+	 * till the last moment to avoid running with userspace PPR and DSCR for
+	 * too long.
+	 */
+	ld	r29, VCPU_DSCR_TM(r4)
+	ld	r30, VCPU_PPR_TM(r4)
+
+	std	r2, PACATMSCRATCH(r13) /* Save TOC */
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* Load GPRs r0-r28 */
+	reg = 0
+	.rept	29
+	ld	reg, VCPU_GPRS_TM(reg)(r31)
+	reg = reg + 1
+	.endr
+
+	mtspr	SPRN_DSCR, r29
+	mtspr	SPRN_PPR, r30
+
+	/* Load final GPRs */
+	ld	29, VCPU_GPRS_TM(29)(r31)
+	ld	30, VCPU_GPRS_TM(30)(r31)
+	ld	31, VCPU_GPRS_TM(31)(r31)
+
+	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
+	TRECHKPT
+
+	/* Now let's get back the state we need. */
+	HMT_MEDIUM
+	GET_PACA(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+	ld	r4, HSTATE_KVM_VCPU(r13)
+#endif
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATMSCRATCH(r13)
+
+	/* Set the MSR RI since we have our registers back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-- 
cgit v1.2.3


From 6f597c6b63b6f3675914b5ec8fcd008a58678650 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:48 +0800
Subject: KVM: PPC: Book3S PR: Add guest MSR parameter for
 kvmppc_save_tm()/kvmppc_restore_tm()

HV KVM and PR KVM need different MSR source to indicate whether
treclaim. or trecheckpoint. is necessary.

This patch add new parameter (guest MSR) for these kvmppc_save_tm/
kvmppc_restore_tm() APIs:
- For HV KVM, it is VCPU_MSR
- For PR KVM, it is current host MSR or VCPU_SHADOW_SRR1

This enhancement enables these 2 APIs to be reused by PR KVM later.
And the patch keeps HV KVM logic unchanged.

This patch also reworks kvmppc_save_tm()/kvmppc_restore_tm() to
have a clean ABI: r3 for vcpu and r4 for guest_msr.

During kvmppc_save_tm/kvmppc_restore_tm(), the R1 need to be saved
or restored. Currently the R1 is saved into HSTATE_HOST_R1. In PR
KVM, we are going to add a C function wrapper for
kvmppc_save_tm/kvmppc_restore_tm() where the R1 will be incremented
with added stackframe and save into HSTATE_HOST_R1. There are several
places in HV KVM to load HSTATE_HOST_R1 as R1, and we don't want to
bring risk or confusion by TM code.

This patch will use HSTATE_SCRATCH2 to save/restore R1 in
kvmppc_save_tm/kvmppc_restore_tm() to avoid future confusion, since
the r1 is actually a temporary/scratch value to be saved/stored.

[paulus@ozlabs.org - rebased on top of 7b0e827c6970 ("KVM: PPC: Book3S HV:
 Factor fake-suspend handling out of kvmppc_save/restore_tm", 2018-05-30)]

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 33 +++++++++------
 arch/powerpc/kvm/tm.S                   | 71 +++++++++++++++++----------------
 2 files changed, 57 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8e016598692e..75e3bbf8c957 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -793,7 +793,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
+	mr      r3, r4
+	ld      r4, VCPU_MSR(r3)
 	bl	kvmppc_restore_tm_hv
+	ld	r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
 
@@ -1777,7 +1780,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
+	mr      r3, r9
+	ld      r4, VCPU_MSR(r3)
 	bl	kvmppc_save_tm_hv
+	ld	r9, HSTATE_KVM_VCPU(r13)
 91:
 #endif
 
@@ -2680,7 +2686,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
-	ld	r9, HSTATE_KVM_VCPU(r13)
+	ld	r3, HSTATE_KVM_VCPU(r13)
+	ld      r4, VCPU_MSR(r3)
 	bl	kvmppc_save_tm_hv
 91:
 #endif
@@ -2799,7 +2806,10 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
+	mr      r3, r4
+	ld      r4, VCPU_MSR(r3)
 	bl	kvmppc_restore_tm_hv
+	ld	r4, HSTATE_KVM_VCPU(r13)
 91:
 #endif
 
@@ -3120,9 +3130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 /*
  * Save transactional state and TM-related registers.
- * Called with r9 pointing to the vcpu struct.
+ * Called with r3 pointing to the vcpu struct and r4 containing
+ * the guest MSR value.
  * This can modify all checkpointed registers, but
- * restores r1, r2 and r9 (vcpu pointer) before exit.
+ * restores r1 and r2 before exit.
  */
 kvmppc_save_tm_hv:
 	/* See if we need to handle fake suspend mode */
@@ -3205,9 +3216,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
 
 /*
  * Restore transactional state and TM-related registers.
- * Called with r4 pointing to the vcpu struct.
+ * Called with r3 pointing to the vcpu struct
+ * and r4 containing the guest MSR value.
  * This potentially modifies all checkpointed registers.
- * It restores r1, r2, r4 from the PACA.
+ * It restores r1 and r2 from the PACA.
  */
 kvmppc_restore_tm_hv:
 	/*
@@ -3234,15 +3246,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
 	 * The user may change these outside of a transaction, so they must
 	 * always be context switched.
 	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
+	ld	r5, VCPU_TFHAR(r3)
+	ld	r6, VCPU_TFIAR(r3)
+	ld	r7, VCPU_TEXASR(r3)
 	mtspr	SPRN_TFHAR, r5
 	mtspr	SPRN_TFIAR, r6
 	mtspr	SPRN_TEXASR, r7
 
-	ld	r5, VCPU_MSR(r4)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	rldicl. r5, r4, 64 - MSR_TS_S_LG, 62
 	beqlr		/* TM not active in guest */
 
 	/* Make sure the failure summary is set */
@@ -3255,10 +3266,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
 	b	9f		/* and return */
 10:	stdu	r1, -PPC_MIN_STKFRM(r1)
 	/* guest is in transactional state, so simulate rollback */
-	mr	r3, r4
 	bl	kvmhv_emulate_tm_rollback
 	nop
-	ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
 	addi	r1, r1, PPC_MIN_STKFRM
 9:	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index ba97789c41ca..f027b5a0c0f0 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -26,9 +26,12 @@
 
 /*
  * Save transactional state and TM-related registers.
- * Called with r9 pointing to the vcpu struct.
+ * Called with:
+ * - r3 pointing to the vcpu struct
+ * - r4 points to the MSR with current TS bits:
+ * 	(For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
  * This can modify all checkpointed registers, but
- * restores r1, r2 and r9 (vcpu pointer) before exit.
+ * restores r1, r2 before exit.
  */
 _GLOBAL(kvmppc_save_tm)
 	mflr	r0
@@ -40,20 +43,17 @@ _GLOBAL(kvmppc_save_tm)
 	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
 	mtmsrd	r8
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
 	beq	1f	/* TM not active in guest. */
-#endif
 
-	std	r1, HSTATE_HOST_R1(r13)
-	li	r3, TM_CAUSE_KVM_RESCHED
+	std	r1, HSTATE_SCRATCH2(r13)
+	std	r3, HSTATE_SCRATCH1(r13)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 BEGIN_FTR_SECTION
 	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
 	mfspr	r6, SPRN_TEXASR
-	std	r6, VCPU_ORIG_TEXASR(r9)
+	std	r6, VCPU_ORIG_TEXASR(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 #endif
 
@@ -61,6 +61,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	li	r5, 0
 	mtmsrd	r5, 1
 
+	li	r3, TM_CAUSE_KVM_RESCHED
+
 	/* All GPRs are volatile at this point. */
 	TRECLAIM(R3)
 
@@ -68,9 +70,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	SET_SCRATCH0(r13)
 	GET_PACA(r13)
 	std	r9, PACATMSCRATCH(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	ld	r9, HSTATE_KVM_VCPU(r13)
-#endif
+	ld	r9, HSTATE_SCRATCH1(r13)
 
 	/* Get a few more GPRs free. */
 	std	r29, VCPU_GPRS_TM(29)(r9)
@@ -102,7 +102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	std	r4, VCPU_GPRS_TM(9)(r9)
 
 	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r1, HSTATE_SCRATCH2(r13)
 	ld	r2, PACATOC(r13)
 
 	/* Set MSR RI now we have r1 and r13 back. */
@@ -156,9 +156,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 
 /*
  * Restore transactional state and TM-related registers.
- * Called with r4 pointing to the vcpu struct.
+ * Called with:
+ *  - r3 pointing to the vcpu struct.
+ *  - r4 is the guest MSR with desired TS bits:
+ * 	For HV KVM, it is VCPU_MSR
+ * 	For PR KVM, it is provided by caller
  * This potentially modifies all checkpointed registers.
- * It restores r1, r2, r4 from the PACA.
+ * It restores r1, r2 from the PACA.
  */
 _GLOBAL(kvmppc_restore_tm)
 	mflr	r0
@@ -177,19 +181,17 @@ _GLOBAL(kvmppc_restore_tm)
 	 * The user may change these outside of a transaction, so they must
 	 * always be context switched.
 	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
+	ld	r5, VCPU_TFHAR(r3)
+	ld	r6, VCPU_TFIAR(r3)
+	ld	r7, VCPU_TEXASR(r3)
 	mtspr	SPRN_TFHAR, r5
 	mtspr	SPRN_TFIAR, r6
 	mtspr	SPRN_TEXASR, r7
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	ld	r5, VCPU_MSR(r4)
+	mr	r5, r4
 	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
 	beqlr		/* TM not active in guest */
-#endif
-	std	r1, HSTATE_HOST_R1(r13)
+	std	r1, HSTATE_SCRATCH2(r13)
 
 	/* Make sure the failure summary is set, otherwise we'll program check
 	 * when we trechkpt.  It's possible that this might have been not set
@@ -205,21 +207,21 @@ _GLOBAL(kvmppc_restore_tm)
 	 * some SPRs.
 	 */
 
-	mr	r31, r4
+	mr	r31, r3
 	addi	r3, r31, VCPU_FPRS_TM
 	bl	load_fp_state
 	addi	r3, r31, VCPU_VRS_TM
 	bl	load_vr_state
-	mr	r4, r31
-	lwz	r7, VCPU_VRSAVE_TM(r4)
+	mr	r3, r31
+	lwz	r7, VCPU_VRSAVE_TM(r3)
 	mtspr	SPRN_VRSAVE, r7
 
-	ld	r5, VCPU_LR_TM(r4)
-	lwz	r6, VCPU_CR_TM(r4)
-	ld	r7, VCPU_CTR_TM(r4)
-	ld	r8, VCPU_AMR_TM(r4)
-	ld	r9, VCPU_TAR_TM(r4)
-	ld	r10, VCPU_XER_TM(r4)
+	ld	r5, VCPU_LR_TM(r3)
+	lwz	r6, VCPU_CR_TM(r3)
+	ld	r7, VCPU_CTR_TM(r3)
+	ld	r8, VCPU_AMR_TM(r3)
+	ld	r9, VCPU_TAR_TM(r3)
+	ld	r10, VCPU_XER_TM(r3)
 	mtlr	r5
 	mtcr	r6
 	mtctr	r7
@@ -232,8 +234,8 @@ _GLOBAL(kvmppc_restore_tm)
 	 * till the last moment to avoid running with userspace PPR and DSCR for
 	 * too long.
 	 */
-	ld	r29, VCPU_DSCR_TM(r4)
-	ld	r30, VCPU_PPR_TM(r4)
+	ld	r29, VCPU_DSCR_TM(r3)
+	ld	r30, VCPU_PPR_TM(r3)
 
 	std	r2, PACATMSCRATCH(r13) /* Save TOC */
 
@@ -265,9 +267,8 @@ _GLOBAL(kvmppc_restore_tm)
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	ld	r29, HSTATE_DSCR(r13)
 	mtspr	SPRN_DSCR, r29
-	ld	r4, HSTATE_KVM_VCPU(r13)
 #endif
-	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r1, HSTATE_SCRATCH2(r13)
 	ld	r2, PACATMSCRATCH(r13)
 
 	/* Set the MSR RI since we have our registers back. */
-- 
cgit v1.2.3


From 7f386af7bdb1a45bb04fb02d7b751809d63e5b09 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:49 +0800
Subject: KVM: PPC: Book3S PR: Turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()

kvmppc_save_tm() invokes store_fp_state/store_vr_state(). So it is
mandatory to turn on FP/VSX/VMX MSR bits for its execution, just
like what kvmppc_restore_tm() did.

Previously HV KVM has turned the bits on outside of function
kvmppc_save_tm().  Now we include this bit change in kvmppc_save_tm()
so that the logic is cleaner. And PR KVM can reuse it later.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/tm.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index f027b5a0c0f0..2760d7acd371 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -41,6 +41,8 @@ _GLOBAL(kvmppc_save_tm)
 	mfmsr	r8
 	li	r0, 1
 	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	ori     r8, r8, MSR_FP
+	oris    r8, r8, (MSR_VEC | MSR_VSX)@h
 	mtmsrd	r8
 
 	rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
-- 
cgit v1.2.3


From caa3be92bebc5b87a221900ac408aa99b0badf3d Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:50 +0800
Subject: KVM: PPC: Book3S PR: Add C function wrapper for
 _kvmppc_save/restore_tm()

Currently __kvmppc_save/restore_tm() APIs can only be invoked from
assembly function. This patch adds C function wrappers for them so
that they can be safely called from C function.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/asm-prototypes.h |  6 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  6 +-
 arch/powerpc/kvm/tm.S                     | 94 ++++++++++++++++++++++++++++++-
 3 files changed, 101 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index dfdcb2374c28..5da683bebc7f 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,7 +141,13 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 void pnv_power9_force_smt4_catch(void);
 void pnv_power9_force_smt4_release(void);
 
+/* Transaction memory related */
 void tm_enable(void);
 void tm_disable(void);
 void tm_abort(uint8_t cause);
+
+struct kvm_vcpu;
+void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 75e3bbf8c957..af631d8303f6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -3138,12 +3138,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 kvmppc_save_tm_hv:
 	/* See if we need to handle fake suspend mode */
 BEGIN_FTR_SECTION
-	b	kvmppc_save_tm
+	b	__kvmppc_save_tm
 END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
 
 	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
 	cmpwi	r0, 0
-	beq	kvmppc_save_tm
+	beq	__kvmppc_save_tm
 
 	/* The following code handles the fake_suspend = 1 case */
 	mflr	r0
@@ -3228,7 +3228,7 @@ kvmppc_restore_tm_hv:
 	 * fake-suspend mode, or emulate a TM rollback.
 	 */
 BEGIN_FTR_SECTION
-	b	kvmppc_restore_tm
+	b	__kvmppc_restore_tm
 END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 2760d7acd371..4a68dd4050a4 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -33,7 +33,7 @@
  * This can modify all checkpointed registers, but
  * restores r1, r2 before exit.
  */
-_GLOBAL(kvmppc_save_tm)
+_GLOBAL(__kvmppc_save_tm)
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
 
@@ -156,6 +156,52 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	mtlr	r0
 	blr
 
+/*
+ * _kvmppc_save_tm_pr() is a wrapper around __kvmppc_save_tm(), so that it can
+ * be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_save_tm_pr)
+	mflr	r5
+	std	r5, PPC_LR_STKOFF(r1)
+	stdu    r1, -SWITCH_FRAME_SIZE(r1)
+	SAVE_NVGPRS(r1)
+
+	/* save MSR since TM/math bits might be impacted
+	 * by __kvmppc_save_tm().
+	 */
+	mfmsr	r5
+	SAVE_GPR(5, r1)
+
+	/* also save DSCR/CR so that it can be recovered later */
+	mfspr   r6, SPRN_DSCR
+	SAVE_GPR(6, r1)
+
+	mfcr    r7
+	stw     r7, _CCR(r1)
+
+	bl	__kvmppc_save_tm
+
+	ld      r7, _CCR(r1)
+	mtcr	r7
+
+	REST_GPR(6, r1)
+	mtspr   SPRN_DSCR, r6
+
+	/* need preserve current MSR's MSR_TS bits */
+	REST_GPR(5, r1)
+	mfmsr   r6
+	rldicl  r6, r6, 64 - MSR_TS_S_LG, 62
+	rldimi  r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	mtmsrd  r5
+
+	REST_NVGPRS(r1)
+	addi    r1, r1, SWITCH_FRAME_SIZE
+	ld	r5, PPC_LR_STKOFF(r1)
+	mtlr	r5
+	blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
+
 /*
  * Restore transactional state and TM-related registers.
  * Called with:
@@ -166,7 +212,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
  * This potentially modifies all checkpointed registers.
  * It restores r1, r2 from the PACA.
  */
-_GLOBAL(kvmppc_restore_tm)
+_GLOBAL(__kvmppc_restore_tm)
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
 
@@ -279,4 +325,48 @@ _GLOBAL(kvmppc_restore_tm)
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
+
+/*
+ * _kvmppc_restore_tm_pr() is a wrapper around __kvmppc_restore_tm(), so that it
+ * can be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_restore_tm_pr)
+	mflr	r5
+	std	r5, PPC_LR_STKOFF(r1)
+	stdu    r1, -SWITCH_FRAME_SIZE(r1)
+	SAVE_NVGPRS(r1)
+
+	/* save MSR to avoid TM/math bits change */
+	mfmsr	r5
+	SAVE_GPR(5, r1)
+
+	/* also save DSCR/CR so that it can be recovered later */
+	mfspr   r6, SPRN_DSCR
+	SAVE_GPR(6, r1)
+
+	mfcr    r7
+	stw     r7, _CCR(r1)
+
+	bl	__kvmppc_restore_tm
+
+	ld      r7, _CCR(r1)
+	mtcr	r7
+
+	REST_GPR(6, r1)
+	mtspr   SPRN_DSCR, r6
+
+	/* need preserve current MSR's MSR_TS bits */
+	REST_GPR(5, r1)
+	mfmsr   r6
+	rldicl  r6, r6, 64 - MSR_TS_S_LG, 62
+	rldimi  r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	mtmsrd  r5
+
+	REST_NVGPRS(r1)
+	addi    r1, r1, SWITCH_FRAME_SIZE
+	ld	r5, PPC_LR_STKOFF(r1)
+	mtlr	r5
+	blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-- 
cgit v1.2.3


From 25ddda07b6e55a12065631e20f7f1e198230502f Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:51 +0800
Subject: KVM: PPC: Book3S PR: Transition to Suspended state when injecting
 interrupt

This patch simulates interrupt behavior per Power ISA while injecting
interrupt in PR KVM:
- When interrupt happens, transactional state should be suspended.

kvmppc_mmu_book3s_64_reset_msr() will be invoked when injecting an
interrupt. This patch performs this ISA logic in
kvmppc_mmu_book3s_64_reset_msr().

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index a93d719edc90..cf9d686e8162 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -38,7 +38,16 @@
 
 static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
 {
-	kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
+	unsigned long msr = vcpu->arch.intr_msr;
+	unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+	/* If transactional, change to suspend mode on IRQ delivery */
+	if (MSR_TM_TRANSACTIONAL(cur_msr))
+		msr |= MSR_TS_S;
+	else
+		msr |= cur_msr & MSR_TS_MASK;
+
+	kvmppc_set_msr(vcpu, msr);
 }
 
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
-- 
cgit v1.2.3


From 901938add3bd598bf641672a85e644ac07e77e9a Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:52 +0800
Subject: KVM: PPC: Book3S PR: Pass through MSR TM and TS bits to shadow_msr

PowerPC TM functionality needs MSR TM/TS bits support in hardware level.
Guest TM functionality can not be emulated with "fake" MSR (msr in magic
page) TS bits.

This patch syncs TM/TS bits in shadow_msr with the MSR value in magic
page, so that the MSR TS value which guest sees is consistent with actual
MSR bits running in guest.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b1aff9f83ed0..0a892be0abcb 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -312,7 +312,12 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 	ulong smsr = guest_msr;
 
 	/* Guest MSR values */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
+		MSR_TM | MSR_TS_MASK;
+#else
 	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
+#endif
 	/* Process MSR values */
 	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 	/* External providers the guest reserved */
-- 
cgit v1.2.3


From 95757bfc72e9f08905bf6b68d5b8903db205e681 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:53 +0800
Subject: KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state
 guest

MSR TS bits can be modified with non-privileged instruction such as
tbegin./tend.  That means guest can change MSR value "silently" without
notifying host.

It is necessary to sync the TM bits to host so that host can calculate
shadow msr correctly.

Note, privileged mode in the guest will always fail transactions so we
only take care of problem state mode in the guest.

The logic is put into kvmppc_copy_from_svcpu() so that
kvmppc_handle_exit_pr() can use correct MSR TM bits even when preemption
occurs.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 73 ++++++++++++++++++++++++++++++--------------
 1 file changed, 50 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 0a892be0abcb..9369cd321417 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -182,10 +182,36 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
 	svcpu_put(svcpu);
 }
 
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+	ulong guest_msr = kvmppc_get_msr(vcpu);
+	ulong smsr = guest_msr;
+
+	/* Guest MSR values */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
+		MSR_TM | MSR_TS_MASK;
+#else
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
+#endif
+	/* Process MSR values */
+	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+	/* External providers the guest reserved */
+	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
+	/* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+	smsr |= MSR_ISF | MSR_HV;
+#endif
+	vcpu->arch.shadow_msr = smsr;
+}
+
 /* Copy data touched by real-mode code from shadow vcpu back to vcpu */
 void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	ulong old_msr;
+#endif
 
 	/*
 	 * Maybe we were already preempted and synced the svcpu from
@@ -228,6 +254,30 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 	to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Unlike other MSR bits, MSR[TS]bits can be changed at guest without
+	 * notifying host:
+	 *  modified by unprivileged instructions like "tbegin"/"tend"/
+	 * "tresume"/"tsuspend" in PR KVM guest.
+	 *
+	 * It is necessary to sync here to calculate a correct shadow_msr.
+	 *
+	 * privileged guest's tbegin will be failed at present. So we
+	 * only take care of problem state guest.
+	 */
+	old_msr = kvmppc_get_msr(vcpu);
+	if (unlikely((old_msr & MSR_PR) &&
+		(vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
+				(old_msr & (MSR_TS_MASK)))) {
+		old_msr &= ~(MSR_TS_MASK);
+		old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
+		kvmppc_set_msr_fast(vcpu, old_msr);
+		kvmppc_recalc_shadow_msr(vcpu);
+	}
+#endif
+
 	svcpu->in_use = false;
 
 out:
@@ -306,29 +356,6 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 
 /*****************************************/
 
-static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
-{
-	ulong guest_msr = kvmppc_get_msr(vcpu);
-	ulong smsr = guest_msr;
-
-	/* Guest MSR values */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
-		MSR_TM | MSR_TS_MASK;
-#else
-	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
-#endif
-	/* Process MSR values */
-	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
-	/* External providers the guest reserved */
-	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
-	/* 64-bit Process MSR values */
-#ifdef CONFIG_PPC_BOOK3S_64
-	smsr |= MSR_ISF | MSR_HV;
-#endif
-	vcpu->arch.shadow_msr = smsr;
-}
-
 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	ulong old_msr = kvmppc_get_msr(vcpu);
-- 
cgit v1.2.3


From 401a89e9375c011de4e3271d50f27648b734a7cb Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:54 +0800
Subject: KVM: PPC: Book3S PR: Implement RFID TM behavior to suppress change
 from S0 to N0

According to ISA specification for RFID, in MSR TM disabled and TS
suspended state (S0), if the target MSR is TM disabled and TS state is
inactive (N0), rfid should suppress this update.

This patch makes the RFID emulation of PR KVM consistent with this.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 68d68983948e..2eb457bc7b6e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -117,11 +117,28 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case 19:
 		switch (get_xop(inst)) {
 		case OP_19_XOP_RFID:
-		case OP_19_XOP_RFI:
+		case OP_19_XOP_RFI: {
+			unsigned long srr1 = kvmppc_get_srr1(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+			unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+			/*
+			 * add rules to fit in ISA specification regarding TM
+			 * state transistion in TM disable/Suspended state,
+			 * and target TM state is TM inactive(00) state. (the
+			 * change should be suppressed).
+			 */
+			if (((cur_msr & MSR_TM) == 0) &&
+				((srr1 & MSR_TM) == 0) &&
+				MSR_TM_SUSPENDED(cur_msr) &&
+				!MSR_TM_ACTIVE(srr1))
+				srr1 |= MSR_TS_S;
+#endif
 			kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
-			kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
+			kvmppc_set_msr(vcpu, srr1);
 			*advance = 0;
 			break;
+		}
 
 		default:
 			emulated = EMULATE_FAIL;
-- 
cgit v1.2.3


From 36383a0862b68fc14b63dd6c93c64f1f82b6e8a9 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:55 +0800
Subject: KVM: PPC: Book3S PR: Avoid changing TS bits when exiting guest

PR KVM host usually runs with TM enabled in its host MSR value, and
with non-transactional TS value.

When a guest with TM active traps into PR KVM host, the rfid at the
tail of kvmppc_interrupt_pr() will try to switch TS bits from
S0 (Suspended & TM disabled) to N1 (Non-transactional & TM enabled).

That will leads to TM Bad Thing interrupt.

This patch manually sets target TS bits unchanged to avoid this
exception.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_segment.S | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 93a180ceefad..98ccc7ec5d48 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -383,6 +383,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 */
 
 	PPC_LL	r6, HSTATE_HOST_MSR(r13)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * We don't want to change MSR[TS] bits via rfi here.
+	 * The actual TM handling logic will be in host with
+	 * recovered DR/IR bits after HSTATE_VMHANDLER.
+	 * And MSR_TM can be enabled in HOST_MSR so rfid may
+	 * not suppress this change and can lead to exception.
+	 * Manually set MSR to prevent TS state change here.
+	 */
+	mfmsr   r7
+	rldicl  r7, r7, 64 - MSR_TS_S_LG, 62
+	rldimi  r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+#endif
 	PPC_LL	r8, HSTATE_VMHANDLER(r13)
 
 #ifdef CONFIG_PPC64
-- 
cgit v1.2.3


From de7ad932190c6af17bc9075ddd40a084990c5eb3 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:56 +0800
Subject: KVM: PPC: Book3S PR: Add new kvmppc_copyto/from_vcpu_tm APIs

This patch adds 2 new APIs: kvmppc_copyto_vcpu_tm() and
kvmppc_copyfrom_vcpu_tm().  These 2 APIs will be used to copy from/to TM
data between VCPU_TM/VCPU area.

PR KVM will use these APIs for treclaim. or trechkpt. emulation.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 41 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 2eb457bc7b6e..f81a921e0865 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -87,6 +87,47 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
 	return true;
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.gpr_tm[0], &vcpu->arch.regs.gpr[0],
+			sizeof(vcpu->arch.gpr_tm));
+	memcpy(&vcpu->arch.fp_tm, &vcpu->arch.fp,
+			sizeof(struct thread_fp_state));
+	memcpy(&vcpu->arch.vr_tm, &vcpu->arch.vr,
+			sizeof(struct thread_vr_state));
+	vcpu->arch.ppr_tm = vcpu->arch.ppr;
+	vcpu->arch.dscr_tm = vcpu->arch.dscr;
+	vcpu->arch.amr_tm = vcpu->arch.amr;
+	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+	vcpu->arch.tar_tm = vcpu->arch.tar;
+	vcpu->arch.lr_tm = vcpu->arch.regs.link;
+	vcpu->arch.cr_tm = vcpu->arch.cr;
+	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+
+static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.regs.gpr[0], &vcpu->arch.gpr_tm[0],
+			sizeof(vcpu->arch.regs.gpr));
+	memcpy(&vcpu->arch.fp, &vcpu->arch.fp_tm,
+			sizeof(struct thread_fp_state));
+	memcpy(&vcpu->arch.vr, &vcpu->arch.vr_tm,
+			sizeof(struct thread_vr_state));
+	vcpu->arch.ppr = vcpu->arch.ppr_tm;
+	vcpu->arch.dscr = vcpu->arch.dscr_tm;
+	vcpu->arch.amr = vcpu->arch.amr_tm;
+	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.tar = vcpu->arch.tar_tm;
+	vcpu->arch.regs.link = vcpu->arch.lr_tm;
+	vcpu->arch.cr = vcpu->arch.cr_tm;
+	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+#endif
+
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			      unsigned int inst, int *advance)
 {
-- 
cgit v1.2.3


From 66c33e796cf9d5f7150bf4c701786d0527b594b6 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:57 +0800
Subject: KVM: PPC: Book3S PR: Add kvmppc_save/restore_tm_sprs() APIs

This patch adds 2 new APIs, kvmppc_save_tm_sprs() and
kvmppc_restore_tm_sprs(), for the purpose of TEXASR/TFIAR/TFHAR
save/restore.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 9369cd321417..92e467ebadf0 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/miscdevice.h>
+#include <asm/asm-prototypes.h>
 
 #include "book3s.h"
 
@@ -284,6 +285,27 @@ out:
 	svcpu_put(svcpu);
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+{
+	tm_enable();
+	vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+	vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+	tm_disable();
+}
+
+static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+{
+	tm_enable();
+	mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+	mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+	mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+	tm_disable();
+}
+
+#endif
+
 static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 {
 	int r = 1; /* Indicate we want to get back into the guest */
-- 
cgit v1.2.3


From 8d2e2fc5e082a7b3f858cefb6e65700f28d2955e Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:58 +0800
Subject: KVM: PPC: Book3S PR: Add transaction memory save/restore skeleton

The transaction memory checkpoint area save/restore behavior is
triggered when VCPU qemu process is switching out/into CPU, i.e.
at kvmppc_core_vcpu_put_pr() and kvmppc_core_vcpu_load_pr().

MSR TM active state is determined by TS bits:
    active: 10(transactional) or 01 (suspended)
    inactive: 00 (non-transactional)
We don't "fake" TM functionality for guest. We "sync" guest virtual
MSR TM active state(10 or 01) with shadow MSR. That is to say,
we don't emulate a transactional guest with a TM inactive MSR.

TM SPR support(TFIAR/TFAR/TEXASR) has already been supported by
commit 9916d57e64a4 ("KVM: PPC: Book3S PR: Expose TM registers").
Math register support (FPR/VMX/VSX) will be done at subsequent
patch.

Whether TM context need to be saved/restored can be determined
by kvmppc_get_msr() TM active state:
	* TM active - save/restore TM context
	* TM inactive - no need to do so and only save/restore
TM SPRs.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Suggested-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  9 +++++++++
 arch/powerpc/include/asm/kvm_host.h   |  1 -
 arch/powerpc/kvm/book3s_pr.c          | 27 +++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 20d3d5a87296..fc15ad9dfc3b 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -257,6 +257,15 @@ extern int kvmppc_hcall_impl_pr(unsigned long cmd);
 extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
 extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+#else
+static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {}
+#endif
+
 extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8dc5e439b387..fa4efa7e88f7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -627,7 +627,6 @@ struct kvm_vcpu_arch {
 
 	struct thread_vr_state vr_tm;
 	u32 vrsave_tm; /* also USPRG0 */
-
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 92e467ebadf0..a14721f034fb 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -43,6 +43,7 @@
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <asm/asm-prototypes.h>
+#include <asm/tm.h>
 
 #include "book3s.h"
 
@@ -115,6 +116,8 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 
 	if (kvmppc_is_split_real(vcpu))
 		kvmppc_fixup_split_real(vcpu);
+
+	kvmppc_restore_tm_pr(vcpu);
 }
 
 static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
@@ -134,6 +137,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 
 	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	kvmppc_save_tm_pr(vcpu);
 
 	/* Enable AIL if supported */
 	if (cpu_has_feature(CPU_FTR_HVMODE) &&
@@ -304,6 +308,29 @@ static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
 	tm_disable();
 }
 
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
+{
+	if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) {
+		kvmppc_save_tm_sprs(vcpu);
+		return;
+	}
+
+	preempt_disable();
+	_kvmppc_save_tm_pr(vcpu, mfmsr());
+	preempt_enable();
+}
+
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
+{
+	if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
+		kvmppc_restore_tm_sprs(vcpu);
+		return;
+	}
+
+	preempt_disable();
+	_kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
+	preempt_enable();
+}
 #endif
 
 static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 13989b65ebb74c05c577dbbcc111e1fdd7da763a Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:01:59 +0800
Subject: KVM: PPC: Book3S PR: Add math support for PR KVM HTM

The math registers will be saved into vcpu->arch.fp/vr and corresponding
vcpu->arch.fp_tm/vr_tm area.

We flush or giveup the math regs into vcpu->arch.fp/vr before saving
transaction. After transaction is restored, the math regs will be loaded
back into regs.

If there is a FP/VEC/VSX unavailable exception during transaction active
state, the math checkpoint content might be incorrect and we need to do
treclaim./load the correct checkpoint val/trechkpt. sequence to retry the
transaction. That will make our solution complicated. To solve this issue,
we always make the hardware guest MSR math bits (shadow_msr) consistent
with the MSR val which guest sees (kvmppc_get_msr()) when guest msr is
with tm enabled. Then all FP/VEC/VSX unavailable exception can be delivered
to guest and guest handles the exception by itself.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a14721f034fb..dcb577fde9cd 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -308,6 +308,28 @@ static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
 	tm_disable();
 }
 
+/* loadup math bits which is enabled at kvmppc_get_msr() but not enabled at
+ * hardware.
+ */
+static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu)
+{
+	ulong exit_nr;
+	ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) &
+		(MSR_FP | MSR_VEC | MSR_VSX);
+
+	if (!ext_diff)
+		return;
+
+	if (ext_diff == MSR_FP)
+		exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL;
+	else if (ext_diff == MSR_VEC)
+		exit_nr = BOOK3S_INTERRUPT_ALTIVEC;
+	else
+		exit_nr = BOOK3S_INTERRUPT_VSX;
+
+	kvmppc_handle_ext(vcpu, exit_nr, ext_diff);
+}
+
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
 {
 	if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) {
@@ -315,6 +337,8 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	kvmppc_giveup_ext(vcpu, MSR_VSX);
+
 	preempt_disable();
 	_kvmppc_save_tm_pr(vcpu, mfmsr());
 	preempt_enable();
@@ -324,12 +348,18 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
 {
 	if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
 		kvmppc_restore_tm_sprs(vcpu);
+		if (kvmppc_get_msr(vcpu) & MSR_TM)
+			kvmppc_handle_lost_math_exts(vcpu);
 		return;
 	}
 
 	preempt_disable();
 	_kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
 	preempt_enable();
+
+	if (kvmppc_get_msr(vcpu) & MSR_TM)
+		kvmppc_handle_lost_math_exts(vcpu);
+
 }
 #endif
 
@@ -468,6 +498,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 	/* Preload FPU if it's enabled */
 	if (kvmppc_get_msr(vcpu) & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (kvmppc_get_msr(vcpu) & MSR_TM)
+		kvmppc_handle_lost_math_exts(vcpu);
+#endif
 }
 
 void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
-- 
cgit v1.2.3


From 533082ae86e2f1ff6cb9eca7a25202a81fc0567e Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:00 +0800
Subject: KVM: PPC: Book3S PR: Emulate mtspr/mfspr using active TM SPRs

The mfspr/mtspr on TM SPRs(TEXASR/TFIAR/TFHAR) are non-privileged
instructions and can be executed by PR KVM guest in problem state
without trapping into the host. We only emulate mtspr/mfspr
texasr/tfiar/tfhar in guest PR=0 state.

When we are emulating mtspr tm sprs in guest PR=0 state, the emulation
result needs to be visible to guest PR=1 state. That is, the actual TM
SPR val should be loaded into actual registers.

We already flush TM SPRs into vcpu when switching out of CPU, and load
TM SPRs when switching back.

This patch corrects mfspr()/mtspr() emulation for TM SPRs to make the
actual source/dest be the actual TM SPRs.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_emulate.c     | 58 +++++++++++++++++++++++++++++------
 arch/powerpc/kvm/book3s_pr.c          |  2 +-
 3 files changed, 50 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fc15ad9dfc3b..43e8bb18c2d7 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -210,6 +210,7 @@ extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
 extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 					  unsigned int vec);
 extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
+extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac);
 extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 			   bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index f81a921e0865..c4e3ec63f253 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -24,6 +24,7 @@
 #include <asm/switch_to.h>
 #include <asm/time.h>
 #include "book3s.h"
+#include <asm/asm-prototypes.h>
 
 #define OP_19_XOP_RFID		18
 #define OP_19_XOP_RFI		50
@@ -523,13 +524,38 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		break;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case SPRN_TFHAR:
-		vcpu->arch.tfhar = spr_val;
-		break;
 	case SPRN_TEXASR:
-		vcpu->arch.texasr = spr_val;
-		break;
 	case SPRN_TFIAR:
-		vcpu->arch.tfiar = spr_val;
+		if (!cpu_has_feature(CPU_FTR_TM))
+			break;
+
+		if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+			kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)) &&
+			!((MSR_TM_SUSPENDED(kvmppc_get_msr(vcpu))) &&
+					(sprn == SPRN_TFHAR))) {
+			/* it is illegal to mtspr() TM regs in
+			 * other than non-transactional state, with
+			 * the exception of TFHAR in suspend state.
+			 */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		tm_enable();
+		if (sprn == SPRN_TFHAR)
+			mtspr(SPRN_TFHAR, spr_val);
+		else if (sprn == SPRN_TEXASR)
+			mtspr(SPRN_TEXASR, spr_val);
+		else
+			mtspr(SPRN_TFIAR, spr_val);
+		tm_disable();
+
 		break;
 #endif
 #endif
@@ -676,13 +702,25 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
 		break;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case SPRN_TFHAR:
-		*spr_val = vcpu->arch.tfhar;
-		break;
 	case SPRN_TEXASR:
-		*spr_val = vcpu->arch.texasr;
-		break;
 	case SPRN_TFIAR:
-		*spr_val = vcpu->arch.tfiar;
+		if (!cpu_has_feature(CPU_FTR_TM))
+			break;
+
+		if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+			kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		tm_enable();
+		if (sprn == SPRN_TFHAR)
+			*spr_val = mfspr(SPRN_TFHAR);
+		else if (sprn == SPRN_TEXASR)
+			*spr_val = mfspr(SPRN_TEXASR);
+		else if (sprn == SPRN_TFIAR)
+			*spr_val = mfspr(SPRN_TFIAR);
+		tm_disable();
 		break;
 #endif
 #endif
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index dcb577fde9cd..c0f45c83f683 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -918,7 +918,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
-static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
 {
 	/* Inject the Interrupt Cause field and trigger a guest interrupt */
 	vcpu->arch.fscr &= ~(0xffULL << 56);
-- 
cgit v1.2.3


From 5706340a339283fe60d55ddc72ee7728a571a834 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:01 +0800
Subject: KVM: PPC: Book3S PR: Always fail transactions in guest privileged
 state

Currently the kernel doesn't use transaction memory.
And there is an issue for privileged state in the guest that:
tbegin/tsuspend/tresume/tabort TM instructions can impact MSR TM bits
without trapping into the PR host. So following code will lead to a
false mfmsr result:
	tbegin	<- MSR bits update to Transaction active.
	beq 	<- failover handler branch
	mfmsr	<- still read MSR bits from magic page with
		transaction inactive.

It is not an issue for non-privileged guest state since its mfmsr is
not patched with magic page and will always trap into the PR host.

This patch will always fail tbegin attempt for privileged state in the
guest, so that the above issue is prevented. It is benign since
currently (guest) kernel doesn't initiate a transaction.

Test case:
https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 ++
 arch/powerpc/kvm/book3s_emulate.c     | 40 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c          | 11 +++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 43e8bb18c2d7..c1cea8222d51 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -262,9 +262,11 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
 #else
 static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
 #endif
 
 extern int kvm_irq_bypass;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index c4e3ec63f253..570339b03feb 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
 #include <asm/reg.h>
 #include <asm/switch_to.h>
 #include <asm/time.h>
+#include <asm/tm.h>
 #include "book3s.h"
 #include <asm/asm-prototypes.h>
 
@@ -48,6 +49,8 @@
 #define OP_31_XOP_EIOIO		854
 #define OP_31_XOP_SLBMFEE	915
 
+#define OP_31_XOP_TBEGIN	654
+
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ		1010
 
@@ -363,6 +366,43 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 			break;
 		}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		case OP_31_XOP_TBEGIN:
+		{
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
+				preempt_disable();
+				vcpu->arch.cr = (CR0_TBEGIN_FAILURE |
+				  (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)));
+
+				vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
+					(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
+						 << TEXASR_FC_LG));
+
+				if ((inst >> 21) & 0x1)
+					vcpu->arch.texasr |= TEXASR_ROT;
+
+				if (kvmppc_get_msr(vcpu) & MSR_HV)
+					vcpu->arch.texasr |= TEXASR_HV;
+
+				vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4;
+				vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+
+				kvmppc_restore_tm_sprs(vcpu);
+				preempt_enable();
+			} else
+				emulated = EMULATE_FAIL;
+			break;
+		}
+#endif
 		default:
 			emulated = EMULATE_FAIL;
 		}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c0f45c83f683..cc26be87e3b8 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -206,6 +206,15 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 	/* 64-bit Process MSR values */
 #ifdef CONFIG_PPC_BOOK3S_64
 	smsr |= MSR_ISF | MSR_HV;
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * in guest privileged state, we want to fail all TM transactions.
+	 * So disable MSR TM bit so that all tbegin. will be able to be
+	 * trapped into host.
+	 */
+	if (!(guest_msr & MSR_PR))
+		smsr &= ~MSR_TM;
 #endif
 	vcpu->arch.shadow_msr = smsr;
 }
@@ -299,7 +308,7 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
 	tm_disable();
 }
 
-static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
 {
 	tm_enable();
 	mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
-- 
cgit v1.2.3


From 19c585eb45e360db43790c6724a3ea929ea03de3 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:02 +0800
Subject: KVM: PPC: Book3S PR: Restore NV regs after emulating mfspr from TM
 SPRs

Currently kvmppc_handle_fac() will not update NV GPRs and thus it can
return with GUEST_RESUME.

However PR KVM guest always disables MSR_TM bit in privileged state.
If PR privileged-state guest is trying to read TM SPRs, it will
trigger TM facility unavailable exception and fall into
kvmppc_handle_fac().  Then the emulation will be done by
kvmppc_core_emulate_mfspr_pr().  The mfspr instruction can include a
RT with NV reg. So it is necessary to restore NV GPRs at this case, to
reflect the update to NV RT.

This patch make kvmppc_handle_fac() return GUEST_RESUME_NV for TM
facility unavailable exceptions in guest privileged state.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc26be87e3b8..bb9209947db9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -989,6 +989,18 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
 		break;
 	}
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* Since we disabled MSR_TM at privilege state, the mfspr instruction
+	 * for TM spr can trigger TM fac unavailable. In this case, the
+	 * emulation is handled by kvmppc_emulate_fac(), which invokes
+	 * kvmppc_emulate_mfspr() finally. But note the mfspr can include
+	 * RT for NV registers. So it need to restore those NV reg to reflect
+	 * the update.
+	 */
+	if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
+		return RESUME_GUEST_NV;
+#endif
+
 	return RESUME_GUEST;
 }
 
@@ -1350,8 +1362,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 #ifdef CONFIG_PPC_BOOK3S_64
 	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
-		kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
-		r = RESUME_GUEST;
+		r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
 		break;
 #endif
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
-- 
cgit v1.2.3


From 03c81682a90b3fc3434d3e33f3f4c7f1f39d65cd Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:03 +0800
Subject: KVM: PPC: Book3S PR: Add emulation for treclaim.

This patch adds support for "treclaim." emulation when PR KVM guest
executes treclaim. and traps to host.

We will firstly do treclaim. and save TM checkpoint. Then it is
necessary to update vcpu current reg content with checkpointed vals.
When rfid into guest again, those vcpu current reg content (now the
checkpoint vals) will be loaded into regs.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 76 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 570339b03feb..04c29e01b391 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -51,6 +51,8 @@
 
 #define OP_31_XOP_TBEGIN	654
 
+#define OP_31_XOP_TRECLAIM	942
+
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ		1010
 
@@ -130,6 +132,46 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
 	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
 }
 
+static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
+{
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+	int fc_val = ra_val ? ra_val : 1;
+
+	/* CR0 = 0 | MSR[TS] | 0 */
+	vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+		 << CR0_SHIFT);
+
+	preempt_disable();
+	kvmppc_save_tm_pr(vcpu);
+	kvmppc_copyfrom_vcpu_tm(vcpu);
+
+	tm_enable();
+	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+	/* failure recording depends on Failure Summary bit */
+	if (!(vcpu->arch.texasr & TEXASR_FS)) {
+		vcpu->arch.texasr &= ~TEXASR_FC;
+		vcpu->arch.texasr |= ((u64)fc_val << TEXASR_FC_LG);
+
+		vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+		if (kvmppc_get_msr(vcpu) & MSR_PR)
+			vcpu->arch.texasr |= TEXASR_PR;
+
+		if (kvmppc_get_msr(vcpu) & MSR_HV)
+			vcpu->arch.texasr |= TEXASR_HV;
+
+		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+		mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+		mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+	}
+	tm_disable();
+	/*
+	 * treclaim need quit to non-transactional state.
+	 */
+	guest_msr &= ~(MSR_TS_MASK);
+	kvmppc_set_msr(vcpu, guest_msr);
+	preempt_enable();
+}
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -402,6 +444,40 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				emulated = EMULATE_FAIL;
 			break;
 		}
+		case OP_31_XOP_TRECLAIM:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long ra_val = 0;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* generate interrupts based on priorities */
+			if (guest_msr & MSR_PR) {
+				/* Privileged Instruction type Program Interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (!MSR_TM_ACTIVE(guest_msr)) {
+				/* TM bad thing interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (ra)
+				ra_val = kvmppc_get_gpr(vcpu, ra);
+			kvmppc_emulate_treclaim(vcpu, ra_val);
+			break;
+		}
 #endif
 		default:
 			emulated = EMULATE_FAIL;
-- 
cgit v1.2.3


From e32c53d1cf0022af180548474f4c29c189d37d93 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:04 +0800
Subject: KVM: PPC: Book3S PR: Add emulation for trechkpt.

This patch adds host emulation when guest PR KVM executes "trechkpt.",
which is a privileged instruction and will trap into host.

We firstly copy vcpu ongoing content into vcpu tm checkpoint
content, then perform kvmppc_restore_tm_pr() to do trechkpt.
with updated vcpu tm checkpoint values.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 ++
 arch/powerpc/kvm/book3s_emulate.c     | 61 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c          |  2 +-
 3 files changed, 64 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index c1cea8222d51..2940de7bac08 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -262,10 +262,12 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
 #else
 static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 04c29e01b391..b7530cf58834 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -52,6 +52,7 @@
 #define OP_31_XOP_TBEGIN	654
 
 #define OP_31_XOP_TRECLAIM	942
+#define OP_31_XOP_TRCHKPT	1006
 
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ		1010
@@ -172,6 +173,29 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
 	kvmppc_set_msr(vcpu, guest_msr);
 	preempt_enable();
 }
+
+static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+
+	preempt_disable();
+	/*
+	 * need flush FP/VEC/VSX to vcpu save area before
+	 * copy.
+	 */
+	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	kvmppc_copyto_vcpu_tm(vcpu);
+	kvmppc_save_tm_sprs(vcpu);
+
+	/*
+	 * as a result of trecheckpoint. set TS to suspended.
+	 */
+	guest_msr &= ~(MSR_TS_MASK);
+	guest_msr |= MSR_TS_S;
+	kvmppc_set_msr(vcpu, guest_msr);
+	kvmppc_restore_tm_pr(vcpu);
+	preempt_enable();
+}
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -478,6 +502,43 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_emulate_treclaim(vcpu, ra_val);
 			break;
 		}
+		case OP_31_XOP_TRCHKPT:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long texasr;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* generate interrupt based on priorities */
+			if (guest_msr & MSR_PR) {
+				/* Privileged Instruction type Program Intr */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			tm_enable();
+			texasr = mfspr(SPRN_TEXASR);
+			tm_disable();
+
+			if (MSR_TM_ACTIVE(guest_msr) ||
+				!(texasr & (TEXASR_FS))) {
+				/* TM bad thing interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			kvmppc_emulate_trchkpt(vcpu);
+			break;
+		}
 #endif
 		default:
 			emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bb9209947db9..a275f8b3a4a0 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -299,7 +299,7 @@ out:
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
 {
 	tm_enable();
 	vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
-- 
cgit v1.2.3


From 26798f88d58dff1b61abf04becf5055e6f860d4f Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:05 +0800
Subject: KVM: PPC: Book3S PR: Add emulation for tabort. in privileged state

Currently privileged-state guest will be run with TM disabled.

Although the privileged-state guest cannot initiate a new transaction,
it can use tabort to terminate its problem state's transaction.
So it is still necessary to emulate tabort. for privileged-state guest.

Tested with:
https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b7530cf58834..34f910e03972 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -50,6 +50,7 @@
 #define OP_31_XOP_SLBMFEE	915
 
 #define OP_31_XOP_TBEGIN	654
+#define OP_31_XOP_TABORT	910
 
 #define OP_31_XOP_TRECLAIM	942
 #define OP_31_XOP_TRCHKPT	1006
@@ -196,6 +197,47 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
 	kvmppc_restore_tm_pr(vcpu);
 	preempt_enable();
 }
+
+/* emulate tabort. at guest privilege state */
+static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+{
+	/* currently we only emulate tabort. but no emulation of other
+	 * tabort variants since there is no kernel usage of them at
+	 * present.
+	 */
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+
+	preempt_disable();
+	tm_enable();
+	tm_abort(ra_val);
+
+	/* CR0 = 0 | MSR[TS] | 0 */
+	vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+		 << CR0_SHIFT);
+
+	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+	/* failure recording depends on Failure Summary bit,
+	 * and tabort will be treated as nops in non-transactional
+	 * state.
+	 */
+	if (!(vcpu->arch.texasr & TEXASR_FS) &&
+			MSR_TM_ACTIVE(guest_msr)) {
+		vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+		if (guest_msr & MSR_PR)
+			vcpu->arch.texasr |= TEXASR_PR;
+
+		if (guest_msr & MSR_HV)
+			vcpu->arch.texasr |= TEXASR_HV;
+
+		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+		mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+		mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+	}
+	tm_disable();
+	preempt_enable();
+}
+
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -468,6 +510,32 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				emulated = EMULATE_FAIL;
 			break;
 		}
+		case OP_31_XOP_TABORT:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long ra_val = 0;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* only emulate for privilege guest, since problem state
+			 * guest can run with TM enabled and we don't expect to
+			 * trap at here for that case.
+			 */
+			WARN_ON(guest_msr & MSR_PR);
+
+			if (ra)
+				ra_val = kvmppc_get_gpr(vcpu, ra);
+
+			kvmppc_emulate_tabort(vcpu, ra_val);
+			break;
+		}
 		case OP_31_XOP_TRECLAIM:
 		{
 			ulong guest_msr = kvmppc_get_msr(vcpu);
-- 
cgit v1.2.3


From 68ab07b985764ec5be816e7054a84b7ad121afc7 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:06 +0800
Subject: KVM: PPC: Book3S PR: Add guard code to prevent returning to guest
 with PR=0 and Transactional state

Currently PR KVM doesn't support transaction memory in guest privileged
state.

This patch adds a check at setting guest msr, so that we can never return
to guest with PR=0 and TS=0b10. A tabort will be emulated to indicate
this and fail transaction immediately.

[paulus@ozlabs.org - don't change the TM_CAUSE_MISC definition, instead
 use TM_CAUSE_KVM_FAC_UNAV.]

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s.h         |  6 ++++++
 arch/powerpc/kvm/book3s_emulate.c |  2 +-
 arch/powerpc/kvm/book3s_pr.c      | 13 ++++++++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4ad5e287b8bc..14ef03501d21 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -31,4 +31,10 @@ extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
 extern int kvmppc_book3s_init_pr(void);
 extern void kvmppc_book3s_exit_pr(void);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
+#else
+static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
+#endif
+
 #endif
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 34f910e03972..67d0fb40e8b2 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -199,7 +199,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
 }
 
 /* emulate tabort. at guest privilege state */
-static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 {
 	/* currently we only emulate tabort. but no emulation of other
 	 * tabort variants since there is no kernel usage of them at
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a275f8b3a4a0..ad0a2ee8d8b1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -446,12 +446,23 @@ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 
 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
-	ulong old_msr = kvmppc_get_msr(vcpu);
+	ulong old_msr;
 
 #ifdef EXIT_DEBUG
 	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* We should never target guest MSR to TS=10 && PR=0,
+	 * since we always fail transaction for guest privilege
+	 * state.
+	 */
+	if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
+		kvmppc_emulate_tabort(vcpu,
+			TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT);
+#endif
+
+	old_msr = kvmppc_get_msr(vcpu);
 	msr &= to_book3s(vcpu)->msr_mask;
 	kvmppc_set_msr_fast(vcpu, msr);
 	kvmppc_recalc_shadow_msr(vcpu);
-- 
cgit v1.2.3


From 7284ca8a5eaee311d2e4aec73b2df9bd57e0cdcb Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:07 +0800
Subject: KVM: PPC: Book3S PR: Support TAR handling for PR KVM HTM

Currently guest kernel doesn't handle TAR facility unavailable and it
always runs with TAR bit on. PR KVM will lazily enable TAR. TAR is not
a frequent-use register and it is not included in SVCPU struct.

Due to the above, the checkpointed TAR val might be a bogus TAR val.
To solve this issue, we will make vcpu->arch.fscr tar bit consistent
with shadow_fscr when TM is enabled.

At the end of emulating treclaim., the correct TAR val need to be loaded
into the register if FSCR_TAR bit is on.

At the beginning of emulating trechkpt., TAR needs to be flushed so that
the right tar val can be copied into tar_tm.

Tested with:
tools/testing/selftests/powerpc/tm/tm-tar
tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar (remove DSCR/PPR
related testing).

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 ++
 arch/powerpc/kvm/book3s_emulate.c     |  4 ++++
 arch/powerpc/kvm/book3s_pr.c          | 23 ++++++++++++++++++-----
 arch/powerpc/kvm/tm.S                 | 16 ++++++++++++++--
 4 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 2940de7bac08..1f345a0b6ba2 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -271,6 +271,8 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
 static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
 #endif
 
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+
 extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 67d0fb40e8b2..fdbc695038dc 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -173,6 +173,9 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
 	guest_msr &= ~(MSR_TS_MASK);
 	kvmppc_set_msr(vcpu, guest_msr);
 	preempt_enable();
+
+	if (vcpu->arch.shadow_fscr & FSCR_TAR)
+		mtspr(SPRN_TAR, vcpu->arch.tar);
 }
 
 static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
@@ -185,6 +188,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
 	 * copy.
 	 */
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 	kvmppc_copyto_vcpu_tm(vcpu);
 	kvmppc_save_tm_sprs(vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ad0a2ee8d8b1..eaf0c4f03c47 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -55,7 +55,9 @@
 
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 			     ulong msr);
-static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+#ifdef CONFIG_PPC_BOOK3S_64
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac);
+#endif
 
 /* Some compatibility defines */
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -346,6 +348,7 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
 
 	preempt_disable();
@@ -357,8 +360,11 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
 {
 	if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
 		kvmppc_restore_tm_sprs(vcpu);
-		if (kvmppc_get_msr(vcpu) & MSR_TM)
+		if (kvmppc_get_msr(vcpu) & MSR_TM) {
 			kvmppc_handle_lost_math_exts(vcpu);
+			if (vcpu->arch.fscr & FSCR_TAR)
+				kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+		}
 		return;
 	}
 
@@ -366,9 +372,11 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
 	_kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
 	preempt_enable();
 
-	if (kvmppc_get_msr(vcpu) & MSR_TM)
+	if (kvmppc_get_msr(vcpu) & MSR_TM) {
 		kvmppc_handle_lost_math_exts(vcpu);
-
+		if (vcpu->arch.fscr & FSCR_TAR)
+			kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+	}
 }
 #endif
 
@@ -819,7 +827,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 }
 
 /* Give up facility (TAR / EBB / DSCR) */
-static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
@@ -1020,7 +1028,12 @@ void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
 	if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
 		/* TAR got dropped, drop it in shadow too */
 		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	} else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) {
+		vcpu->arch.fscr = fscr;
+		kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+		return;
 	}
+
 	vcpu->arch.fscr = fscr;
 }
 #endif
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 4a68dd4050a4..90e330f21356 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -172,15 +172,21 @@ _GLOBAL(_kvmppc_save_tm_pr)
 	mfmsr	r5
 	SAVE_GPR(5, r1)
 
-	/* also save DSCR/CR so that it can be recovered later */
+	/* also save DSCR/CR/TAR so that it can be recovered later */
 	mfspr   r6, SPRN_DSCR
 	SAVE_GPR(6, r1)
 
 	mfcr    r7
 	stw     r7, _CCR(r1)
 
+	mfspr   r8, SPRN_TAR
+	SAVE_GPR(8, r1)
+
 	bl	__kvmppc_save_tm
 
+	REST_GPR(8, r1)
+	mtspr   SPRN_TAR, r8
+
 	ld      r7, _CCR(r1)
 	mtcr	r7
 
@@ -340,15 +346,21 @@ _GLOBAL(_kvmppc_restore_tm_pr)
 	mfmsr	r5
 	SAVE_GPR(5, r1)
 
-	/* also save DSCR/CR so that it can be recovered later */
+	/* also save DSCR/CR/TAR so that it can be recovered later */
 	mfspr   r6, SPRN_DSCR
 	SAVE_GPR(6, r1)
 
 	mfcr    r7
 	stw     r7, _CCR(r1)
 
+	mfspr   r8, SPRN_TAR
+	SAVE_GPR(8, r1)
+
 	bl	__kvmppc_restore_tm
 
+	REST_GPR(8, r1)
+	mtspr   SPRN_TAR, r8
+
 	ld      r7, _CCR(r1)
 	mtcr	r7
 
-- 
cgit v1.2.3


From d234d68eb7464c9ebd11e870404e83d3e9348406 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:08 +0800
Subject: KVM: PPC: Book3S PR: Enable HTM for PR KVM for KVM_CHECK_EXTENSION
 ioctl

With current patch set, PR KVM now supports HTM. So this patch turns it
on for PR KVM.

Tested with:
https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/powerpc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 05eccdc10fdd..b8247fac3e56 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -648,9 +648,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case KVM_CAP_PPC_HTM:
-		r = hv_enabled &&
-		    (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
-		     cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
+		r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+		     (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
 		break;
 #endif
 	default:
-- 
cgit v1.2.3


From b3cebfe8c1cadf1817939dcc3688a2504a69c662 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:09 +0800
Subject: KVM: PPC: Move vcpu_load/vcpu_put down to each ioctl case in
 kvm_arch_vcpu_ioctl

Although we already have kvm_arch_vcpu_async_ioctl() which doesn't require
ioctl to load vcpu, the sync ioctl code need to be cleaned up when
CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL is not configured.

This patch moves vcpu_load/vcpu_put down to each ioctl switch case so that
each ioctl can decide to do vcpu_load/vcpu_put or not independently.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/powerpc.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b8247fac3e56..c2c3477af746 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1980,16 +1980,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	void __user *argp = (void __user *)arg;
 	long r;
 
-	vcpu_load(vcpu);
-
 	switch (ioctl) {
 	case KVM_ENABLE_CAP:
 	{
 		struct kvm_enable_cap cap;
 		r = -EFAULT;
+		vcpu_load(vcpu);
 		if (copy_from_user(&cap, argp, sizeof(cap)))
 			goto out;
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		vcpu_put(vcpu);
 		break;
 	}
 
@@ -1998,12 +1998,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	{
 		struct kvm_one_reg reg;
 		r = -EFAULT;
+		vcpu_load(vcpu);
 		if (copy_from_user(&reg, argp, sizeof(reg)))
 			goto out;
 		if (ioctl == KVM_SET_ONE_REG)
 			r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
 		else
 			r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+		vcpu_put(vcpu);
 		break;
 	}
 
@@ -2011,9 +2013,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	case KVM_DIRTY_TLB: {
 		struct kvm_dirty_tlb dirty;
 		r = -EFAULT;
+		vcpu_load(vcpu);
 		if (copy_from_user(&dirty, argp, sizeof(dirty)))
 			goto out;
 		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		vcpu_put(vcpu);
 		break;
 	}
 #endif
@@ -2022,7 +2026,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 
 out:
-	vcpu_put(vcpu);
 	return r;
 }
 
-- 
cgit v1.2.3


From c8235c2891d0561960fd5d62fdda58cfb6e503b2 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:10 +0800
Subject: KVM: PPC: Remove load/put vcpu for KVM_GET/SET_ONE_REG ioctl

Since the vcpu mutex locking/unlock has been moved out of vcpu_load()
/vcpu_put(), KVM_GET_ONE_REG and KVM_SET_ONE_REG doesn't need to do
ioctl with loading vcpu anymore. This patch removes vcpu_load()/vcpu_put()
from KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctl.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/powerpc.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c2c3477af746..4cb377605167 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1998,14 +1998,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	{
 		struct kvm_one_reg reg;
 		r = -EFAULT;
-		vcpu_load(vcpu);
 		if (copy_from_user(&reg, argp, sizeof(reg)))
 			goto out;
 		if (ioctl == KVM_SET_ONE_REG)
 			r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
 		else
 			r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
-		vcpu_put(vcpu);
 		break;
 	}
 
-- 
cgit v1.2.3


From 8f04412699e42c7fb5f68a847b1531ad3211b523 Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:11 +0800
Subject: KVM: PPC: Book3S: Remove load/put vcpu for KVM_GET_REGS/KVM_SET_REGS

In both HV and PR KVM, the KVM_SET_REGS/KVM_GET_REGS ioctl should
be able to perform without the vcpu loaded.

Since the vcpu mutex locking/unlock has been moved out of vcpu_load()
/vcpu_put(), KVM_SET_REGS/KVM_GET_REGS don't need to do ioctl with
the vcpu loaded anymore. This patch removes vcpu_load()/vcpu_put()
from KVM_SET_REGS/KVM_GET_REGS ioctl.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 320cdcf84591..309c8cf8fed4 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -509,8 +509,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
-	vcpu_load(vcpu);
-
 	regs->pc = kvmppc_get_pc(vcpu);
 	regs->cr = kvmppc_get_cr(vcpu);
 	regs->ctr = kvmppc_get_ctr(vcpu);
@@ -532,7 +530,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
-	vcpu_put(vcpu);
 	return 0;
 }
 
@@ -540,8 +537,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
-	vcpu_load(vcpu);
-
 	kvmppc_set_pc(vcpu, regs->pc);
 	kvmppc_set_cr(vcpu, regs->cr);
 	kvmppc_set_ctr(vcpu, regs->ctr);
@@ -562,7 +557,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
-	vcpu_put(vcpu);
 	return 0;
 }
 
-- 
cgit v1.2.3


From deeb879de955f4e04be3d43c33bc311087f063bf Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Wed, 23 May 2018 15:02:12 +0800
Subject: KVM: PPC: Book3S PR: Enable kvmppc_get/set_one_reg_pr() for HTM
 registers

We need to migrate PR KVM during transaction and userspace will use
kvmppc_get_one_reg_pr()/kvmppc_set_one_reg_pr() APIs to get/set
transaction checkpoint state. This patch adds support for that.

So far, QEMU on PR KVM doesn't fully function for migration but the
savevm/loadvm can be done against a RHEL72 guest. During savevm/
loadvm procedure, the kvm ioctls will be invoked as well.

Test has been performed to savevm/loadvm for a guest running
a HTM test program:
https://github.com/justdoitqd/publicFiles/blob/master/test-tm-mig.c

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 133 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index eaf0c4f03c47..e96ead92ae48 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1539,6 +1539,73 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		else
 			*val = get_reg_val(id, 0);
 		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		*val = get_reg_val(id, vcpu->arch.tfhar);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		*val = get_reg_val(id, vcpu->arch.tfiar);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		*val = get_reg_val(id, vcpu->arch.texasr);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		*val = get_reg_val(id,
+				vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int i, j;
+
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+		else {
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				val->vval = vcpu->arch.vr_tm.vr[i-32];
+			else
+				r = -ENXIO;
+		}
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		*val = get_reg_val(id, vcpu->arch.cr_tm);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		*val = get_reg_val(id, vcpu->arch.xer_tm);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		*val = get_reg_val(id, vcpu->arch.lr_tm);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		*val = get_reg_val(id, vcpu->arch.ctr_tm);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		*val = get_reg_val(id, vcpu->arch.amr_tm);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		*val = get_reg_val(id, vcpu->arch.ppr_tm);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
+		break;
+	case KVM_REG_PPC_TM_VSCR:
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		*val = get_reg_val(id, vcpu->arch.dscr_tm);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		*val = get_reg_val(id, vcpu->arch.tar_tm);
+		break;
+#endif
 	default:
 		r = -EINVAL;
 		break;
@@ -1572,6 +1639,72 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_LPCR_64:
 		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
 		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		vcpu->arch.tfhar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		vcpu->arch.tfiar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		vcpu->arch.texasr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] =
+			set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int i, j;
+
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+		else
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				vcpu->arch.vr_tm.vr[i-32] = val->vval;
+			else
+				r = -ENXIO;
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		vcpu->arch.cr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		vcpu->arch.xer_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		vcpu->arch.lr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		vcpu->arch.ctr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		vcpu->arch.amr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		vcpu->arch.ppr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSCR:
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		vcpu->arch.dscr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		vcpu->arch.tar_tm = set_reg_val(id, *val);
+		break;
+#endif
 	default:
 		r = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 1499fa809e9e6713952ef84a7e9d51606881681f Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 19 Apr 2018 00:49:58 +0530
Subject: kvm: Change return type to vm_fault_t

Use new return type vm_fault_t for fault handler. For
now, this is just documenting that the function returns
a VM_FAULT value rather than an errno. Once all instances
are converted, vm_fault_t will become a distinct type.

commit 1c8f422059ae ("mm: change return type to vm_fault_t")

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/kvm/mips.c       | 2 +-
 arch/powerpc/kvm/powerpc.c | 2 +-
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2549fdd27ee1..03e0e0f189cc 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1076,7 +1076,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -ENOIOCTLCMD;
 }
 
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4e387647b5af..3764d000872e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1830,7 +1830,7 @@ out:
 	return r;
 }
 
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e521f7699032..7142508ca6e1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3993,7 +3993,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	return r;
 }
 
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
 #ifdef CONFIG_KVM_S390_UCONTROL
 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 22bd20fedd6d..1d3dfc2c941d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3966,7 +3966,7 @@ out_nofree:
 	return r;
 }
 
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
 }
-- 
cgit v1.2.3


From d1e5b0e98ea27b4f17871dc4e8ea4b0447e35221 Mon Sep 17 00:00:00 2001
From: Marc Orr <marcorr@google.com>
Date: Tue, 15 May 2018 04:37:37 -0700
Subject: kvm: Make VM ioctl do valloc for some archs

The kvm struct has been bloating. For example, it's tens of kilo-bytes
for x86, which turns out to be a large amount of memory to allocate
contiguously via kzalloc. Thus, this patch does the following:
1. Uses architecture-specific routines to allocate the kvm struct via
   vzalloc for x86.
2. Switches arm to __KVM_HAVE_ARCH_VM_ALLOC so that it can use vzalloc
   when has_vhe() is true.

Other architectures continue to default to kalloc, as they have a
dependency on kalloc or have a small-enough struct kvm.

Signed-off-by: Marc Orr <marcorr@google.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h   | 4 ++++
 arch/arm64/include/asm/kvm_host.h | 4 ++++
 arch/x86/kvm/svm.c                | 4 ++--
 arch/x86/kvm/vmx.c                | 4 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f079a2039c8a..4b12f32f540c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -324,4 +324,8 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
 
+#define __KVM_HAVE_ARCH_VM_ALLOC
+struct kvm *kvm_arch_alloc_vm(void);
+void kvm_arch_free_vm(struct kvm *kvm);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a4ca202ff3f2..c923d3e17ba3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -482,4 +482,8 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
 
+#define __KVM_HAVE_ARCH_VM_ALLOC
+struct kvm *kvm_arch_alloc_vm(void);
+void kvm_arch_free_vm(struct kvm *kvm);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index de21d5c5168b..d9305f1723f5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1852,13 +1852,13 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
 
 static struct kvm *svm_vm_alloc(void)
 {
-	struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+	struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
 	return &kvm_svm->kvm;
 }
 
 static void svm_vm_free(struct kvm *kvm)
 {
-	kfree(to_kvm_svm(kvm));
+	vfree(to_kvm_svm(kvm));
 }
 
 static void sev_vm_destroy(struct kvm *kvm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e50beb76d846..d205e9246f99 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10139,13 +10139,13 @@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static struct kvm *vmx_vm_alloc(void)
 {
-	struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+	struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
 	return &kvm_vmx->kvm;
 }
 
 static void vmx_vm_free(struct kvm *kvm)
 {
-	kfree(to_kvm_vmx(kvm));
+	vfree(to_kvm_vmx(kvm));
 }
 
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
-- 
cgit v1.2.3


From 929f45e32499171ce3e5a15db972256eac513ad7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 29 May 2018 18:22:04 +0200
Subject: kvm: no need to check return value of debugfs_create functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

This cleans up the error handling a lot, as this code will never get
hit.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Christoffer Dall <christoffer.dall@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Radim KrÄmÃ¡Å™" <rkrcmar@redhat.com>
Cc: Arvind Yadav <arvind.yadav.cs@gmail.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: kvm-ppc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: kvm@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/book3s_hv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d07fca5121c..67d7de1470cc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3950,8 +3950,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 */
 	snprintf(buf, sizeof(buf), "vm%d", current->pid);
 	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
-	if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
-		kvmppc_mmu_debugfs_init(kvm);
+	kvmppc_mmu_debugfs_init(kvm);
 
 	return 0;
 }
-- 
cgit v1.2.3


From c5ce8235cffa00c207e24210329094d7634bb467 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 29 May 2018 14:53:17 +0800
Subject: KVM: VMX: Optimize tscdeadline timer latency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'Commit d0659d946be0 ("KVM: x86: add option to advance tscdeadline
hrtimer expiration")' advances the tscdeadline (the timer is emulated
by hrtimer) expiration in order that the latency which is incurred
by hypervisor (apic_timer_fn -> vmentry) can be avoided. This patch
adds the advance tscdeadline expiration support to which the tscdeadline
timer is emulated by VMX preemption timer to reduce the hypervisor
lantency (handle_preemption_timer -> vmentry). The guest can also
set an expiration that is very small (for example in Linux if an
hrtimer feeds a expiration in the past); in that case we set delta_tsc
to 0, leading to an immediately vmexit when delta_tsc is not bigger than
advance ns.

This patch can reduce ~63% latency (~4450 cycles to ~1660 cycles on
a haswell desktop) for kvm-unit-tests/tscdeadline_latency when testing
busy waits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 8 +++++++-
 arch/x86/kvm/x86.c | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d205e9246f99..aff0f3ee6a1d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -12435,7 +12435,7 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 {
 	struct vcpu_vmx *vmx;
-	u64 tscl, guest_tscl, delta_tsc;
+	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
 
 	if (kvm_mwait_in_guest(vcpu->kvm))
 		return -EOPNOTSUPP;
@@ -12444,6 +12444,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 	tscl = rdtsc();
 	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
 	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+	lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+
+	if (delta_tsc > lapic_timer_advance_cycles)
+		delta_tsc -= lapic_timer_advance_cycles;
+	else
+		delta_tsc = 0;
 
 	/* Convert to host delta tsc if tsc scaling is enabled */
 	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1d3dfc2c941d..93dd25d005a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -138,6 +138,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 /* lapic timer advance (tscdeadline mode only) in nanoseconds */
 unsigned int __read_mostly lapic_timer_advance_ns = 0;
 module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
 
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
-- 
cgit v1.2.3


From a943ac50d10aac96dca63d0460365a699d41fdd0 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 29 May 2018 09:11:32 -0700
Subject: kvm: nVMX: Restrict VMX capability MSR changes

Disallow changes to the VMX capability MSRs while the vCPU is in VMX
operation. Although this does break the existing API, it helps to
avoid some potentially tricky situations for which there is no
architected behavior.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aff0f3ee6a1d..55f86eebc780 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3538,6 +3538,13 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/*
+	 * Don't allow changes to the VMX capability MSRs while the vCPU
+	 * is in VMX operation.
+	 */
+	if (vmx->nested.vmxon)
+		return -EBUSY;
+
 	switch (msr_index) {
 	case MSR_IA32_VMX_BASIC:
 		return vmx_restore_vmx_basic(vmx, data);
-- 
cgit v1.2.3


From f4160e459c57646122beaea3f163b798179ea446 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 29 May 2018 09:11:33 -0700
Subject: kvm: nVMX: Add support for "VMWRITE to any supported field"

Add support for "VMWRITE to any supported field in the VMCS" and
enable this feature by default in L1's IA32_VMX_MISC MSR. If userspace
clears the VMX capability bit, the old behavior will be restored.

Note that this feature is a prerequisite for kvm in L1 to use VMCS
shadowing, once that feature is available.

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 60 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 55f86eebc780..709de996f063 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1694,6 +1694,17 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
 	return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
 }
 
+/*
+ * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
+ * to modify any valid field of the VMCS, or are the VM-exit
+ * information fields read-only?
+ */
+static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.misc_low &
+		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -3311,6 +3322,7 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
 		msrs->misc_high);
 	msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
 	msrs->misc_low |=
+		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
 		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
 		VMX_MISC_ACTIVITY_HLT;
 	msrs->misc_high = 0;
@@ -3484,6 +3496,15 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
 
 	vmx->nested.msrs.misc_low = data;
 	vmx->nested.msrs.misc_high = data >> 32;
+
+	/*
+	 * If L1 has read-only VM-exit information fields, use the
+	 * less permissive vmx_vmwrite_bitmap to specify write
+	 * permissions for the shadow VMCS.
+	 */
+	if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+
 	return 0;
 }
 
@@ -6154,8 +6175,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	int i;
 
 	if (enable_shadow_vmcs) {
+		/*
+		 * At vCPU creation, "VMWRITE to any supported field
+		 * in the VMCS" is supported, so use the more
+		 * permissive vmx_vmread_bitmap to specify both read
+		 * and write permissions for the shadow VMCS.
+		 */
 		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
-		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
 	}
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -8136,23 +8163,42 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
 
 }
 
+/*
+ * Copy the writable VMCS shadow fields back to the VMCS12, in case
+ * they have been modified by the L1 guest. Note that the "read-only"
+ * VM-exit information fields are actually writable if the vCPU is
+ * configured to support "VMWRITE to any supported field in the VMCS."
+ */
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 {
-	int i;
+	const u16 *fields[] = {
+		shadow_read_write_fields,
+		shadow_read_only_fields
+	};
+	const int max_fields[] = {
+		max_shadow_read_write_fields,
+		max_shadow_read_only_fields
+	};
+	int i, q;
 	unsigned long field;
 	u64 field_value;
 	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
-	const u16 *fields = shadow_read_write_fields;
-	const int num_fields = max_shadow_read_write_fields;
 
 	preempt_disable();
 
 	vmcs_load(shadow_vmcs);
 
-	for (i = 0; i < num_fields; i++) {
-		field = fields[i];
-		field_value = __vmcs_readl(field);
-		vmcs12_write_any(&vmx->vcpu, field, field_value);
+	for (q = 0; q < ARRAY_SIZE(fields); q++) {
+		for (i = 0; i < max_fields[q]; i++) {
+			field = fields[q][i];
+			field_value = __vmcs_readl(field);
+			vmcs12_write_any(&vmx->vcpu, field, field_value);
+		}
+		/*
+		 * Skip the VM-exit information fields if they are read-only.
+		 */
+		if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+			break;
 	}
 
 	vmcs_clear(shadow_vmcs);
@@ -8286,7 +8332,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 
 
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
-	if (vmcs_field_readonly(field)) {
+	/*
+	 * If the vCPU supports "VMWRITE to any supported field in the
+	 * VMCS," then the "read-only" fields are actually read/write.
+	 */
+	if (vmcs_field_readonly(field) &&
+	    !nested_cpu_has_vmwrite_any_field(vcpu)) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
 		return kvm_skip_emulated_instruction(vcpu);
-- 
cgit v1.2.3


From ebcbd75e396258a5041d2b28fec02c27f65d59bb Mon Sep 17 00:00:00 2001
From: Alan Kao <alankao@andestech.com>
Date: Tue, 8 May 2018 10:59:33 +0800
Subject: riscv: Fix the bug in memory access fixup code

A piece of fixup code is currently shared by __copy_user and
__clear_user.  It first disables the access to user-space memory
and then returns the "n" argument, which represents #(bytes not processed).
However,__copy_user's "n" is in register a2, while __clear_user's in a1,
and thus it causes errors for programs like setdomainname02 testcase in LTP.

This patch fixes this issue by separating their fixup code and returning
the right value for the kernel to handle a relative fault properly.

Signed-off-by: Alan Kao <alankao@andestech.com>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Zong Li <zong@andestech.com>
Cc: Vincent Chen <vincentc@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/lib/uaccess.S | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 58fb2877c865..0173ea296baa 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -84,7 +84,7 @@ ENTRY(__clear_user)
 	bgeu t0, t1, 2f
 	bltu a0, t0, 4f
 1:
-	fixup REG_S, zero, (a0), 10f
+	fixup REG_S, zero, (a0), 11f
 	addi a0, a0, SZREG
 	bltu a0, t1, 1b
 2:
@@ -96,12 +96,12 @@ ENTRY(__clear_user)
 	li a0, 0
 	ret
 4: /* Edge case: unalignment */
-	fixup sb, zero, (a0), 10f
+	fixup sb, zero, (a0), 11f
 	addi a0, a0, 1
 	bltu a0, t0, 4b
 	j 1b
 5: /* Edge case: remainder */
-	fixup sb, zero, (a0), 10f
+	fixup sb, zero, (a0), 11f
 	addi a0, a0, 1
 	bltu a0, a3, 5b
 	j 3b
@@ -109,9 +109,14 @@ ENDPROC(__clear_user)
 
 	.section .fixup,"ax"
 	.balign 4
+	/* Fixup code for __copy_user(10) and __clear_user(11) */
 10:
 	/* Disable access to user memory */
 	csrs sstatus, t6
-	sub a0, a3, a0
+	mv a0, a2
+	ret
+11:
+	csrs sstatus, t6
+	mv a0, a1
 	ret
 	.previous
-- 
cgit v1.2.3


From 178e9fc47aaec1b8952b553444e94802d7570599 Mon Sep 17 00:00:00 2001
From: Alan Kao <alankao@andestech.com>
Date: Fri, 20 Apr 2018 07:27:49 +0800
Subject: perf: riscv: preliminary RISC-V support

This patch provide a basic PMU, riscv_base_pmu, which supports two
general hardware event, instructions and cycles.  Furthermore, this
PMU serves as a reference implementation to ease the portings in
the future.

riscv_base_pmu should be able to run on any RISC-V machine that
conforms to the Priv-Spec.  Note that the latest qemu model hasn't
fully support a proper behavior of Priv-Spec 1.10 yet, but work
around should be easy with very small fixes.  Please check
https://github.com/riscv/riscv-qemu/pull/115 for future updates.

Cc: Nick Hu <nickhu@andestech.com>
Cc: Greentime Hu <greentime@andestech.com>
Signed-off-by: Alan Kao <alankao@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/Kconfig                  |  14 ++
 arch/riscv/include/asm/Kbuild       |   1 +
 arch/riscv/include/asm/perf_event.h |  84 +++++++
 arch/riscv/kernel/Makefile          |   2 +
 arch/riscv/kernel/perf_event.c      | 485 ++++++++++++++++++++++++++++++++++++
 5 files changed, 586 insertions(+)
 create mode 100644 arch/riscv/include/asm/perf_event.h
 create mode 100644 arch/riscv/kernel/perf_event.c

(limited to 'arch')

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cd4fd85fde84..4495604394e5 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -25,6 +25,7 @@ config RISCV
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select NO_BOOTMEM
 	select RISCV_ISA_A if SMP
@@ -198,6 +199,19 @@ config RISCV_ISA_C
 config RISCV_ISA_A
 	def_bool y
 
+menu "supported PMU type"
+	depends on PERF_EVENTS
+
+config RISCV_BASE_PMU
+	bool "Base Performance Monitoring Unit"
+	def_bool y
+	help
+	  A base PMU that serves as a reference implementation and has limited
+	  feature of perf.  It can run on any RISC-V machines so serves as the
+	  fallback, but this option can also be disable to reduce kernel size.
+
+endmenu
+
 endmenu
 
 menu "Kernel type"
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 4286a5f83876..576ffdca06ba 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -25,6 +25,7 @@ generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += module.h
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
new file mode 100644
index 000000000000..0e638a0c3feb
--- /dev/null
+++ b/arch/riscv/include/asm/perf_event.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 SiFive
+ * Copyright (C) 2018 Andes Technology Corporation
+ *
+ */
+
+#ifndef _ASM_RISCV_PERF_EVENT_H
+#define _ASM_RISCV_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+
+#define RISCV_BASE_COUNTERS	2
+
+/*
+ * The RISCV_MAX_COUNTERS parameter should be specified.
+ */
+
+#ifdef CONFIG_RISCV_BASE_PMU
+#define RISCV_MAX_COUNTERS	2
+#endif
+
+#ifndef RISCV_MAX_COUNTERS
+#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU."
+#endif
+
+/*
+ * These are the indexes of bits in counteren register *minus* 1,
+ * except for cycle.  It would be coherent if it can directly mapped
+ * to counteren bit definition, but there is a *time* register at
+ * counteren[1].  Per-cpu structure is scarce resource here.
+ *
+ * According to the spec, an implementation can support counter up to
+ * mhpmcounter31, but many high-end processors has at most 6 general
+ * PMCs, we give the definition to MHPMCOUNTER8 here.
+ */
+#define RISCV_PMU_CYCLE		0
+#define RISCV_PMU_INSTRET	1
+#define RISCV_PMU_MHPMCOUNTER3	2
+#define RISCV_PMU_MHPMCOUNTER4	3
+#define RISCV_PMU_MHPMCOUNTER5	4
+#define RISCV_PMU_MHPMCOUNTER6	5
+#define RISCV_PMU_MHPMCOUNTER7	6
+#define RISCV_PMU_MHPMCOUNTER8	7
+
+#define RISCV_OP_UNSUPP		(-EOPNOTSUPP)
+
+struct cpu_hw_events {
+	/* # currently enabled events*/
+	int			n_events;
+	/* currently enabled events */
+	struct perf_event	*events[RISCV_MAX_COUNTERS];
+	/* vendor-defined PMU data */
+	void			*platform;
+};
+
+struct riscv_pmu {
+	struct pmu	*pmu;
+
+	/* generic hw/cache events table */
+	const int	*hw_events;
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_COUNT_HW_CACHE_OP_MAX]
+				       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+	/* method used to map hw/cache events */
+	int		(*map_hw_event)(u64 config);
+	int		(*map_cache_event)(u64 config);
+
+	/* max generic hw events in map */
+	int		max_events;
+	/* number total counters, 2(base) + x(general) */
+	int		num_counters;
+	/* the width of the counter */
+	int		counter_width;
+
+	/* vendor-defined PMU features */
+	void		*platform;
+
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	int		irq;
+};
+
+#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 8586dd96c2f0..e1274fc03af4 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -39,4 +39,6 @@ obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
 
+obj-$(CONFIG_PERF_EVENTS)      += perf_event.o
+
 clean:
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
new file mode 100644
index 000000000000..b0e10c4e9f77
--- /dev/null
+++ b/arch/riscv/kernel/perf_event.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ * Copyright (C) 2018 Andes Technology Corporation
+ *
+ * Perf_events support for RISC-V platforms.
+ *
+ * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
+ * functionality for perf event to fully work, this file provides
+ * the very basic framework only.
+ *
+ * For platform portings, please check Documentations/riscv/pmu.txt.
+ *
+ * The Copyright line includes x86 and tile ones.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <linux/atomic.h>
+#include <linux/of.h>
+#include <asm/perf_event.h>
+
+static const struct riscv_pmu *riscv_pmu __read_mostly;
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/*
+ * Hardware & cache maps and their methods
+ */
+
+static const int riscv_hw_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= RISCV_PMU_CYCLE,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= RISCV_PMU_INSTRET,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= RISCV_OP_UNSUPP,
+	[PERF_COUNT_HW_CACHE_MISSES]		= RISCV_OP_UNSUPP,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= RISCV_OP_UNSUPP,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= RISCV_OP_UNSUPP,
+	[PERF_COUNT_HW_BUS_CYCLES]		= RISCV_OP_UNSUPP,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+[PERF_COUNT_HW_CACHE_OP_MAX]
+[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] =  RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] =  RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
+			[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
+		},
+	},
+};
+
+static int riscv_map_hw_event(u64 config)
+{
+	if (config >= riscv_pmu->max_events)
+		return -EINVAL;
+
+	return riscv_pmu->hw_events[config];
+}
+
+int riscv_map_cache_decode(u64 config, unsigned int *type,
+			   unsigned int *op, unsigned int *result)
+{
+	return -ENOENT;
+}
+
+static int riscv_map_cache_event(u64 config)
+{
+	unsigned int type, op, result;
+	int err = -ENOENT;
+		int code;
+
+	err = riscv_map_cache_decode(config, &type, &op, &result);
+	if (!riscv_pmu->cache_events || err)
+		return err;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	code = (*riscv_pmu->cache_events)[type][op][result];
+	if (code == RISCV_OP_UNSUPP)
+		return -EINVAL;
+
+	return code;
+}
+
+/*
+ * Low-level functions: reading/writing counters
+ */
+
+static inline u64 read_counter(int idx)
+{
+	u64 val = 0;
+
+	switch (idx) {
+	case RISCV_PMU_CYCLE:
+		val = csr_read(cycle);
+		break;
+	case RISCV_PMU_INSTRET:
+		val = csr_read(instret);
+		break;
+	default:
+		WARN_ON_ONCE(idx < 0 ||	idx > RISCV_MAX_COUNTERS);
+		return -EINVAL;
+	}
+
+	return val;
+}
+
+static inline void write_counter(int idx, u64 value)
+{
+	/* currently not supported */
+	WARN_ON_ONCE(1);
+}
+
+/*
+ * pmu->read: read and update the counter
+ *
+ * Other architectures' implementation often have a xxx_perf_event_update
+ * routine, which can return counter values when called in the IRQ, but
+ * return void when being called by the pmu->read method.
+ */
+static void riscv_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_raw_count, new_raw_count;
+	u64 oldval;
+	int idx = hwc->idx;
+	u64 delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = read_counter(idx);
+
+		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					 new_raw_count);
+	} while (oldval != prev_raw_count);
+
+	/*
+	 * delta is the value to update the counter we maintain in the kernel.
+	 */
+	delta = (new_raw_count - prev_raw_count) &
+		((1ULL << riscv_pmu->counter_width) - 1);
+	local64_add(delta, &event->count);
+	/*
+	 * Something like local64_sub(delta, &hwc->period_left) here is
+	 * needed if there is an interrupt for perf.
+	 */
+}
+
+/*
+ * State transition functions:
+ *
+ * stop()/start() & add()/del()
+ */
+
+/*
+ * pmu->stop: stop the counter
+ */
+static void riscv_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		riscv_pmu->pmu->read(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * pmu->start: start the event.
+ */
+static void riscv_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+		/*
+		 * Set the counter to the period to the next interrupt here,
+		 * if you have any.
+		 */
+	}
+
+	hwc->state = 0;
+	perf_event_update_userpage(event);
+
+	/*
+	 * Since we cannot write to counters, this serves as an initialization
+	 * to the delta-mechanism in pmu->read(); otherwise, the delta would be
+	 * wrong when pmu->read is called for the first time.
+	 */
+	local64_set(&hwc->prev_count, read_counter(hwc->idx));
+}
+
+/*
+ * pmu->add: add the event to PMU.
+ */
+static int riscv_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (cpuc->n_events == riscv_pmu->num_counters)
+		return -ENOSPC;
+
+	/*
+	 * We don't have general conunters, so no binding-event-to-counter
+	 * process here.
+	 *
+	 * Indexing using hwc->config generally not works, since config may
+	 * contain extra information, but here the only info we have in
+	 * hwc->config is the event index.
+	 */
+	hwc->idx = hwc->config;
+	cpuc->events[hwc->idx] = event;
+	cpuc->n_events++;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+/*
+ * pmu->del: delete the event from PMU.
+ */
+static void riscv_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	cpuc->events[hwc->idx] = NULL;
+	cpuc->n_events--;
+	riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Interrupt: a skeletion for reference.
+ */
+
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
+{
+	return IRQ_NONE;
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int err = 0;
+
+	mutex_lock(&pmc_reserve_mutex);
+	if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
+		err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
+				  IRQF_PERCPU, "riscv-base-perf", NULL);
+	}
+	mutex_unlock(&pmc_reserve_mutex);
+
+	return err;
+}
+
+void release_pmc_hardware(void)
+{
+	mutex_lock(&pmc_reserve_mutex);
+	if (riscv_pmu->irq >= 0)
+		free_irq(riscv_pmu->irq, NULL);
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+/*
+ * Event Initialization/Finalization
+ */
+
+static atomic_t riscv_active_events = ATOMIC_INIT(0);
+
+static void riscv_event_destroy(struct perf_event *event)
+{
+	if (atomic_dec_return(&riscv_active_events) == 0)
+		release_pmc_hardware();
+}
+
+static int riscv_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int err;
+	int code;
+
+	if (atomic_inc_return(&riscv_active_events) == 1) {
+		err = reserve_pmc_hardware();
+
+		if (err) {
+			pr_warn("PMC hardware not available\n");
+			atomic_dec(&riscv_active_events);
+			return -EBUSY;
+		}
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		code = riscv_pmu->map_hw_event(attr->config);
+		break;
+	case PERF_TYPE_HW_CACHE:
+		code = riscv_pmu->map_cache_event(attr->config);
+		break;
+	case PERF_TYPE_RAW:
+		return -EOPNOTSUPP;
+	default:
+		return -ENOENT;
+	}
+
+	event->destroy = riscv_event_destroy;
+	if (code < 0) {
+		event->destroy(event);
+		return code;
+	}
+
+	/*
+	 * idx is set to -1 because the index of a general event should not be
+	 * decided until binding to some counter in pmu->add().
+	 *
+	 * But since we don't have such support, later in pmu->add(), we just
+	 * use hwc->config as the index instead.
+	 */
+	hwc->config = code;
+	hwc->idx = -1;
+
+	return 0;
+}
+
+/*
+ * Initialization
+ */
+
+static struct pmu min_pmu = {
+	.name		= "riscv-base",
+	.event_init	= riscv_event_init,
+	.add		= riscv_pmu_add,
+	.del		= riscv_pmu_del,
+	.start		= riscv_pmu_start,
+	.stop		= riscv_pmu_stop,
+	.read		= riscv_pmu_read,
+};
+
+static const struct riscv_pmu riscv_base_pmu = {
+	.pmu = &min_pmu,
+	.max_events = ARRAY_SIZE(riscv_hw_event_map),
+	.map_hw_event = riscv_map_hw_event,
+	.hw_events = riscv_hw_event_map,
+	.map_cache_event = riscv_map_cache_event,
+	.cache_events = &riscv_cache_event_map,
+	.counter_width = 63,
+	.num_counters = RISCV_BASE_COUNTERS + 0,
+	.handle_irq = &riscv_base_pmu_handle_irq,
+
+	/* This means this PMU has no IRQ. */
+	.irq = -1,
+};
+
+static const struct of_device_id riscv_pmu_of_ids[] = {
+	{.compatible = "riscv,base-pmu",	.data = &riscv_base_pmu},
+	{ /* sentinel value */ }
+};
+
+int __init init_hw_perf_events(void)
+{
+	struct device_node *node = of_find_node_by_type(NULL, "pmu");
+	const struct of_device_id *of_id;
+
+	riscv_pmu = &riscv_base_pmu;
+
+	if (node) {
+		of_id = of_match_node(riscv_pmu_of_ids, node);
+
+		if (of_id)
+			riscv_pmu = of_id->data;
+	}
+
+	perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
+	return 0;
+}
+arch_initcall(init_hw_perf_events);
-- 
cgit v1.2.3


From 2861ae302f6bf7221db2dac5bd4cf0f2e4cab13b Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Fri, 1 Jun 2018 17:21:21 +0200
Subject: riscv: use NULL instead of a plain 0

sbi_remote_sfence_vma() & sbi_remote_fence_i() takes
a pointer as first argument but some macros call them with
a plain 0 which, while legal C, is frowned upon in the kernel.

Change this by replacing the 0 by NULL.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/cacheflush.h | 2 +-
 arch/riscv/include/asm/tlbflush.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index efd89a88d2d0..8f13074413a7 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -47,7 +47,7 @@ static inline void flush_dcache_page(struct page *page)
 
 #else /* CONFIG_SMP */
 
-#define flush_icache_all() sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(NULL)
 void flush_icache_mm(struct mm_struct *mm, bool local);
 
 #endif /* CONFIG_SMP */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 7b209aec355d..85c2d8bae957 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -49,7 +49,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 #include <asm/sbi.h>
 
-#define flush_tlb_all() sbi_remote_sfence_vma(0, 0, -1)
+#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
 #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
 #define flush_tlb_range(vma, start, end) \
 	sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
-- 
cgit v1.2.3


From 9bf97390b3030b68a465681043a66461c7cf6a65 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Fri, 1 Jun 2018 17:21:22 +0200
Subject: riscv: no __user for probe_kernel_address()

In is_valid_bugaddr(), probe_kernel_address() is called with
the PC casted to (bug_inst_t __user *) but this function
only take a plain void* as argument, not a __user pointer.

Fix this by removing the unnneded __user in the cast.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 93132cb59184..4c92e5af86d3 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -160,7 +160,7 @@ int is_valid_bugaddr(unsigned long pc)
 
 	if (pc < PAGE_OFFSET)
 		return 0;
-	if (probe_kernel_address((bug_insn_t __user *)pc, insn))
+	if (probe_kernel_address((bug_insn_t *)pc, insn))
 		return 0;
 	return (insn == __BUG_INSN);
 }
-- 
cgit v1.2.3


From 2a61f4747eeaa85ce26ca9fbd81421b15facd018 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 28 May 2018 18:22:00 +0900
Subject: stack-protector: test compiler capability in Kconfig and drop AUTO
 mode

Move the test for -fstack-protector(-strong) option to Kconfig.

If the compiler does not support the option, the corresponding menu
is automatically hidden.  If STRONG is not supported, it will fall
back to REGULAR.  If REGULAR is not supported, it will be disabled.
This means, AUTO is implicitly handled by the dependency solver of
Kconfig, hence removed.

I also turned the 'choice' into only two boolean symbols.  The use of
'choice' is not a good idea here, because all of all{yes,mod,no}config
would choose the first visible value, while we want allnoconfig to
disable as many features as possible.

X86 has additional shell scripts in case the compiler supports those
options, but generates broken code.  I added CC_HAS_SANE_STACKPROTECTOR
to test this.  I had to add -m32 to gcc-x86_32-has-stack-protector.sh
to make it work correctly.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Kees Cook <keescook@chromium.org>
---
 arch/Kconfig     | 32 +++++++++++---------------------
 arch/x86/Kconfig | 11 ++++++++++-
 2 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8a7f7e1f2ca7..2c7c3cf8adfc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -536,13 +536,16 @@ config HAVE_CC_STACKPROTECTOR
 	bool
 	help
 	  An arch should select this symbol if:
-	  - its compiler supports the -fstack-protector option
 	  - it has implemented a stack canary (e.g. __stack_chk_guard)
 
-choice
-	prompt "Stack Protector buffer overflow detection"
+config CC_HAS_STACKPROTECTOR_NONE
+	def_bool $(cc-option,-fno-stack-protector)
+
+config CC_STACKPROTECTOR
+	bool "Stack Protector buffer overflow detection"
 	depends on HAVE_CC_STACKPROTECTOR
-	default CC_STACKPROTECTOR_AUTO
+	depends on $(cc-option,-fstack-protector)
+	default y
 	help
 	  This option turns on the "stack-protector" GCC feature. This
 	  feature puts, at the beginning of functions, a canary value on
@@ -552,14 +555,6 @@ choice
 	  overwrite the canary, which gets detected and the attack is then
 	  neutralized via a kernel panic.
 
-config CC_STACKPROTECTOR_NONE
-	bool "None"
-	help
-	  Disable "stack-protector" GCC feature.
-
-config CC_STACKPROTECTOR_REGULAR
-	bool "Regular"
-	help
 	  Functions will have the stack-protector canary logic added if they
 	  have an 8-byte or larger character array on the stack.
 
@@ -571,7 +566,10 @@ config CC_STACKPROTECTOR_REGULAR
 	  by about 0.3%.
 
 config CC_STACKPROTECTOR_STRONG
-	bool "Strong"
+	bool "Strong Stack Protector"
+	depends on CC_STACKPROTECTOR
+	depends on $(cc-option,-fstack-protector-strong)
+	default y
 	help
 	  Functions will have the stack-protector canary logic added in any
 	  of the following conditions:
@@ -589,14 +587,6 @@ config CC_STACKPROTECTOR_STRONG
 	  about 20% of all kernel functions, which increases the kernel code
 	  size by about 2%.
 
-config CC_STACKPROTECTOR_AUTO
-	bool "Automatic"
-	help
-	  If the compiler supports it, the best available stack-protector
-	  option will be chosen.
-
-endchoice
-
 config HAVE_ARCH_WITHIN_STACK_FRAMES
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb6e3a219294..50a1b8ec9ad9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -129,7 +129,7 @@ config X86
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
 	select HAVE_ARCH_VMAP_STACK		if X86_64
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
-	select HAVE_CC_STACKPROTECTOR
+	select HAVE_CC_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING		if X86_64
@@ -341,6 +341,15 @@ config PGTABLE_LEVELS
 	default 2
 
 source "init/Kconfig"
+
+config CC_HAS_SANE_STACKPROTECTOR
+	bool
+	default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC)) if 64BIT
+	default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
+	help
+	   We have to make sure stack protector is unconditionally disabled if
+	   the compiler produces broken code.
+
 source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
-- 
cgit v1.2.3


From f3a53f7b5740b0cd411262ee56a0fb7248199d3a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 17 May 2018 15:17:10 +0900
Subject: arm64: move GCC version check for ARCH_SUPPORTS_INT128 to Kconfig

This becomes much neater in Kconfig.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
 arch/arm64/Kconfig  | 1 +
 arch/arm64/Makefile | 6 ------
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b25ed7834f6c..5a0cd6b6babb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -43,6 +43,7 @@ config ARM64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 3c353b4715dc..45272266dafb 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -56,12 +56,6 @@ KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
 
-ifeq ($(cc-name),clang)
-KBUILD_CFLAGS	+= -DCONFIG_ARCH_SUPPORTS_INT128
-else
-KBUILD_CFLAGS	+= $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
-endif
-
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
-- 
cgit v1.2.3


From 86406d51d3600bfa2b6f86e1e6bfce712bec0d53 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Sat, 9 Jun 2018 02:33:51 +0200
Subject: riscv: split the declaration of __copy_user

We use a single __copy_user assembly function to copy memory both from
and to userspace. While this works, it triggers sparse errors because
we're implicitly casting between the kernel and user address spaces by
calling __copy_user.

This patch splits the C declaration into a pair of functions,
__asm_copy_{to,from}_user, that have sane semantics WRT __user. This
split make things fine from sparse's point of view. The assembly
implementation keeps a single definition but add a double ENTRY() for it,
one for __asm_copy_to_user and another one for __asm_copy_from_user.
The result is a spare-safe implementation that pays no performance
or code size penalty.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/uaccess.h | 8 +++++---
 arch/riscv/kernel/riscv_ksyms.c  | 3 ++-
 arch/riscv/lib/uaccess.S         | 6 ++++--
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 14b0b22fb578..473cfc84e412 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -392,19 +392,21 @@ do {								\
 })
 
 
-extern unsigned long __must_check __copy_user(void __user *to,
+extern unsigned long __must_check __asm_copy_to_user(void __user *to,
+	const void *from, unsigned long n);
+extern unsigned long __must_check __asm_copy_from_user(void *to,
 	const void __user *from, unsigned long n);
 
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	return __copy_user(to, from, n);
+	return __asm_copy_to_user(to, from, n);
 }
 
 static inline unsigned long
 raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	return __copy_user(to, from, n);
+	return __asm_copy_from_user(to, from, n);
 }
 
 extern long strncpy_from_user(char *dest, const char __user *src, long count);
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 551734248748..f247d6d2137c 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -13,6 +13,7 @@
  * Assembly functions that may be used (directly or indirectly) by modules
  */
 EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(__asm_copy_to_user);
+EXPORT_SYMBOL(__asm_copy_from_user);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 58fb2877c865..f8e6440cad6e 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -13,7 +13,8 @@ _epc:
 	.previous
 	.endm
 
-ENTRY(__copy_user)
+ENTRY(__asm_copy_to_user)
+ENTRY(__asm_copy_from_user)
 
 	/* Enable access to user memory */
 	li t6, SR_SUM
@@ -63,7 +64,8 @@ ENTRY(__copy_user)
 	addi a0, a0, 1
 	bltu a1, a3, 5b
 	j 3b
-ENDPROC(__copy_user)
+ENDPROC(__asm_copy_to_user)
+ENDPROC(__asm_copy_from_user)
 
 
 ENTRY(__clear_user)
-- 
cgit v1.2.3


From cca76c1ad61d08097af5a691195f9a42d72e978f Mon Sep 17 00:00:00 2001
From: Alexander Pateenok <pateenoc@gmail.com>
Date: Mon, 23 Apr 2018 21:20:17 +0300
Subject: um: remove uml initcalls

__uml_initcall() is not used and .uml.initcall.init section is empty:

$ grep -r '__uml_initcall('
arch/um/include/shared/init.h:#define __uml_initcall(fn)	\
$ readelf -s ../umobj/linux | grep __uml_initcall
 23214: 00000000603b75d8     0 NOTYPE  GLOBAL DEFAULT   32 __uml_initcall_start
 25337: 00000000603b75d8     0 NOTYPE  GLOBAL DEFAULT   32 __uml_initcall_end

So it is unnecessary.

Signed-off-by: Alexander Pateenok <pateenoc@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/asm/common.lds.S |  6 ------
 arch/um/include/shared/init.h    |  5 -----
 arch/um/os-Linux/main.c          | 12 ------------
 3 files changed, 23 deletions(-)

(limited to 'arch')

diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index b30d73ca29d0..7adb4e6b658a 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -53,12 +53,6 @@
 	CON_INITCALL
   }
 
-  .uml.initcall.init : {
-	__uml_initcall_start = .;
-	*(.uml.initcall.init)
-	__uml_initcall_end = .;
-  }
-
   SECURITY_INIT
 
   .exitcall : {
diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
index b3f5865a92c9..c66de434a983 100644
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -64,14 +64,10 @@ struct uml_param {
         int (*setup_func)(char *, int *);
 };
 
-extern initcall_t __uml_initcall_start, __uml_initcall_end;
 extern initcall_t __uml_postsetup_start, __uml_postsetup_end;
 extern const char *__uml_help_start, *__uml_help_end;
 #endif
 
-#define __uml_initcall(fn)					  	\
-	static initcall_t __uml_initcall_##fn __uml_init_call = fn
-
 #define __uml_exitcall(fn)						\
 	static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn
 
@@ -108,7 +104,6 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
  */
 #define __uml_init_setup	__used __section(.uml.setup.init)
 #define __uml_setup_help	__used __section(.uml.help.init)
-#define __uml_init_call		__used __section(.uml.initcall.init)
 #define __uml_postsetup_call	__used __section(.uml.postsetup.init)
 #define __uml_exit_call		__used __section(.uml.exitcall.exit)
 
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 5f970ece5ac3..f1fee2b91239 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -40,17 +40,6 @@ static void set_stklim(void)
 	}
 }
 
-static __init void do_uml_initcalls(void)
-{
-	initcall_t *call;
-
-	call = &__uml_initcall_start;
-	while (call < &__uml_initcall_end) {
-		(*call)();
-		call++;
-	}
-}
-
 static void last_ditch_exit(int sig)
 {
 	uml_cleanup();
@@ -151,7 +140,6 @@ int __init main(int argc, char **argv, char **envp)
 	scan_elf_aux(envp);
 #endif
 
-	do_uml_initcalls();
 	change_sig(SIGPIPE, 0);
 	ret = linux_main(argc, argv);
 
-- 
cgit v1.2.3


From 4579a1ba692af81da7ea6ce197f8169ddc0c327f Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Tue, 5 Jun 2018 09:27:30 +0100
Subject: um: Fix initialization of vector queues

UML vector drivers could derefence uninitialized memory
when cleaning up after a queue allocation failure.

Fixes: 49da7e64f33e ("High Performance UML Vector Network Driver")
Cc: <stable@vger.kernel.org>
Reported-by: Dan Capenter <dan.carpenter@oracle.com>
Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/drivers/vector_kern.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 02168fe25105..8b852928959b 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -504,15 +504,19 @@ static struct vector_queue *create_queue(
 
 	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
 	if (result == NULL)
-		goto out_fail;
+		return NULL;
 	result->max_depth = max_size;
 	result->dev = vp->dev;
 	result->mmsg_vector = kmalloc(
 		(sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
+	if (result->mmsg_vector == NULL)
+		goto out_mmsg_fail;
 	result->skbuff_vector = kmalloc(
 		(sizeof(void *) * max_size), GFP_KERNEL);
-	if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
-		goto out_fail;
+	if (result->skbuff_vector == NULL)
+		goto out_skb_fail;
+
+	/* further failures can be handled safely by destroy_queue*/
 
 	mmsg_vector = result->mmsg_vector;
 	for (i = 0; i < max_size; i++) {
@@ -563,6 +567,11 @@ static struct vector_queue *create_queue(
 	result->head = 0;
 	result->tail = 0;
 	return result;
+out_skb_fail:
+	kfree(result->mmsg_vector);
+out_mmsg_fail:
+	kfree(result);
+	return NULL;
 out_fail:
 	destroy_queue(result);
 	return NULL;
-- 
cgit v1.2.3


From 5ec9121195a4f1cecd0fa592636c5f81eb03dc8c Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Thu, 7 Jun 2018 12:43:15 +0100
Subject: um: Fix raw interface options

Raw interface initialization needs QDISC_BYPASS. Otherwise
it sees its own packets when transmitting.

Fixes: 49da7e64f33e ("High Performance UML Vector Network Driver")
Cc: <stable@vger.kernel.org>
Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/drivers/vector_kern.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 8b852928959b..cf9bf9b43ec3 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -188,7 +188,7 @@ static int get_transport_options(struct arglist *def)
 	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
 		return (vec_rx | VECTOR_BPF);
 	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
-		return (vec_rx | vec_tx);
+		return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
 	return (vec_rx | vec_tx);
 }
 
@@ -1241,9 +1241,8 @@ static int vector_net_open(struct net_device *dev)
 
 	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
 		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
-			vp->options = vp->options | VECTOR_BPF;
+			vp->options |= VECTOR_BPF;
 	}
-
 	if ((vp->options & VECTOR_BPF) != 0)
 		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
 
-- 
cgit v1.2.3


From 8034c2fb1225979b1cc9b9a12fa8094ca10b4fc3 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 28 May 2018 18:22:05 +0900
Subject: gcc-plugins: move GCC version check for PowerPC to Kconfig

For PowerPC, GCC 5.2 is the requirement for GCC plugins.  Move the
version check to Kconfig so that the GCC plugin menus will be hidden
if an older compiler is in use.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 076fe3094856..737d333d4df2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -189,7 +189,7 @@ config PPC
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
-	select HAVE_GCC_PLUGINS
+	select HAVE_GCC_PLUGINS			if GCC_VERSION >= 50200   # plugin support on gcc <= 5.1 is buggy on PPC
 	select HAVE_GENERIC_GUP
 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IDE
-- 
cgit v1.2.3


From 59f53855babf757ac7be19995670ab884aaf9b71 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 28 May 2018 18:22:06 +0900
Subject: gcc-plugins: test plugin support in Kconfig and clean up Makefile

Run scripts/gcc-plugin.sh from Kconfig so that users can enable
GCC_PLUGINS only when the compiler supports building plugins.

Kconfig defines a new symbol, PLUGIN_HOSTCC.  This will contain
the compiler (g++ or gcc) used for building plugins, or empty
if the plugin can not be supported at all.

This allows us to remove all ugly testing in Makefile.gcc-plugins.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Kees Cook <keescook@chromium.org>
---
 arch/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index 2c7c3cf8adfc..e4a47d640a87 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -398,6 +398,15 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+preferred-plugin-hostcc := $(if-success,[ $(gcc-version) -ge 40800 ],$(HOSTCXX),$(HOSTCC))
+
+config PLUGIN_HOSTCC
+	string
+	default "$(shell,$(srctree)/scripts/gcc-plugin.sh "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")"
+	help
+	  Host compiler used to build GCC plugins.  This can be $(HOSTCXX),
+	  $(HOSTCC), or a null string if GCC plugin is unsupported.
+
 config HAVE_GCC_PLUGINS
 	bool
 	help
@@ -407,6 +416,7 @@ config HAVE_GCC_PLUGINS
 menuconfig GCC_PLUGINS
 	bool "GCC plugins"
 	depends on HAVE_GCC_PLUGINS
+	depends on PLUGIN_HOSTCC != ""
 	depends on !COMPILE_TEST
 	help
 	  GCC plugins are loadable modules that provide extra features to the
-- 
cgit v1.2.3


From 1658dcee3d43edde9c971b6e88fee64ab9fc67e0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 28 May 2018 18:22:07 +0900
Subject: gcc-plugins: allow to enable GCC_PLUGINS for COMPILE_TEST

Now that the compiler's plugin support is checked in Kconfig,
all{yes,mod}config will not be bothered.

Remove 'depends on !COMPILE_TEST' for GCC_PLUGINS.

'depends on !COMPILE_TEST' for the following three are still kept:
  GCC_PLUGIN_CYC_COMPLEXITY
  GCC_PLUGIN_STRUCTLEAK_VERBOSE
  GCC_PLUGIN_RANDSTRUCT_PERFORMANCE

Kees suggested to do so because the first two are too noisy, and the
last one would reduce the compile test coverage.  I commented the
reasons in arch/Kconfig.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Kees Cook <keescook@chromium.org>
---
 arch/Kconfig | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index e4a47d640a87..c2695293ca5b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -417,7 +417,6 @@ menuconfig GCC_PLUGINS
 	bool "GCC plugins"
 	depends on HAVE_GCC_PLUGINS
 	depends on PLUGIN_HOSTCC != ""
-	depends on !COMPILE_TEST
 	help
 	  GCC plugins are loadable modules that provide extra features to the
 	  compiler. They are useful for runtime instrumentation and static analysis.
@@ -427,7 +426,7 @@ menuconfig GCC_PLUGINS
 config GCC_PLUGIN_CYC_COMPLEXITY
 	bool "Compute the cyclomatic complexity of a function" if EXPERT
 	depends on GCC_PLUGINS
-	depends on !COMPILE_TEST
+	depends on !COMPILE_TEST	# too noisy
 	help
 	  The complexity M of a function's control flow graph is defined as:
 	   M = E - N + 2P
@@ -494,7 +493,7 @@ config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL
 config GCC_PLUGIN_STRUCTLEAK_VERBOSE
 	bool "Report forcefully initialized variables"
 	depends on GCC_PLUGIN_STRUCTLEAK
-	depends on !COMPILE_TEST
+	depends on !COMPILE_TEST	# too noisy
 	help
 	  This option will cause a warning to be printed each time the
 	  structleak plugin finds a variable it thinks needs to be
@@ -534,7 +533,7 @@ config GCC_PLUGIN_RANDSTRUCT
 config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
 	bool "Use cacheline-aware structure randomization"
 	depends on GCC_PLUGIN_RANDSTRUCT
-	depends on !COMPILE_TEST
+	depends on !COMPILE_TEST	# do not reduce test coverage
 	help
 	  If you say Y here, the RANDSTRUCT randomization will make a
 	  best effort at restricting randomization to cacheline-sized
-- 
cgit v1.2.3


From caa91ba53eb1c92d00abdf5b6c67a310d2b4c2d9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 1 Jun 2018 13:32:00 +0900
Subject: gcc-plugins: disable GCC_PLUGIN_STRUCTLEAK_BYREF_ALL for COMPILE_TEST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have enabled GCC_PLUGINS for COMPILE_TEST, but allmodconfig now
produces new warnings.

  CC [M]  drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.o
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function ‘wlc_phy_workarounds_nphy_rev7’:
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:16563:1: warning: the frame size of 3128 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function ‘wlc_phy_workarounds_nphy_rev3’:
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:16905:1: warning: the frame size of 2800 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function ‘wlc_phy_cal_txiqlo_nphy’:
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c:26033:1: warning: the frame size of 2488 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^

It looks like GCC_PLUGIN_STRUCTLEAK_BYREF_ALL is causing this.
Add "depends on !COMPILE_TEST" to not dirturb the compile test.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index c2695293ca5b..f62542e6dc55 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -486,6 +486,7 @@ config GCC_PLUGIN_STRUCTLEAK
 config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL
 	bool "Force initialize all struct type variables passed by reference"
 	depends on GCC_PLUGIN_STRUCTLEAK
+	depends on !COMPILE_TEST
 	help
 	  Zero initialize any struct type local variable that may be passed by
 	  reference without having been initialized.
-- 
cgit v1.2.3


From abba759796f9b73eb24df9b734dd063839fc62e0 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 30 May 2018 22:19:22 +1000
Subject: powerpc/kbuild: move -mprofile-kernel check to Kconfig

This eliminates the workaround that requires disabling
-mprofile-kernel by default in Kconfig.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/powerpc/Kconfig                            | 16 +---------------
 arch/powerpc/Makefile                           | 13 +------------
 arch/powerpc/include/asm/module.h               |  2 +-
 arch/powerpc/kernel/module_64.c                 |  4 ++--
 arch/powerpc/kernel/trace/ftrace.c              |  6 +++---
 arch/powerpc/tools/gcc-check-mprofile-kernel.sh |  1 -
 6 files changed, 8 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 737d333d4df2..7b96e4c48cbd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -459,23 +459,9 @@ config LD_HEAD_STUB_CATCH
 
 	  If unsure, say "N".
 
-config DISABLE_MPROFILE_KERNEL
-	bool "Disable use of mprofile-kernel for kernel tracing"
-	depends on PPC64 && CPU_LITTLE_ENDIAN
-	default y
-	help
-	  Selecting this options disables use of the mprofile-kernel ABI for
-	  kernel tracing. That will cause options such as live patching
-	  (CONFIG_LIVEPATCH) which depend on CONFIG_DYNAMIC_FTRACE_WITH_REGS to
-	  be disabled also.
-
-	  If you have a toolchain which supports mprofile-kernel, then you can
-	  disable this. Otherwise leave it enabled. If you're not sure, say
-	  "Y".
-
 config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
-	def_bool !DISABLE_MPROFILE_KERNEL
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__)
 
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 9b52e42e581b..bd06a3ccda31 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -161,18 +161,7 @@ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
 
 ifdef CONFIG_MPROFILE_KERNEL
-    ifeq ($(shell $(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK)
-        CC_FLAGS_FTRACE := -pg -mprofile-kernel
-        KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL
-    else
-        # If the user asked for mprofile-kernel but the toolchain doesn't
-        # support it, emit a warning and deliberately break the build later
-        # with mprofile-kernel-not-supported. We would prefer to make this an
-        # error right here, but then the user would never be able to run
-        # oldconfig to change their configuration.
-        $(warning Compiler does not support mprofile-kernel, set CONFIG_DISABLE_MPROFILE_KERNEL)
-        CC_FLAGS_FTRACE := -mprofile-kernel-not-supported
-    endif
+	CC_FLAGS_FTRACE := -pg -mprofile-kernel
 endif
 
 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index d8374f984f39..d61b0818e267 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -14,7 +14,7 @@
 #include <asm-generic/module.h>
 
 
-#ifdef CC_USING_MPROFILE_KERNEL
+#ifdef CONFIG_MPROFILE_KERNEL
 #define MODULE_ARCH_VERMAGIC_FTRACE	"mprofile-kernel "
 #else
 #define MODULE_ARCH_VERMAGIC_FTRACE	""
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 1b7419579820..b8d61e019d06 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -466,7 +466,7 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
 	return (unsigned long)&stubs[i];
 }
 
-#ifdef CC_USING_MPROFILE_KERNEL
+#ifdef CONFIG_MPROFILE_KERNEL
 static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction)
 {
 	if (strcmp("_mcount", name))
@@ -753,7 +753,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-#ifdef CC_USING_MPROFILE_KERNEL
+#ifdef CONFIG_MPROFILE_KERNEL
 
 #define PACATOC offsetof(struct paca_struct, kernel_toc)
 
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index c076a32093fd..4bfbb54dee51 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -144,7 +144,7 @@ __ftrace_make_nop(struct module *mod,
 		return -EINVAL;
 	}
 
-#ifdef CC_USING_MPROFILE_KERNEL
+#ifdef CONFIG_MPROFILE_KERNEL
 	/* When using -mkernel_profile there is no load to jump over */
 	pop = PPC_INST_NOP;
 
@@ -188,7 +188,7 @@ __ftrace_make_nop(struct module *mod,
 		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
 		return -EINVAL;
 	}
-#endif /* CC_USING_MPROFILE_KERNEL */
+#endif /* CONFIG_MPROFILE_KERNEL */
 
 	if (patch_instruction((unsigned int *)ip, pop)) {
 		pr_err("Patching NOP failed.\n");
@@ -324,7 +324,7 @@ int ftrace_make_nop(struct module *mod,
  * They should effectively be a NOP, and follow formal constraints,
  * depending on the ABI. Return false if they don't.
  */
-#ifndef CC_USING_MPROFILE_KERNEL
+#ifndef CONFIG_MPROFILE_KERNEL
 static int
 expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
 {
diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
index a7dd0e5d9f98..137f3376ac2b 100755
--- a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
@@ -24,5 +24,4 @@ echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \
     2> /dev/null | grep -q "_mcount" && \
     exit 1
 
-echo "OK"
 exit 0
-- 
cgit v1.2.3


From 2c4da1a78af5c50f3a866656fe223eb50b2a5ff0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 9 May 2018 16:53:22 +0900
Subject: sh: remove no-op macro VMLINUX_SYMBOL()

VMLINUX_SYMBOL() is no-op unless CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
is defined.  It has ever been selected only by BLACKFIN and METAG.
VMLINUX_SYMBOL() is unneeded for SuperH-specific code.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/sh/include/asm/vmlinux.lds.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/sh/include/asm/vmlinux.lds.h b/arch/sh/include/asm/vmlinux.lds.h
index f312813f39d8..992955685874 100644
--- a/arch/sh/include/asm/vmlinux.lds.h
+++ b/arch/sh/include/asm/vmlinux.lds.h
@@ -7,9 +7,9 @@
 #ifdef CONFIG_DWARF_UNWINDER
 #define DWARF_EH_FRAME							\
 	.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {			\
-		  VMLINUX_SYMBOL(__start_eh_frame) = .;			\
+		  __start_eh_frame = .;					\
 		  *(.eh_frame)						\
-		  VMLINUX_SYMBOL(__stop_eh_frame) = .;			\
+		  __stop_eh_frame = .;					\
 	}
 #else
 #define DWARF_EH_FRAME
-- 
cgit v1.2.3


From 889d746edd02a4498d80df3a12017d484cc78e5c Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Thu, 31 May 2018 17:42:01 +0200
Subject: riscv: add riscv-specific predefines to CHECKFLAGS

RISC-V uses the macro __riscv_xlen, predefined by GCC, to
make the distinction between 32 or 64 bit code.

However, sparse doesn't know anything about this macro
which lead to wrong warnings and failures.

Fix this by adding a define of __riscv_xlen to CHECKFLAGS
and add one for __riscv too.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 76e958a5414a..6d4a5f6c3f4f 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -71,6 +71,9 @@ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
 # architectures.  It's faster to have GCC emit only aligned accesses.
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
 
+# arch specific predefines for sparse
+CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
+
 head-y := arch/riscv/kernel/head.o
 
 core-y += arch/riscv/kernel/ arch/riscv/mm/
-- 
cgit v1.2.3


From 1dd985229d5fc19359250bc0e4235aff217672b2 Mon Sep 17 00:00:00 2001
From: Alan Kao <alankao@andestech.com>
Date: Tue, 8 May 2018 11:21:57 +0800
Subject: riscv/ftrace: Export _mcount when DYNAMIC_FTRACE isn't set

The EXPORT_SYMBOL(_mcount) for RISC-V ended up inside a
CONFIG_DYNAMIC_FTRACE ifdef.  If you enable modules without enabling
CONFIG_DYNAMIC_FTRACE then you'll get a build error without this patch
because the modules won't be able to find _mcount.

The new behavior is to export _mcount whenever CONFIG_FUNCTION_TRACER is
defined.  This matches what every other architecture is doing.

Signed-off-by: Alan Kao <alankao@andestech.com>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Zong Li <zong@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/mcount.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index ce9bdc57a2a1..5721624886a1 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -126,5 +126,5 @@ do_trace:
 	RESTORE_ABI_STATE
 	ret
 ENDPROC(_mcount)
-EXPORT_SYMBOL(_mcount)
 #endif
+EXPORT_SYMBOL(_mcount)
-- 
cgit v1.2.3


From 77aa85de16aeefd75d639737c7bfcf0d2604e471 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Thu, 7 Jun 2018 12:27:27 +0200
Subject: RISC-V: Handle R_RISCV_32 in modules

With CONFIG_MODVERSIONS=y the R_RISCV_32 relocation is used by the
__kcrctab section.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/module.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 5dddba301d0a..1d5e9b934b8c 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -17,6 +17,17 @@
 #include <linux/errno.h>
 #include <linux/moduleloader.h>
 
+static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
+{
+	if (v != (u32)v) {
+		pr_err("%s: value %016llx out of range for 32-bit field\n",
+		       me->name, v);
+		return -EINVAL;
+	}
+	*location = v;
+	return 0;
+}
+
 static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v)
 {
 	*(u64 *)location = v;
@@ -265,6 +276,7 @@ static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
 
 static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
 				Elf_Addr v) = {
+	[R_RISCV_32]			= apply_r_riscv_32_rela,
 	[R_RISCV_64]			= apply_r_riscv_64_rela,
 	[R_RISCV_BRANCH]		= apply_r_riscv_branch_rela,
 	[R_RISCV_JAL]			= apply_r_riscv_jal_rela,
-- 
cgit v1.2.3


From 24a130ccfe58e0ef7907ce63030ad0ff7d7c633b Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Thu, 8 Mar 2018 13:57:58 -0800
Subject: RISC-V: Add CONFIG_HVC_RISCV_SBI=y to defconfig

The SBI exists on all RISC-V systems, so there's no reason not to
compile this driver in.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index bca0eee733b0..07326466871b 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_INPUT_MOUSEDEV=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_HVC_RISCV_SBI=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y
-- 
cgit v1.2.3


From 727ba748e110b4de50d142edca9d6a9b7e6111d8 Mon Sep 17 00:00:00 2001
From: Felix Wilhelm <fwilhelm@google.com>
Date: Mon, 11 Jun 2018 09:43:44 +0200
Subject: kvm: nVMX: Enforce cpl=0 for VMX instructions

VMX instructions executed inside a L1 VM will always trigger a VM exit
even when executed with cpl 3. This means we must perform the
privilege check in software.

Fixes: 70f3aac964ae("kvm: nVMX: Remove superfluous VMX instruction fault checks")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Wilhelm <fwilhelm@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 709de996f063..4bf1f9de9332 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7905,6 +7905,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
+	/* CPL=0 must be checked manually. */
+	if (vmx_get_cpl(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
 	if (vmx->nested.vmxon) {
 		nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
 		return kvm_skip_emulated_instruction(vcpu);
@@ -7964,6 +7970,11 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
  */
 static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 {
+	if (vmx_get_cpl(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
 	if (!to_vmx(vcpu)->nested.vmxon) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 0;
@@ -8283,7 +8294,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 		if (get_vmx_mem_address(vcpu, exit_qualification,
 				vmx_instruction_info, true, &gva))
 			return 1;
-		/* _system ok, as hardware has verified cpl=0 */
+		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
 		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
 			     &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
 	}
@@ -8448,7 +8459,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, exit_qualification,
 			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
-	/* ok to use *_system, as hardware has verified cpl=0 */
+	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
 	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
 				 (void *)&to_vmx(vcpu)->nested.current_vmptr,
 				 sizeof(u64), &e)) {
-- 
cgit v1.2.3


From 79367a65743975e5cac8d24d08eccc7fdae832b0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 16:43:02 +0200
Subject: KVM: x86: introduce linear_{read,write}_system

Wrap the common invocation of ctxt->ops->read_std and ctxt->ops->write_std, so
as to have a smaller patch when the functions grow another argument.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 143b7ae52624..fcf54642b293 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -812,6 +812,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 	return assign_eip_near(ctxt, ctxt->_eip + rel);
 }
 
+static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
+			      void *data, unsigned size)
+{
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
+static int linear_write_system(struct x86_emulate_ctxt *ctxt,
+			       ulong linear, void *data,
+			       unsigned int size)
+{
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 			      struct segmented_address addr,
 			      void *data,
@@ -1496,8 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
 		return emulate_gp(ctxt, index << 3 | 0x2);
 
 	addr = dt.address + index * 8;
-	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
-				   &ctxt->exception);
+	return linear_read_system(ctxt, addr, desc, sizeof *desc);
 }
 
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
@@ -1560,8 +1572,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
-				   &ctxt->exception);
+	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
 }
 
 /* allowed just for 8 bytes segments */
@@ -1575,8 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
-				    &ctxt->exception);
+	return linear_write_system(ctxt, addr, desc, sizeof *desc);
 }
 
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -1737,8 +1747,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				return ret;
 		}
 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
-		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
-				sizeof(base3), &ctxt->exception);
+		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
@@ -2051,11 +2060,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	eip_addr = dt.address + (irq << 2);
 	cs_addr = dt.address + (irq << 2) + 2;
 
-	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
+	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
+	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -3053,35 +3062,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_16 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 
-	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
 
-	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			     &ctxt->exception);
+	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(ctxt, new_tss_base,
-				     &tss_seg.prev_task_link,
-				     sizeof tss_seg.prev_task_link,
-				     &ctxt->exception);
+		ret = linear_write_system(ctxt, new_tss_base,
+					  &tss_seg.prev_task_link,
+					  sizeof tss_seg.prev_task_link);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}
@@ -3197,38 +3201,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_32 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 	u32 eip_offset = offsetof(struct tss_segment_32, eip);
 	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
 
-	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	save_state_to_tss32(ctxt, &tss_seg);
 
 	/* Only GP registers and segment selectors are saved */
-	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
-			     ldt_sel_offset - eip_offset, &ctxt->exception);
+	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
+				  ldt_sel_offset - eip_offset);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
-			    &ctxt->exception);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(ctxt, new_tss_base,
-				     &tss_seg.prev_task_link,
-				     sizeof tss_seg.prev_task_link,
-				     &ctxt->exception);
+		ret = linear_write_system(ctxt, new_tss_base,
+					  &tss_seg.prev_task_link,
+					  sizeof tss_seg.prev_task_link);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}
-- 
cgit v1.2.3


From ce14e868a54edeb2e30cb7a7b104a2fc4b9d76ca Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 17:37:49 +0200
Subject: KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and
 kvm_write_guest_virt_system

Int the next patch the emulator's .read_std and .write_std callbacks will
grow another argument, which is not needed in kvm_read_guest_virt and
kvm_write_guest_virt_system's callers.  Since we have to make separate
functions, let's give the currently existing names a nicer interface, too.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 23 ++++++++++-------------
 arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++++-------------
 arch/x86/kvm/x86.h |  4 ++--
 3 files changed, 38 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4bf1f9de9332..48989f78be60 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7823,8 +7823,7 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
-				sizeof(*vmpointer), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -8295,8 +8294,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 				vmx_instruction_info, true, &gva))
 			return 1;
 		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
-		kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
-			     &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
+		kvm_write_guest_virt_system(vcpu, gva, &field_value,
+					    (is_long_mode(vcpu) ? 8 : 4), NULL);
 	}
 
 	nested_vmx_succeed(vcpu);
@@ -8334,8 +8333,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		if (get_vmx_mem_address(vcpu, exit_qualification,
 				vmx_instruction_info, false, &gva))
 			return 1;
-		if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
-			   &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+		if (kvm_read_guest_virt(vcpu, gva, &field_value,
+					(is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
 			kvm_inject_page_fault(vcpu, &e);
 			return 1;
 		}
@@ -8460,9 +8459,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 			vmx_instruction_info, true, &vmcs_gva))
 		return 1;
 	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
-	if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
-				 (void *)&to_vmx(vcpu)->nested.current_vmptr,
-				 sizeof(u64), &e)) {
+	if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
+					(void *)&to_vmx(vcpu)->nested.current_vmptr,
+					sizeof(u64), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -8509,8 +8508,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmx_instruction_info, false, &gva))
 		return 1;
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-				sizeof(operand), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
@@ -8574,8 +8572,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmx_instruction_info, false, &gva))
 		return 1;
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-				sizeof(operand), &e)) {
+	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 93dd25d005a1..2bbe9858e187 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4798,11 +4798,10 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 			       gva_t addr, void *val, unsigned int bytes,
 			       struct x86_exception *exception)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
@@ -4810,9 +4809,9 @@ int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
-static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-				      gva_t addr, void *val, unsigned int bytes,
-				      struct x86_exception *exception)
+static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+			     gva_t addr, void *val, unsigned int bytes,
+			     struct x86_exception *exception)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
@@ -4827,18 +4826,16 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
 	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
 }
 
-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-				       gva_t addr, void *val,
-				       unsigned int bytes,
-				       struct x86_exception *exception)
+static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+				      struct kvm_vcpu *vcpu, u32 access,
+				      struct x86_exception *exception)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
 	while (bytes) {
 		gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
-							     PFERR_WRITE_MASK,
+							     access,
 							     exception);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
@@ -4859,6 +4856,22 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 out:
 	return r;
 }
+
+static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+			      unsigned int bytes, struct x86_exception *exception)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+					   PFERR_WRITE_MASK, exception);
+}
+
+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+				unsigned int bytes, struct x86_exception *exception)
+{
+	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+					   PFERR_WRITE_MASK, exception);
+}
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 int handle_ud(struct kvm_vcpu *vcpu)
@@ -5611,8 +5624,8 @@ static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
 static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
-	.read_std            = kvm_read_guest_virt_system,
-	.write_std           = kvm_write_guest_virt_system,
+	.read_std            = emulator_read_std,
+	.write_std           = emulator_write_std,
 	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c9492f764902..331993c49dae 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -247,11 +247,11 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
 
-int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
-int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
-- 
cgit v1.2.3


From 3c9fa24ca7c9c47605672916491f79e8ccacb9e6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jun 2018 17:38:09 +0200
Subject: kvm: x86: use correct privilege level for sgdt/sidt/fxsave/fxrstor
 access

The functions that were used in the emulation of fxrstor, fxsave, sgdt and
sidt were originally meant for task switching, and as such they did not
check privilege levels.  This is very bad when the same functions are used
in the emulation of unprivileged instructions.  This is CVE-2018-10853.

The obvious fix is to add a new argument to ops->read_std and ops->write_std,
which decides whether the access is a "system" access or should use the
processor's CPL.

Fixes: 129a72a0d3c8 ("KVM: x86: Introduce segmented_write_std", 2017-01-12)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  6 ++++--
 arch/x86/kvm/emulate.c             | 12 ++++++------
 arch/x86/kvm/x86.c                 | 22 ++++++++++++++++------
 3 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b24b1c8b3979..0f82cd91cd3c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -107,11 +107,12 @@ struct x86_emulate_ops {
 	 *  @addr:  [IN ] Linear address from which to read.
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 *  @system:[IN ] Whether the access is forced to be at CPL0.
 	 */
 	int (*read_std)(struct x86_emulate_ctxt *ctxt,
 			unsigned long addr, void *val,
 			unsigned int bytes,
-			struct x86_exception *fault);
+			struct x86_exception *fault, bool system);
 
 	/*
 	 * read_phys: Read bytes of standard (non-emulated/special) memory.
@@ -129,10 +130,11 @@ struct x86_emulate_ops {
 	 *  @addr:  [IN ] Linear address to which to write.
 	 *  @val:   [OUT] Value write to memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to write to memory.
+	 *  @system:[IN ] Whether the access is forced to be at CPL0.
 	 */
 	int (*write_std)(struct x86_emulate_ctxt *ctxt,
 			 unsigned long addr, void *val, unsigned int bytes,
-			 struct x86_exception *fault);
+			 struct x86_exception *fault, bool system);
 	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
 	 *        Used for instruction fetch.
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fcf54642b293..4c4f4263420c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -815,14 +815,14 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
 			      void *data, unsigned size)
 {
-	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
 }
 
 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
 			       ulong linear, void *data,
 			       unsigned int size)
 {
-	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
 }
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
@@ -836,7 +836,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 	rc = linearize(ctxt, addr, size, false, &linear);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
 }
 
 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
@@ -850,7 +850,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
 	rc = linearize(ctxt, addr, size, true, &linear);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
 }
 
 /*
@@ -2928,12 +2928,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 #ifdef CONFIG_X86_64
 	base |= ((u64)base3) << 32;
 #endif
-	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
+	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
 		return false;
-	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
+	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if ((perm >> bit_idx) & mask)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2bbe9858e187..439fb0c7dbc0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4811,10 +4811,15 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
 static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
 			     gva_t addr, void *val, unsigned int bytes,
-			     struct x86_exception *exception)
+			     struct x86_exception *exception, bool system)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
+	u32 access = 0;
+
+	if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+		access |= PFERR_USER_MASK;
+
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
 }
 
 static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
@@ -4858,12 +4863,17 @@ out:
 }
 
 static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
-			      unsigned int bytes, struct x86_exception *exception)
+			      unsigned int bytes, struct x86_exception *exception,
+			      bool system)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	u32 access = PFERR_WRITE_MASK;
+
+	if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+		access |= PFERR_USER_MASK;
 
 	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
-					   PFERR_WRITE_MASK, exception);
+					   access, exception);
 }
 
 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
@@ -4882,8 +4892,8 @@ int handle_ud(struct kvm_vcpu *vcpu)
 	struct x86_exception e;
 
 	if (force_emulation_prefix &&
-	    kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
-				kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+	    kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
+				sig, sizeof(sig), &e) == 0 &&
 	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
 		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
 		emul_type = 0;
-- 
cgit v1.2.3


From 766d3571d8e50d3a73b77043dc632226f9e6b389 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 8 Jun 2018 02:19:53 +0300
Subject: kvm: fix typo in flag name

KVM_X86_DISABLE_EXITS_HTL really refers to exit on halt.
Obviously a typo: should be named KVM_X86_DISABLE_EXITS_HLT.

Fixes: caa057a2cad ("KVM: X86: Provide a capability to disable HLT intercepts")
Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 439fb0c7dbc0..06dd4cdb2ca8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,7 +2899,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_CLOCK_TSC_STABLE;
 		break;
 	case KVM_CAP_X86_DISABLE_EXITS:
-		r |=  KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+		r |=  KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
 		if(kvm_can_mwait_in_guest())
 			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
@@ -4253,7 +4253,7 @@ split_irqchip_unlock:
 		if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
 			kvm_can_mwait_in_guest())
 			kvm->arch.mwait_in_guest = true;
-		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
 			kvm->arch.hlt_in_guest = true;
 		if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
 			kvm->arch.pause_in_guest = true;
-- 
cgit v1.2.3


From 6566f907bf3168a082b3fd8542a9938a44d8f0b6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 7 Jun 2018 07:57:20 -0700
Subject: Convert intel uncore to struct_size

Need to do a bit of rearranging to make this work.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/events/intel/uncore.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 15b07379e72d..e15cfad4f89b 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -865,8 +865,6 @@ static void uncore_types_exit(struct intel_uncore_type **types)
 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 {
 	struct intel_uncore_pmu *pmus;
-	struct attribute_group *attr_group;
-	struct attribute **attrs;
 	size_t size;
 	int i, j;
 
@@ -891,21 +889,24 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 				0, type->num_counters, 0, 0);
 
 	if (type->event_descs) {
+		struct {
+			struct attribute_group group;
+			struct attribute *attrs[];
+		} *attr_group;
 		for (i = 0; type->event_descs[i].attr.attr.name; i++);
 
-		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
-					sizeof(*attr_group), GFP_KERNEL);
+		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
+								GFP_KERNEL);
 		if (!attr_group)
 			goto err;
 
-		attrs = (struct attribute **)(attr_group + 1);
-		attr_group->name = "events";
-		attr_group->attrs = attrs;
+		attr_group->group.name = "events";
+		attr_group->group.attrs = attr_group->attrs;
 
 		for (j = 0; j < i; j++)
-			attrs[j] = &type->event_descs[j].attr.attr;
+			attr_group->attrs[j] = &type->event_descs[j].attr.attr;
 
-		type->events_group = attr_group;
+		type->events_group = &attr_group->group;
 	}
 
 	type->pmu_group = &uncore_pmu_attr_group;
-- 
cgit v1.2.3


From 6da2ec56059c3c7a7e5f729e6349e74ace1e5c57 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Jun 2018 13:55:00 -0700
Subject: treewide: kmalloc() -> kmalloc_array()

The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
patch replaces cases of:

        kmalloc(a * b, gfp)

with:
        kmalloc_array(a * b, gfp)

as well as handling cases of:

        kmalloc(a * b * c, gfp)

with:

        kmalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

        kmalloc_array(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

        kmalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The tools/ directory was manually excluded, since it has its own
implementation of kmalloc().

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  kmalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  kmalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  kmalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  kmalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (COUNT_ID)
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * COUNT_ID
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (COUNT_CONST)
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * COUNT_CONST
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (COUNT_ID)
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * COUNT_ID
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (COUNT_CONST)
+	COUNT_CONST, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * COUNT_CONST
+	COUNT_CONST, sizeof(THING)
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kmalloc
+ kmalloc_array
  (
-	SIZE * COUNT
+	COUNT, SIZE
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  kmalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kmalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kmalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  kmalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kmalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  kmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  kmalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kmalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  kmalloc(C1 * C2 * C3, ...)
|
  kmalloc(
-	(E1) * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	(E1) * (E2) * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	(E1) * (E2) * (E3)
+	array3_size(E1, E2, E3)
  , ...)
|
  kmalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
  kmalloc(sizeof(THING) * C2, ...)
|
  kmalloc(sizeof(TYPE) * C2, ...)
|
  kmalloc(C1 * C2 * C3, ...)
|
  kmalloc(C1 * C2, ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * (E2)
+	E2, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(TYPE) * E2
+	E2, sizeof(TYPE)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * (E2)
+	E2, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	sizeof(THING) * E2
+	E2, sizeof(THING)
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	(E1) * E2
+	E1, E2
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	(E1) * (E2)
+	E1, E2
  , ...)
|
- kmalloc
+ kmalloc_array
  (
-	E1 * E2
+	E1, E2
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/kernel/sys_oabi-compat.c      |  4 ++--
 arch/arm/mm/pgd.c                      |  2 +-
 arch/arm/probes/kprobes/test-core.c    |  5 +++--
 arch/ia64/kernel/mca_drv.c             |  3 ++-
 arch/ia64/mm/tlb.c                     |  5 +++--
 arch/ia64/sn/kernel/irq.c              |  3 ++-
 arch/mips/alchemy/common/dbdma.c       |  4 ++--
 arch/powerpc/lib/rheap.c               |  2 +-
 arch/powerpc/platforms/4xx/hsta_msi.c  |  3 ++-
 arch/powerpc/platforms/4xx/msi.c       |  2 +-
 arch/powerpc/sysdev/mpic.c             |  5 +++--
 arch/s390/hypfs/hypfs_diag0c.c         |  3 ++-
 arch/s390/kernel/debug.c               |  6 ++++--
 arch/s390/kernel/perf_cpum_cf_events.c |  2 +-
 arch/s390/mm/extmem.c                  |  2 +-
 arch/sparc/kernel/nmi.c                |  3 ++-
 arch/sparc/kernel/sys_sparc_64.c       |  5 +++--
 arch/sparc/net/bpf_jit_comp_32.c       |  2 +-
 arch/um/drivers/ubd_kern.c             | 12 ++++++------
 arch/um/drivers/vector_kern.c          | 12 ++++++------
 arch/unicore32/kernel/pm.c             |  5 +++--
 arch/x86/events/core.c                 |  2 +-
 arch/x86/kernel/hpet.c                 |  4 ++--
 arch/x86/kernel/ksysfs.c               |  2 +-
 arch/x86/kvm/svm.c                     |  4 +++-
 arch/x86/net/bpf_jit_comp.c            |  2 +-
 arch/x86/net/bpf_jit_comp32.c          |  2 +-
 arch/x86/platform/uv/tlb_uv.c          |  2 +-
 28 files changed, 61 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index b9786f491873..1df21a61e379 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -286,7 +286,7 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
 		return -EINVAL;
 	if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
 		return -EFAULT;
-	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
+	kbuf = kmalloc_array(maxevents, sizeof(*kbuf), GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
 	fs = get_fs();
@@ -324,7 +324,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 		return -EINVAL;
 	if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
 		return -EFAULT;
-	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
+	sops = kmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
 	if (!sops)
 		return -ENOMEM;
 	err = 0;
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 61e281cb29fb..a1606d950251 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -20,7 +20,7 @@
 #include "mm.h"
 
 #ifdef CONFIG_ARM_LPAE
-#define __pgd_alloc()	kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL)
+#define __pgd_alloc()	kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL)
 #define __pgd_free(pgd)	kfree(pgd)
 #else
 #define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL, 2)
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index 9ed0129bed3c..14db14152909 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -766,8 +766,9 @@ static int coverage_start_fn(const struct decode_header *h, void *args)
 
 static int coverage_start(const union decode_item *table)
 {
-	coverage.base = kmalloc(MAX_COVERAGE_ENTRIES *
-				sizeof(struct coverage_entry), GFP_KERNEL);
+	coverage.base = kmalloc_array(MAX_COVERAGE_ENTRIES,
+				      sizeof(struct coverage_entry),
+				      GFP_KERNEL);
 	coverage.num_entries = 0;
 	coverage.nesting = 0;
 	return table_iter(table, coverage_start_fn, &coverage);
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 94f8bf777afa..dfe40cbdf3b3 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -350,7 +350,8 @@ init_record_index_pools(void)
 	/* - 3 - */
 	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
 	slidx_pool.buffer =
-		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+		kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t),
+			      GFP_KERNEL);
 
 	return slidx_pool.buffer ? 0 : -ENOMEM;
 }
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 46ecc5d948aa..acf10eb9da15 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -430,8 +430,9 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
 	int cpu = smp_processor_id();
 
 	if (!ia64_idtrs[cpu]) {
-		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
-				sizeof (struct ia64_tr_entry), GFP_KERNEL);
+		ia64_idtrs[cpu] = kmalloc_array(2 * IA64_TR_ALLOC_MAX,
+						sizeof(struct ia64_tr_entry),
+						GFP_KERNEL);
 		if (!ia64_idtrs[cpu])
 			return -ENOMEM;
 	}
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 85d095154902..d9b576df4f82 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -474,7 +474,8 @@ void __init sn_irq_lh_init(void)
 {
 	int i;
 
-	sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+	sn_irq_lh = kmalloc_array(NR_IRQS, sizeof(struct list_head *),
+				  GFP_KERNEL);
 	if (!sn_irq_lh)
 		panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
 
diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index fc482d900ddd..24b04758cce5 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -411,8 +411,8 @@ u32 au1xxx_dbdma_ring_alloc(u32 chanid, int entries)
 	 * and if we try that first we are likely to not waste larger
 	 * slabs of memory.
 	 */
-	desc_base = (u32)kmalloc(entries * sizeof(au1x_ddma_desc_t),
-				 GFP_KERNEL|GFP_DMA);
+	desc_base = (u32)kmalloc_array(entries, sizeof(au1x_ddma_desc_t),
+				       GFP_KERNEL|GFP_DMA);
 	if (desc_base == 0)
 		return 0;
 
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
index 94058c21a482..6aa774aa5b16 100644
--- a/arch/powerpc/lib/rheap.c
+++ b/arch/powerpc/lib/rheap.c
@@ -54,7 +54,7 @@ static int grow(rh_info_t * info, int max_blocks)
 
 	new_blocks = max_blocks - info->max_blocks;
 
-	block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_ATOMIC);
+	block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC);
 	if (block == NULL)
 		return -ENOMEM;
 
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index 9926ad67af76..1c18f2955f7d 100644
--- a/arch/powerpc/platforms/4xx/hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -156,7 +156,8 @@ static int hsta_msi_probe(struct platform_device *pdev)
 	if (ret)
 		goto out;
 
-	ppc4xx_hsta_msi.irq_map = kmalloc(sizeof(int) * irq_count, GFP_KERNEL);
+	ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int),
+						GFP_KERNEL);
 	if (!ppc4xx_hsta_msi.irq_map) {
 		ret = -ENOMEM;
 		goto out1;
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index 96aaae678928..81b2cbce7df8 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -89,7 +89,7 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	if (type == PCI_CAP_ID_MSIX)
 		pr_debug("ppc4xx msi: MSI-X untested, trying anyway.\n");
 
-	msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int), GFP_KERNEL);
+	msi_data->msi_virqs = kmalloc_array(msi_irqs, sizeof(int), GFP_KERNEL);
 	if (!msi_data->msi_virqs)
 		return -ENOMEM;
 
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 1d4e0ef658d3..df062a154ca8 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1639,8 +1639,9 @@ void __init mpic_init(struct mpic *mpic)
 
 #ifdef CONFIG_PM
 	/* allocate memory to save mpic state */
-	mpic->save_data = kmalloc(mpic->num_sources * sizeof(*mpic->save_data),
-				  GFP_KERNEL);
+	mpic->save_data = kmalloc_array(mpic->num_sources,
+				        sizeof(*mpic->save_data),
+				        GFP_KERNEL);
 	BUG_ON(mpic->save_data == NULL);
 #endif
 
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index dce87f1bec94..cebf05150cc1 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -49,7 +49,8 @@ static void *diag0c_store(unsigned int *count)
 
 	get_online_cpus();
 	cpu_count = num_online_cpus();
-	cpu_vec = kmalloc(sizeof(*cpu_vec) * num_possible_cpus(), GFP_KERNEL);
+	cpu_vec = kmalloc_array(num_possible_cpus(), sizeof(*cpu_vec),
+				GFP_KERNEL);
 	if (!cpu_vec)
 		goto fail_put_online_cpus;
 	/* Note: Diag 0c needs 8 byte alignment and real storage */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 80e974adb9e8..d374f9b218b4 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -194,11 +194,13 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
 	debug_entry_t ***areas;
 	int i, j;
 
-	areas = kmalloc(nr_areas * sizeof(debug_entry_t **), GFP_KERNEL);
+	areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL);
 	if (!areas)
 		goto fail_malloc_areas;
 	for (i = 0; i < nr_areas; i++) {
-		areas[i] = kmalloc(pages_per_area * sizeof(debug_entry_t *), GFP_KERNEL);
+		areas[i] = kmalloc_array(pages_per_area,
+					 sizeof(debug_entry_t *),
+					 GFP_KERNEL);
 		if (!areas[i])
 			goto fail_malloc_areas2;
 		for (j = 0; j < pages_per_area; j++) {
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index feebb2944882..d63fb3c56b8a 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -527,7 +527,7 @@ static __init struct attribute **merge_attr(struct attribute **a,
 		j++;
 	j++;
 
-	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
 	if (!new)
 		return NULL;
 	j = 0;
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 920d40894535..6ad15d3fab81 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -103,7 +103,7 @@ static int scode_set;
 static int
 dcss_set_subcodes(void)
 {
-	char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
+	char *name = kmalloc(8, GFP_KERNEL | GFP_DMA);
 	unsigned long rx, ry;
 	int rc;
 
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 048ad783ea3f..8babbeb30adf 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -166,7 +166,8 @@ static int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active))
 		return 0;
 
-	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
+	prev_nmi_count = kmalloc_array(nr_cpu_ids, sizeof(unsigned int),
+				       GFP_KERNEL);
 	if (!prev_nmi_count) {
 		err = -ENOMEM;
 		goto error;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 7e49bbc925a5..33e351704f9f 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -575,8 +575,9 @@ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
 			unsigned long *p = current_thread_info()->utraps;
 
 			current_thread_info()->utraps =
-				kmalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long),
-					GFP_KERNEL);
+				kmalloc_array(UT_TRAP_INSTRUCTION_31 + 1,
+					      sizeof(long),
+					      GFP_KERNEL);
 			if (!current_thread_info()->utraps) {
 				current_thread_info()->utraps = p;
 				return -ENOMEM;
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index 3bd8ca95e521..a5ff88643d5c 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -335,7 +335,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	if (!bpf_jit_enable)
 		return;
 
-	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
+	addrs = kmalloc_array(flen, sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL)
 		return;
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index dcf5ea28a281..83c470364dfb 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1127,9 +1127,9 @@ static int __init ubd_init(void)
 			return -1;
 	}
 
-	irq_req_buffer = kmalloc(
-			sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
-			GFP_KERNEL
+	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
+				       sizeof(struct io_thread_req *),
+				       GFP_KERNEL
 		);
 	irq_remainder = 0;
 
@@ -1137,9 +1137,9 @@ static int __init ubd_init(void)
 		printk(KERN_ERR "Failed to initialize ubd buffering\n");
 		return -1;
 	}
-	io_req_buffer = kmalloc(
-			sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
-			GFP_KERNEL
+	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
+				      sizeof(struct io_thread_req *),
+				      GFP_KERNEL
 		);
 
 	io_remainder = 0;
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 02168fe25105..627075e6d875 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -527,14 +527,14 @@ static struct vector_queue *create_queue(
 	result->max_iov_frags = num_extra_frags;
 	for (i = 0; i < max_size; i++) {
 		if (vp->header_size > 0)
-			iov = kmalloc(
-				sizeof(struct iovec) * (3 + num_extra_frags),
-				GFP_KERNEL
+			iov = kmalloc_array(3 + num_extra_frags,
+					    sizeof(struct iovec),
+					    GFP_KERNEL
 			);
 		else
-			iov = kmalloc(
-				sizeof(struct iovec) * (2 + num_extra_frags),
-				GFP_KERNEL
+			iov = kmalloc_array(2 + num_extra_frags,
+					    sizeof(struct iovec),
+					    GFP_KERNEL
 			);
 		if (iov == NULL)
 			goto out_fail;
diff --git a/arch/unicore32/kernel/pm.c b/arch/unicore32/kernel/pm.c
index 784bc2db3b28..6f8164d91dc2 100644
--- a/arch/unicore32/kernel/pm.c
+++ b/arch/unicore32/kernel/pm.c
@@ -109,8 +109,9 @@ static int __init puv3_pm_init(void)
 		return -EINVAL;
 	}
 
-	sleep_save = kmalloc(puv3_cpu_pm_fns->save_count
-				* sizeof(unsigned long), GFP_KERNEL);
+	sleep_save = kmalloc_array(puv3_cpu_pm_fns->save_count,
+				   sizeof(unsigned long),
+				   GFP_KERNEL);
 	if (!sleep_save) {
 		printk(KERN_ERR "failed to alloc memory for pm save\n");
 		return -ENOMEM;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6e461fb1e0d4..5f4829f10129 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1637,7 +1637,7 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
 		j++;
 	j++;
 
-	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
 	if (!new)
 		return NULL;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b6be34ee88e9..ddccdea0b63b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -966,8 +966,8 @@ int __init hpet_enable(void)
 #endif
 
 	cfg = hpet_readl(HPET_CFG);
-	hpet_boot_cfg = kmalloc((last + 2) * sizeof(*hpet_boot_cfg),
-				GFP_KERNEL);
+	hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg),
+				      GFP_KERNEL);
 	if (hpet_boot_cfg)
 		*hpet_boot_cfg = cfg;
 	else
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 8c1cc08f514f..163ae706a0d4 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -283,7 +283,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
 	if (ret)
 		goto out_setup_data_kobj;
 
-	kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL);
+	kobjp = kmalloc_array(nr, sizeof(*kobjp), GFP_KERNEL);
 	if (!kobjp) {
 		ret = -ENOMEM;
 		goto out_setup_data_kobj;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 950ec50f77c3..e831e6d3b70e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1001,7 +1001,9 @@ static int svm_cpu_init(int cpu)
 
 	if (svm_sev_enabled()) {
 		r = -ENOMEM;
-		sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+		sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
+					      sizeof(void *),
+					      GFP_KERNEL);
 		if (!sd->sev_vmcbs)
 			goto err_1;
 	}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8fca446aaef6..2580cd2e98b1 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1107,7 +1107,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		extra_pass = true;
 		goto skip_init_addrs;
 	}
-	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
 	if (!addrs) {
 		prog = orig_prog;
 		goto out_addrs;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 0cc04e30adc1..55799873ebe5 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2345,7 +2345,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog = tmp;
 	}
 
-	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
 	if (!addrs) {
 		prog = orig_prog;
 		goto out;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index b96d38288c60..ca446da48fd2 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2142,7 +2142,7 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
 	if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
 		timeout_us = calculate_destination_timeout();
 
-	vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
+	vp = kmalloc_array(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
 	uvhub_descs = (struct uvhub_desc *)vp;
 	memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
 	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
-- 
cgit v1.2.3


From 6396bb221514d2876fd6dc0aa2a1f240d99b37bb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Jun 2018 14:03:40 -0700
Subject: treewide: kzalloc() -> kcalloc()

The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:

        kzalloc(a * b, gfp)

with:
        kcalloc(a * b, gfp)

as well as handling cases of:

        kzalloc(a * b * c, gfp)

with:

        kzalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

        kzalloc_array(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

        kzalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  kzalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  kzalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  kzalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  kzalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (COUNT_ID)
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * COUNT_ID
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (COUNT_CONST)
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * COUNT_CONST
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (COUNT_ID)
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * COUNT_ID
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (COUNT_CONST)
+	COUNT_CONST, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * COUNT_CONST
+	COUNT_CONST, sizeof(THING)
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kzalloc
+ kcalloc
  (
-	SIZE * COUNT
+	COUNT, SIZE
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  kzalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kzalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kzalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  kzalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kzalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  kzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  kzalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kzalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  kzalloc(C1 * C2 * C3, ...)
|
  kzalloc(
-	(E1) * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	(E1) * (E2) * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	(E1) * (E2) * (E3)
+	array3_size(E1, E2, E3)
  , ...)
|
  kzalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
  kzalloc(sizeof(THING) * C2, ...)
|
  kzalloc(sizeof(TYPE) * C2, ...)
|
  kzalloc(C1 * C2 * C3, ...)
|
  kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * (E2)
+	E2, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(TYPE) * E2
+	E2, sizeof(TYPE)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * (E2)
+	E2, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	sizeof(THING) * E2
+	E2, sizeof(THING)
  , ...)
|
- kzalloc
+ kcalloc
  (
-	(E1) * E2
+	E1, E2
  , ...)
|
- kzalloc
+ kcalloc
  (
-	(E1) * (E2)
+	E1, E2
  , ...)
|
- kzalloc
+ kcalloc
  (
-	E1 * E2
+	E1, E2
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/mach-footbridge/dc21285.c             | 2 +-
 arch/arm/mach-ixp4xx/common-pci.c              | 2 +-
 arch/arm/mach-omap1/mcbsp.c                    | 2 +-
 arch/arm/mach-omap2/hsmmc.c                    | 2 +-
 arch/arm/mach-omap2/omap_device.c              | 4 ++--
 arch/arm/mach-omap2/prm_common.c               | 9 +++++----
 arch/arm/mach-vexpress/spc.c                   | 2 +-
 arch/arm/mm/dma-mapping.c                      | 4 ++--
 arch/arm64/kernel/armv8_deprecated.c           | 4 ++--
 arch/arm64/mm/context.c                        | 2 +-
 arch/ia64/kernel/topology.c                    | 6 +++---
 arch/ia64/sn/kernel/io_common.c                | 2 +-
 arch/ia64/sn/pci/pcibr/pcibr_provider.c        | 2 +-
 arch/mips/alchemy/common/clock.c               | 2 +-
 arch/mips/alchemy/common/dbdma.c               | 2 +-
 arch/mips/alchemy/common/platform.c            | 4 ++--
 arch/mips/alchemy/devboards/platform.c         | 4 ++--
 arch/mips/bmips/dma.c                          | 2 +-
 arch/mips/txx9/rbtx4939/setup.c                | 2 +-
 arch/powerpc/kernel/vdso.c                     | 4 ++--
 arch/powerpc/mm/numa.c                         | 2 +-
 arch/powerpc/net/bpf_jit_comp.c                | 2 +-
 arch/powerpc/net/bpf_jit_comp64.c              | 2 +-
 arch/powerpc/oprofile/cell/spu_profiler.c      | 4 ++--
 arch/powerpc/platforms/4xx/pci.c               | 2 +-
 arch/powerpc/platforms/powernv/opal-sysparam.c | 8 ++++----
 arch/powerpc/sysdev/mpic.c                     | 4 ++--
 arch/powerpc/sysdev/xive/native.c              | 2 +-
 arch/s390/appldata/appldata_base.c             | 2 +-
 arch/s390/kernel/vdso.c                        | 4 ++--
 arch/sh/drivers/dma/dmabrg.c                   | 2 +-
 arch/sh/drivers/pci/pcie-sh7786.c              | 2 +-
 arch/sparc/kernel/sys_sparc_64.c               | 3 ++-
 arch/x86/events/amd/iommu.c                    | 2 +-
 arch/x86/events/intel/uncore.c                 | 2 +-
 arch/x86/kernel/cpu/mcheck/mce.c               | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c           | 2 +-
 arch/x86/kernel/cpu/mtrr/if.c                  | 2 +-
 arch/x86/kernel/hpet.c                         | 2 +-
 arch/x86/pci/xen.c                             | 2 +-
 arch/x86/platform/uv/uv_time.c                 | 2 +-
 41 files changed, 60 insertions(+), 58 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index e7b350f18f5f..16d71bac0061 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -252,7 +252,7 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys)
 	if (nr || !footbridge_cfn_mode())
 		return 0;
 
-	res = kzalloc(sizeof(struct resource) * 2, GFP_KERNEL);
+	res = kcalloc(2, sizeof(struct resource), GFP_KERNEL);
 	if (!res) {
 		printk("out of memory for root bus resources");
 		return 0;
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index bcf3df59f71b..6835b17113e5 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -421,7 +421,7 @@ int ixp4xx_setup(int nr, struct pci_sys_data *sys)
 	if (nr >= 1)
 		return 0;
 
-	res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
+	res = kcalloc(2, sizeof(*res), GFP_KERNEL);
 	if (res == NULL) {
 		/* 
 		 * If we're out of memory this early, something is wrong,
diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c
index 8ed67f8d1762..27e22e702f96 100644
--- a/arch/arm/mach-omap1/mcbsp.c
+++ b/arch/arm/mach-omap1/mcbsp.c
@@ -389,7 +389,7 @@ static void omap_mcbsp_register_board_cfg(struct resource *res, int res_count,
 {
 	int i;
 
-	omap_mcbsp_devices = kzalloc(size * sizeof(struct platform_device *),
+	omap_mcbsp_devices = kcalloc(size, sizeof(struct platform_device *),
 				     GFP_KERNEL);
 	if (!omap_mcbsp_devices) {
 		printk(KERN_ERR "Could not register McBSP devices\n");
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
index b064066d431c..9344035d537f 100644
--- a/arch/arm/mach-omap2/hsmmc.c
+++ b/arch/arm/mach-omap2/hsmmc.c
@@ -35,7 +35,7 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c,
 {
 	char *hc_name;
 
-	hc_name = kzalloc(sizeof(char) * (HSMMC_NAME_LEN + 1), GFP_KERNEL);
+	hc_name = kzalloc(HSMMC_NAME_LEN + 1, GFP_KERNEL);
 	if (!hc_name) {
 		kfree(hc_name);
 		return -ENOMEM;
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 3b829a50d1db..06b6bca3a179 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -155,7 +155,7 @@ static int omap_device_build_from_dt(struct platform_device *pdev)
 	if (!omap_hwmod_parse_module_range(NULL, node, &res))
 		return -ENODEV;
 
-	hwmods = kzalloc(sizeof(struct omap_hwmod *) * oh_cnt, GFP_KERNEL);
+	hwmods = kcalloc(oh_cnt, sizeof(struct omap_hwmod *), GFP_KERNEL);
 	if (!hwmods) {
 		ret = -ENOMEM;
 		goto odbfd_exit;
@@ -405,7 +405,7 @@ omap_device_copy_resources(struct omap_hwmod *oh,
 		goto error;
 	}
 
-	res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
+	res = kcalloc(2, sizeof(*res), GFP_KERNEL);
 	if (!res)
 		return -ENOMEM;
 
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 021b5a8b9c0a..058a37e6d11c 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -285,10 +285,11 @@ int omap_prcm_register_chain_handler(struct omap_prcm_irq_setup *irq_setup)
 
 	prcm_irq_setup = irq_setup;
 
-	prcm_irq_chips = kzalloc(sizeof(void *) * nr_regs, GFP_KERNEL);
-	prcm_irq_setup->saved_mask = kzalloc(sizeof(u32) * nr_regs, GFP_KERNEL);
-	prcm_irq_setup->priority_mask = kzalloc(sizeof(u32) * nr_regs,
-		GFP_KERNEL);
+	prcm_irq_chips = kcalloc(nr_regs, sizeof(void *), GFP_KERNEL);
+	prcm_irq_setup->saved_mask = kcalloc(nr_regs, sizeof(u32),
+					     GFP_KERNEL);
+	prcm_irq_setup->priority_mask = kcalloc(nr_regs, sizeof(u32),
+						GFP_KERNEL);
 
 	if (!prcm_irq_chips || !prcm_irq_setup->saved_mask ||
 	    !prcm_irq_setup->priority_mask)
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index 21c064267af5..0f5381d13494 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -403,7 +403,7 @@ static int ve_spc_populate_opps(uint32_t cluster)
 	uint32_t data = 0, off, ret, idx;
 	struct ve_spc_opp *opps;
 
-	opps = kzalloc(sizeof(*opps) * MAX_OPPS, GFP_KERNEL);
+	opps = kcalloc(MAX_OPPS, sizeof(*opps), GFP_KERNEL);
 	if (!opps)
 		return -ENOMEM;
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index af27f1c22d93..be0fa7e39c26 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2162,8 +2162,8 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
 		goto err;
 
 	mapping->bitmap_size = bitmap_size;
-	mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *),
-				GFP_KERNEL);
+	mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *),
+				   GFP_KERNEL);
 	if (!mapping->bitmaps)
 		goto err2;
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 97d45d5151d4..d4707abb2f16 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -234,8 +234,8 @@ static void __init register_insn_emulation_sysctl(void)
 	struct insn_emulation *insn;
 	struct ctl_table *insns_sysctl, *sysctl;
 
-	insns_sysctl = kzalloc(sizeof(*sysctl) * (nr_insn_emulated + 1),
-			      GFP_KERNEL);
+	insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
+			       GFP_KERNEL);
 
 	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
 	list_for_each_entry(insn, &insn_emulation, node) {
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 301417ae2ba8..c127f94da8e2 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -263,7 +263,7 @@ static int asids_init(void)
 	 */
 	WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
 	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
-	asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+	asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map),
 			   GFP_KERNEL);
 	if (!asid_map)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index d76529cbff20..9b820f7a6a98 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -85,7 +85,7 @@ static int __init topology_init(void)
 	}
 #endif
 
-	sysfs_cpus = kzalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
+	sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL);
 	if (!sysfs_cpus)
 		panic("kzalloc in topology_init failed - NR_CPUS too big?");
 
@@ -319,8 +319,8 @@ static int cpu_cache_sysfs_init(unsigned int cpu)
 		return -1;
 	}
 
-	this_cache=kzalloc(sizeof(struct cache_info)*unique_caches,
-			GFP_KERNEL);
+	this_cache=kcalloc(unique_caches, sizeof(struct cache_info),
+			   GFP_KERNEL);
 	if (this_cache == NULL)
 		return -ENOMEM;
 
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 8479e9a7ce16..102aabad6d20 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -132,7 +132,7 @@ static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
 	printk_once(KERN_WARNING
 		"PROM version < 4.50 -- implementing old PROM flush WAR\n");
 
-	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+	war_list = kcalloc(DEV_PER_WIDGET, sizeof(*war_list), GFP_KERNEL);
 	BUG_ON(!war_list);
 
 	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 8dbbef4a4f47..7195df1da121 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -184,7 +184,7 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	/* Setup the PMU ATE map */
 	soft->pbi_int_ate_resource.lowest_free_index = 0;
 	soft->pbi_int_ate_resource.ate =
-	    kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL);
+	    kcalloc(soft->pbi_int_ate_size, sizeof(u64), GFP_KERNEL);
 
 	if (!soft->pbi_int_ate_resource.ate) {
 		kfree(soft);
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index 6b6f6851df92..d129475fd40d 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -985,7 +985,7 @@ static int __init alchemy_clk_setup_imux(int ctype)
 		return -ENODEV;
 	}
 
-	a = kzalloc((sizeof(*a)) * 6, GFP_KERNEL);
+	a = kcalloc(6, sizeof(*a), GFP_KERNEL);
 	if (!a)
 		return -ENOMEM;
 
diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index 24b04758cce5..4ca2c28878e0 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -1050,7 +1050,7 @@ static int __init dbdma_setup(unsigned int irq, dbdev_tab_t *idtable)
 {
 	int ret;
 
-	dbdev_tab = kzalloc(sizeof(dbdev_tab_t) * DBDEV_TAB_SIZE, GFP_KERNEL);
+	dbdev_tab = kcalloc(DBDEV_TAB_SIZE, sizeof(dbdev_tab_t), GFP_KERNEL);
 	if (!dbdev_tab)
 		return -ENOMEM;
 
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index d77a64f4c78b..1454d9f6ab2d 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -115,7 +115,7 @@ static void __init alchemy_setup_uarts(int ctype)
 	uartclk = clk_get_rate(clk);
 	clk_put(clk);
 
-	ports = kzalloc(s * (c + 1), GFP_KERNEL);
+	ports = kcalloc(s, (c + 1), GFP_KERNEL);
 	if (!ports) {
 		printk(KERN_INFO "Alchemy: no memory for UART data\n");
 		return;
@@ -198,7 +198,7 @@ static unsigned long alchemy_ehci_data[][2] __initdata = {
 
 static int __init _new_usbres(struct resource **r, struct platform_device **d)
 {
-	*r = kzalloc(sizeof(struct resource) * 2, GFP_KERNEL);
+	*r = kcalloc(2, sizeof(struct resource), GFP_KERNEL);
 	if (!*r)
 		return -ENOMEM;
 	*d = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
diff --git a/arch/mips/alchemy/devboards/platform.c b/arch/mips/alchemy/devboards/platform.c
index 4640edab207c..203854ddd1bb 100644
--- a/arch/mips/alchemy/devboards/platform.c
+++ b/arch/mips/alchemy/devboards/platform.c
@@ -103,7 +103,7 @@ int __init db1x_register_pcmcia_socket(phys_addr_t pcmcia_attr_start,
 	if (stschg_irq)
 		cnt++;
 
-	sr = kzalloc(sizeof(struct resource) * cnt, GFP_KERNEL);
+	sr = kcalloc(cnt, sizeof(struct resource), GFP_KERNEL);
 	if (!sr)
 		return -ENOMEM;
 
@@ -178,7 +178,7 @@ int __init db1x_register_norflash(unsigned long size, int width,
 		return -EINVAL;
 
 	ret = -ENOMEM;
-	parts = kzalloc(sizeof(struct mtd_partition) * 5, GFP_KERNEL);
+	parts = kcalloc(5, sizeof(struct mtd_partition), GFP_KERNEL);
 	if (!parts)
 		goto out;
 
diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
index 04790f4e1805..6dec30842b2f 100644
--- a/arch/mips/bmips/dma.c
+++ b/arch/mips/bmips/dma.c
@@ -94,7 +94,7 @@ static int __init bmips_init_dma_ranges(void)
 		goto out_bad;
 
 	/* add a dummy (zero) entry at the end as a sentinel */
-	bmips_dma_ranges = kzalloc(sizeof(struct bmips_dma_range) * (len + 1),
+	bmips_dma_ranges = kcalloc(len + 1, sizeof(struct bmips_dma_range),
 				   GFP_KERNEL);
 	if (!bmips_dma_ranges)
 		goto out_bad;
diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c
index fd26fadc8617..ef29a9c2ffd6 100644
--- a/arch/mips/txx9/rbtx4939/setup.c
+++ b/arch/mips/txx9/rbtx4939/setup.c
@@ -219,7 +219,7 @@ static int __init rbtx4939_led_probe(struct platform_device *pdev)
 		"nand-disk",
 	};
 
-	leds_data = kzalloc(sizeof(*leds_data) * RBTX4939_MAX_7SEGLEDS,
+	leds_data = kcalloc(RBTX4939_MAX_7SEGLEDS, sizeof(*leds_data),
 			    GFP_KERNEL);
 	if (!leds_data)
 		return -ENOMEM;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b44ec104a5a1..d2205b97628c 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -791,7 +791,7 @@ static int __init vdso_init(void)
 
 #ifdef CONFIG_VDSO32
 	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
+	vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso32_pagelist == NULL);
 	for (i = 0; i < vdso32_pages; i++) {
@@ -805,7 +805,7 @@ static int __init vdso_init(void)
 #endif
 
 #ifdef CONFIG_PPC64
-	vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
+	vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso64_pagelist == NULL);
 	for (i = 0; i < vdso64_pages; i++) {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 57a5029b4521..0c7e05d89244 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1316,7 +1316,7 @@ int numa_update_cpu_topology(bool cpus_locked)
 	if (!weight)
 		return 0;
 
-	updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
+	updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
 	if (!updates)
 		return 0;
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index a9636d8cba15..5b061fc81df3 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -566,7 +566,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	if (!bpf_jit_enable)
 		return;
 
-	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
+	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL)
 		return;
 
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index f1c95779843b..380cbf9a40d9 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -949,7 +949,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto skip_init_ctx;
 	}
 
-	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
+	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
 		goto out_addrs;
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 5182f2936af2..4e099e556645 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -210,8 +210,8 @@ int start_spu_profiling_cycles(unsigned int cycles_reset)
 	timer.function = profile_spus;
 
 	/* Allocate arrays for collecting SPU PC samples */
-	samples = kzalloc(SPUS_PER_NODE *
-			  TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
+	samples = kcalloc(SPUS_PER_NODE * TRACE_ARRAY_SIZE, sizeof(u32),
+			  GFP_KERNEL);
 
 	if (!samples)
 		return -ENOMEM;
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
index 73e6b36bcd51..5aca523551ae 100644
--- a/arch/powerpc/platforms/4xx/pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -1449,7 +1449,7 @@ static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
 	count = ppc4xx_pciex_hwops->core_init(np);
 	if (count > 0) {
 		ppc4xx_pciex_ports =
-		       kzalloc(count * sizeof(struct ppc4xx_pciex_port),
+		       kcalloc(count, sizeof(struct ppc4xx_pciex_port),
 			       GFP_KERNEL);
 		if (ppc4xx_pciex_ports) {
 			ppc4xx_pciex_port_count = count;
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 6fd4092798d5..9aa87df114fd 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -198,21 +198,21 @@ void __init opal_sys_param_init(void)
 		goto out_param_buf;
 	}
 
-	id = kzalloc(sizeof(*id) * count, GFP_KERNEL);
+	id = kcalloc(count, sizeof(*id), GFP_KERNEL);
 	if (!id) {
 		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
 				"id\n");
 		goto out_param_buf;
 	}
 
-	size = kzalloc(sizeof(*size) * count, GFP_KERNEL);
+	size = kcalloc(count, sizeof(*size), GFP_KERNEL);
 	if (!size) {
 		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
 				"size\n");
 		goto out_free_id;
 	}
 
-	perm = kzalloc(sizeof(*perm) * count, GFP_KERNEL);
+	perm = kcalloc(count, sizeof(*perm), GFP_KERNEL);
 	if (!perm) {
 		pr_err("SYSPARAM: Failed to allocate memory to read supported "
 				"action on the parameter");
@@ -235,7 +235,7 @@ void __init opal_sys_param_init(void)
 		goto out_free_perm;
 	}
 
-	attr = kzalloc(sizeof(*attr) * count, GFP_KERNEL);
+	attr = kcalloc(count, sizeof(*attr), GFP_KERNEL);
 	if (!attr) {
 		pr_err("SYSPARAM: Failed to allocate memory for parameter "
 				"attributes\n");
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index df062a154ca8..353b43972bbf 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -544,7 +544,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
 	printk(KERN_INFO "mpic: Setting up HT PICs workarounds for U3/U4\n");
 
 	/* Allocate fixups array */
-	mpic->fixups = kzalloc(128 * sizeof(*mpic->fixups), GFP_KERNEL);
+	mpic->fixups = kcalloc(128, sizeof(*mpic->fixups), GFP_KERNEL);
 	BUG_ON(mpic->fixups == NULL);
 
 	/* Init spinlock */
@@ -1324,7 +1324,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	if (psrc) {
 		/* Allocate a bitmap with one bit per interrupt */
 		unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1);
-		mpic->protected = kzalloc(mapsize*sizeof(long), GFP_KERNEL);
+		mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL);
 		BUG_ON(mpic->protected == NULL);
 		for (i = 0; i < psize/sizeof(u32); i++) {
 			if (psrc[i] > intvec_top)
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 83bcd72b21cf..311185b9960a 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -489,7 +489,7 @@ static bool xive_parse_provisioning(struct device_node *np)
 	if (rc == 0)
 		return true;
 
-	xive_provision_chips = kzalloc(4 * xive_provision_chip_count,
+	xive_provision_chips = kcalloc(4, xive_provision_chip_count,
 				       GFP_KERNEL);
 	if (WARN_ON(!xive_provision_chips))
 		return false;
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index cb6e8066b1ad..ee6a9c387c87 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -391,7 +391,7 @@ int appldata_register_ops(struct appldata_ops *ops)
 	if (ops->size > APPLDATA_MAX_REC_SIZE)
 		return -EINVAL;
 
-	ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL);
+	ops->ctl_table = kcalloc(4, sizeof(struct ctl_table), GFP_KERNEL);
 	if (!ops->ctl_table)
 		return -ENOMEM;
 
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index f3a1c7c6824e..09abae40f917 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -285,7 +285,7 @@ static int __init vdso_init(void)
 			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
 
 	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1),
+	vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso32_pagelist == NULL);
 	for (i = 0; i < vdso32_pages - 1; i++) {
@@ -303,7 +303,7 @@ static int __init vdso_init(void)
 			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
 
 	/* Make sure pages are in the correct state */
-	vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1),
+	vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *),
 				  GFP_KERNEL);
 	BUG_ON(vdso64_pagelist == NULL);
 	for (i = 0; i < vdso64_pages - 1; i++) {
diff --git a/arch/sh/drivers/dma/dmabrg.c b/arch/sh/drivers/dma/dmabrg.c
index c0dd904483c7..e5a57a109d6c 100644
--- a/arch/sh/drivers/dma/dmabrg.c
+++ b/arch/sh/drivers/dma/dmabrg.c
@@ -154,7 +154,7 @@ static int __init dmabrg_init(void)
 	unsigned long or;
 	int ret;
 
-	dmabrg_handlers = kzalloc(10 * sizeof(struct dmabrg_handler),
+	dmabrg_handlers = kcalloc(10, sizeof(struct dmabrg_handler),
 				  GFP_KERNEL);
 	if (!dmabrg_handlers)
 		return -ENOMEM;
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index 382e7ecf4c82..3d81a8b80942 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -561,7 +561,7 @@ static int __init sh7786_pcie_init(void)
 	if (unlikely(nr_ports == 0))
 		return -ENODEV;
 
-	sh7786_pcie_ports = kzalloc(nr_ports * sizeof(struct sh7786_pcie_port),
+	sh7786_pcie_ports = kcalloc(nr_ports, sizeof(struct sh7786_pcie_port),
 				    GFP_KERNEL);
 	if (unlikely(!sh7786_pcie_ports))
 		return -ENOMEM;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 33e351704f9f..63baa8aa9414 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -565,7 +565,8 @@ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
 	}
 	if (!current_thread_info()->utraps) {
 		current_thread_info()->utraps =
-			kzalloc((UT_TRAP_INSTRUCTION_31+1)*sizeof(long), GFP_KERNEL);
+			kcalloc(UT_TRAP_INSTRUCTION_31 + 1, sizeof(long),
+				GFP_KERNEL);
 		if (!current_thread_info()->utraps)
 			return -ENOMEM;
 		current_thread_info()->utraps[0] = 1;
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 38b5d41b0c37..3210fee27e7f 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -387,7 +387,7 @@ static __init int _init_events_attrs(void)
 	while (amd_iommu_v2_event_descs[i].attr.attr.name)
 		i++;
 
-	attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
+	attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
 	if (!attrs)
 		return -ENOMEM;
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index e15cfad4f89b..27a461414b30 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -868,7 +868,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 	size_t size;
 	int i, j;
 
-	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
 	if (!pmus)
 		return -ENOMEM;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cd76380af79f..e4cf6ff1c2e1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1457,7 +1457,7 @@ static int __mcheck_cpu_mce_banks_init(void)
 	int i;
 	u8 num_banks = mca_cfg.banks;
 
-	mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
+	mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
 	if (!mce_banks)
 		return -ENOMEM;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f591b01930db..dd33c357548f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1384,7 +1384,7 @@ int mce_threshold_create_device(unsigned int cpu)
 	if (bp)
 		return 0;
 
-	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
+	bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
 		     GFP_KERNEL);
 	if (!bp)
 		return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index c610f47373e4..4021d3859499 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -43,7 +43,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
 
 	max = num_var_ranges;
 	if (fcount == NULL) {
-		fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
+		fcount = kcalloc(max, sizeof(*fcount), GFP_KERNEL);
 		if (!fcount)
 			return -ENOMEM;
 		FILE_FCOUNT(file) = fcount;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ddccdea0b63b..346b24883911 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -610,7 +610,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
 	if (!hpet_domain)
 		return;
 
-	hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+	hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
 	if (!hpet_devs)
 		return;
 
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 9542a746dc50..9112d1cb397b 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -168,7 +168,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
-	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+	v = kcalloc(max(1, nvec), sizeof(int), GFP_KERNEL);
 	if (!v)
 		return -ENOMEM;
 
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index b082d71b08ee..a36b368eea08 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -158,7 +158,7 @@ static __init int uv_rtc_allocate_timers(void)
 {
 	int cpu;
 
-	blade_info = kzalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL);
+	blade_info = kcalloc(uv_possible_blades, sizeof(void *), GFP_KERNEL);
 	if (!blade_info)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 778e1cdd81bb5fcd1e72bf48a2965cd7aaec82a8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Jun 2018 14:04:48 -0700
Subject: treewide: kvzalloc() -> kvcalloc()

The kvzalloc() function has a 2-factor argument form, kvcalloc(). This
patch replaces cases of:

        kvzalloc(a * b, gfp)

with:
        kvcalloc(a * b, gfp)

as well as handling cases of:

        kvzalloc(a * b * c, gfp)

with:

        kvzalloc(array3_size(a, b, c), gfp)

as it's slightly less ugly than:

        kvcalloc(array_size(a, b), c, gfp)

This does, however, attempt to ignore constant size factors like:

        kvzalloc(4 * 1024, gfp)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  kvzalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  kvzalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  kvzalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  kvzalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * (COUNT_ID)
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * COUNT_ID
+	COUNT_ID, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * (COUNT_CONST)
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * COUNT_CONST
+	COUNT_CONST, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * (COUNT_ID)
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * COUNT_ID
+	COUNT_ID, sizeof(THING)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * (COUNT_CONST)
+	COUNT_CONST, sizeof(THING)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * COUNT_CONST
+	COUNT_CONST, sizeof(THING)
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

- kvzalloc
+ kvcalloc
  (
-	SIZE * COUNT
+	COUNT, SIZE
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  kvzalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  kvzalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kvzalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kvzalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  kvzalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  kvzalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  kvzalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kvzalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  kvzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  kvzalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  kvzalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  kvzalloc(C1 * C2 * C3, ...)
|
  kvzalloc(
-	(E1) * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kvzalloc(
-	(E1) * (E2) * E3
+	array3_size(E1, E2, E3)
  , ...)
|
  kvzalloc(
-	(E1) * (E2) * (E3)
+	array3_size(E1, E2, E3)
  , ...)
|
  kvzalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@

(
  kvzalloc(sizeof(THING) * C2, ...)
|
  kvzalloc(sizeof(TYPE) * C2, ...)
|
  kvzalloc(C1 * C2 * C3, ...)
|
  kvzalloc(C1 * C2, ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * (E2)
+	E2, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(TYPE) * E2
+	E2, sizeof(TYPE)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * (E2)
+	E2, sizeof(THING)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	sizeof(THING) * E2
+	E2, sizeof(THING)
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	(E1) * E2
+	E1, E2
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	(E1) * (E2)
+	E1, E2
  , ...)
|
- kvzalloc
+ kvcalloc
  (
-	E1 * E2
+	E1, E2
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/kvm/page_track.c | 5 +++--
 arch/x86/kvm/x86.c        | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 01c1371f39f8..3052a59a3065 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -40,8 +40,9 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
 	int  i;
 
 	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-		slot->arch.gfn_track[i] = kvzalloc(npages *
-					    sizeof(*slot->arch.gfn_track[i]), GFP_KERNEL);
+		slot->arch.gfn_track[i] =
+			kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
+				 GFP_KERNEL);
 		if (!slot->arch.gfn_track[i])
 			goto track_free;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 71e7cda6d014..31853061ed4f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8871,13 +8871,14 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 				      slot->base_gfn, level) + 1;
 
 		slot->arch.rmap[i] =
-			kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL);
+			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
+				 GFP_KERNEL);
 		if (!slot->arch.rmap[i])
 			goto out_free;
 		if (i == 0)
 			continue;
 
-		linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL);
+		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
 		if (!linfo)
 			goto out_free;
 
-- 
cgit v1.2.3


From 42bc47b35320e0e587a88e437e18f80f9c5bcbb2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Jun 2018 14:27:11 -0700
Subject: treewide: Use array_size() in vmalloc()

The vmalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:

        vmalloc(a * b)

with:
        vmalloc(array_size(a, b))

as well as handling cases of:

        vmalloc(a * b * c)

with:

        vmalloc(array3_size(a, b, c))

This does, however, attempt to ignore constant size factors like:

        vmalloc(4 * 1024)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  vmalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  vmalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  vmalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  vmalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
  vmalloc(
-	sizeof(TYPE) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT_ID
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT_ID
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

  vmalloc(
-	SIZE * COUNT
+	array_size(COUNT, SIZE)
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  vmalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vmalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  vmalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vmalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  vmalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  vmalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vmalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  vmalloc(C1 * C2 * C3, ...)
|
  vmalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@

(
  vmalloc(C1 * C2, ...)
|
  vmalloc(
-	E1 * E2
+	array_size(E1, E2)
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/kernel/rtasd.c         | 3 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 arch/s390/hypfs/hypfs_diag.c        | 2 +-
 arch/s390/kernel/module.c           | 4 ++--
 arch/s390/kernel/sthyi.c            | 2 +-
 arch/s390/kvm/gaccess.c             | 2 +-
 arch/s390/kvm/kvm-s390.c            | 2 +-
 arch/x86/kvm/cpuid.c                | 5 +++--
 8 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index f915db93cd42..44d66c33d59d 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -559,7 +559,8 @@ static int __init rtas_event_scan_init(void)
 	rtas_error_log_max = rtas_get_error_log_max();
 	rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
 
-	rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER);
+	rtas_log_buf = vmalloc(array_size(LOG_NUMBER,
+					  rtas_error_log_buffer_max));
 	if (!rtas_log_buf) {
 		printk(KERN_ERR "rtasd: no memory\n");
 		return -ENOMEM;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a670fa5fbe50..1b3fcafc685e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -108,7 +108,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 	npte = 1ul << (order - 4);
 
 	/* Allocate reverse map array */
-	rev = vmalloc(sizeof(struct revmap_entry) * npte);
+	rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
 	if (!rev) {
 		if (cma)
 			kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index be8cc53204b5..a2945b289a29 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -239,7 +239,7 @@ static void *page_align_ptr(void *ptr)
 static void *diag204_alloc_vbuf(int pages)
 {
 	/* The buffer has to be page aligned! */
-	diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
+	diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1)));
 	if (!diag204_buf_vmalloc)
 		return ERR_PTR(-ENOMEM);
 	diag204_buf = page_align_ptr(diag204_buf_vmalloc);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 0dc8ac8548ee..d298d3cb46d0 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -123,8 +123,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 
 	/* Allocate one syminfo structure per symbol. */
 	me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
-	me->arch.syminfo = vmalloc(me->arch.nsyms *
-				   sizeof(struct mod_arch_syminfo));
+	me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo),
+					      me->arch.nsyms));
 	if (!me->arch.syminfo)
 		return -ENOMEM;
 	symbols = (void *) hdr + symtab->sh_offset;
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 80b862e9c53c..0859cde36f75 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	if (pages <= 0)
 		return;
 
-	diag204_buf = vmalloc(PAGE_SIZE * pages);
+	diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
 	if (!diag204_buf)
 		return;
 
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 8e2b8647ee12..07d30ffcfa41 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -847,7 +847,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
 	pages = pages_array;
 	if (nr_pages > ARRAY_SIZE(pages_array))
-		pages = vmalloc(nr_pages * sizeof(unsigned long));
+		pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
 	if (!pages)
 		return -ENOMEM;
 	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 64c986243018..3f6625c64341 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1725,7 +1725,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 	if (args->count == 0)
 		return 0;
 
-	bits = vmalloc(sizeof(*bits) * args->count);
+	bits = vmalloc(array_size(sizeof(*bits), args->count));
 	if (!bits)
 		return -ENOMEM;
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f4f30d0c25c4..66fc27b92c59 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -203,8 +203,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 		goto out;
 	r = -ENOMEM;
 	if (cpuid->nent) {
-		cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) *
-					cpuid->nent);
+		cpuid_entries =
+			vmalloc(array_size(sizeof(struct kvm_cpuid_entry),
+					   cpuid->nent));
 		if (!cpuid_entries)
 			goto out;
 		r = -EFAULT;
-- 
cgit v1.2.3


From fad953ce0b22cfd352a9a90b070c34b8791e6868 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 12 Jun 2018 14:27:37 -0700
Subject: treewide: Use array_size() in vzalloc()

The vzalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:

        vzalloc(a * b)

with:
        vzalloc(array_size(a, b))

as well as handling cases of:

        vzalloc(a * b * c)

with:

        vzalloc(array3_size(a, b, c))

This does, however, attempt to ignore constant size factors like:

        vzalloc(4 * 1024)

though any constants defined via macros get caught up in the conversion.

Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.

The Coccinelle script used for this was:

// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@

(
  vzalloc(
-	(sizeof(TYPE)) * E
+	sizeof(TYPE) * E
  , ...)
|
  vzalloc(
-	(sizeof(THING)) * E
+	sizeof(THING) * E
  , ...)
)

// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@

(
  vzalloc(
-	sizeof(u8) * (COUNT)
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(__u8) * (COUNT)
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(char) * (COUNT)
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(unsigned char) * (COUNT)
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(u8) * COUNT
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(__u8) * COUNT
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(char) * COUNT
+	COUNT
  , ...)
|
  vzalloc(
-	sizeof(unsigned char) * COUNT
+	COUNT
  , ...)
)

// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@

(
  vzalloc(
-	sizeof(TYPE) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * COUNT_ID
+	array_size(COUNT_ID, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(THING) * (COUNT_ID)
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * COUNT_ID
+	array_size(COUNT_ID, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * (COUNT_CONST)
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * COUNT_CONST
+	array_size(COUNT_CONST, sizeof(THING))
  , ...)
)

// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@

  vzalloc(
-	SIZE * COUNT
+	array_size(COUNT, SIZE)
  , ...)

// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@

(
  vzalloc(
-	sizeof(TYPE) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(TYPE) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(TYPE))
  , ...)
|
  vzalloc(
-	sizeof(THING) * (COUNT) * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * (COUNT) * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * COUNT * (STRIDE)
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
|
  vzalloc(
-	sizeof(THING) * COUNT * STRIDE
+	array3_size(COUNT, STRIDE, sizeof(THING))
  , ...)
)

// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@

(
  vzalloc(
-	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
  , ...)
|
  vzalloc(
-	sizeof(THING1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vzalloc(
-	sizeof(THING1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
  , ...)
|
  vzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * COUNT
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
|
  vzalloc(
-	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
  , ...)
)

// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@

(
  vzalloc(
-	(COUNT) * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	COUNT * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	COUNT * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	(COUNT) * (STRIDE) * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	COUNT * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	(COUNT) * STRIDE * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	(COUNT) * (STRIDE) * (SIZE)
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
|
  vzalloc(
-	COUNT * STRIDE * SIZE
+	array3_size(COUNT, STRIDE, SIZE)
  , ...)
)

// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@

(
  vzalloc(C1 * C2 * C3, ...)
|
  vzalloc(
-	E1 * E2 * E3
+	array3_size(E1, E2, E3)
  , ...)
)

// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@

(
  vzalloc(C1 * C2, ...)
|
  vzalloc(
-	E1 * E2
+	array_size(E1, E2)
  , ...)
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/kvm/book3s_hv.c        | 2 +-
 arch/powerpc/mm/mmu_context_iommu.c | 2 +-
 arch/x86/kvm/cpuid.c                | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cb6d2313b19f..746645cd2ba7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3548,7 +3548,7 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
 					 unsigned long npages)
 {
-	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+	slot->arch.rmap = vzalloc(array_size(npages, sizeof(*slot->arch.rmap)));
 	if (!slot->arch.rmap)
 		return -ENOMEM;
 
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 4c615fcb0cf0..abb43646927a 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -159,7 +159,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		goto unlock_exit;
 	}
 
-	mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
+	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
 	if (!mem->hpas) {
 		kfree(mem);
 		ret = -ENOMEM;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 66fc27b92c59..812cada68e0f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -785,7 +785,8 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 		return -EINVAL;
 
 	r = -ENOMEM;
-	cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+	cpuid_entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
+					   cpuid->nent));
 	if (!cpuid_entries)
 		goto out;
 
-- 
cgit v1.2.3


From b71dc519a993d10f5db416c82b174f60e644ac3a Mon Sep 17 00:00:00 2001
From: Cameron Kaiser <spectre@floodgap.com>
Date: Tue, 5 Jun 2018 07:48:55 -0700
Subject: KVM: PPC: Book3S PR: Handle additional interrupt types

This adds trivial handling for additional interrupt types that KVM-PR must
support for proper virtualization on a POWER9 host in HPT mode, as a further
prerequisite to enabling KVM-PR on that configuration.

Signed-off-by: Cameron Kaiser <spectre@floodgap.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e96ead92ae48..a3569165a970 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1253,10 +1253,13 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_EXTERNAL:
 	case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
 	case BOOK3S_INTERRUPT_EXTERNAL_HV:
+	case BOOK3S_INTERRUPT_H_VIRT:
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
+	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
 		r = RESUME_GUEST;
 		break;
 	case BOOK3S_INTERRUPT_PROGRAM:
-- 
cgit v1.2.3


From 916ccadccdcd8a0b7184dce37066a9fb2f9b4195 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 7 Jun 2018 18:04:37 +1000
Subject: KVM: PPC: Book3S PR: Fix MSR setting when delivering interrupts

This makes sure that MSR "partial-function" bits are not transferred
to SRR1 when delivering an interrupt.  This was causing failures in
guests running kernels that include commit f3d96e698ed0 ("powerpc/mm:
Overhaul handling of bad page faults", 2017-07-19), which added code
to check bits of SRR1 on instruction storage interrupts (ISIs) that
indicate a bad page fault.  The symptom was that a guest user program
that handled a signal and attempted to return from the signal handler
would get a SIGBUS signal and die.

The code that generated ISIs and some other interrupts would
previously set bits in the guest MSR to indicate the interrupt status
and then call kvmppc_book3s_queue_irqprio().  This technique no
longer works now that kvmppc_inject_interrupt() is masking off those
bits.  Instead we make kvmppc_core_queue_data_storage() and
kvmppc_core_queue_inst_storage() call kvmppc_inject_interrupt()
directly, and make sure that all the places that generate ISIs or
DSIs call kvmppc_core_queue_{data,inst}_storage instead of
kvmppc_book3s_queue_irqprio().

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s.c    | 13 +++++--------
 arch/powerpc/kvm/book3s_pr.c | 42 +++++++++++++++++-------------------------
 2 files changed, 22 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 309c8cf8fed4..edaf4720d156 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -134,7 +134,7 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
 	kvmppc_unfixup_split_real(vcpu);
 	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
-	kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
+	kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
 	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
 	vcpu->arch.mmu.reset_msr(vcpu);
 }
@@ -256,18 +256,15 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
 {
 	kvmppc_set_dar(vcpu, dar);
 	kvmppc_set_dsisr(vcpu, flags);
-	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
 }
-EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage);	/* used by kvm_hv */
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage);
 
 void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
 {
-	u64 msr = kvmppc_get_msr(vcpu);
-	msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
-	msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
-	kvmppc_set_msr_fast(vcpu, msr);
-	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, flags);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_inst_storage);
 
 static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
 					 unsigned int priority)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a3569165a970..e8036ddeded1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -728,24 +728,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte.may_execute = !data;
 	}
 
-	if (page_found == -ENOENT) {
-		/* Page not found in guest PTE entries */
-		u64 ssrr1 = vcpu->arch.shadow_srr1;
-		u64 msr = kvmppc_get_msr(vcpu);
-		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
-		kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
-		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
-		kvmppc_book3s_queue_irqprio(vcpu, vec);
-	} else if (page_found == -EPERM) {
-		/* Storage protection */
-		u32 dsisr = vcpu->arch.fault_dsisr;
-		u64 ssrr1 = vcpu->arch.shadow_srr1;
-		u64 msr = kvmppc_get_msr(vcpu);
-		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
-		dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
-		kvmppc_set_dsisr(vcpu, dsisr);
-		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
-		kvmppc_book3s_queue_irqprio(vcpu, vec);
+	if (page_found == -ENOENT || page_found == -EPERM) {
+		/* Page not found in guest PTE entries, or protection fault */
+		u64 flags;
+
+		if (page_found == -EPERM)
+			flags = DSISR_PROTFAULT;
+		else
+			flags = DSISR_NOHPTE;
+		if (data) {
+			flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE;
+			kvmppc_core_queue_data_storage(vcpu, eaddr, flags);
+		} else {
+			kvmppc_core_queue_inst_storage(vcpu, flags);
+		}
 	} else if (page_found == -EINVAL) {
 		/* Page not found in guest SLB */
 		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
@@ -1178,10 +1174,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
 			r = RESUME_GUEST;
 		} else {
-			u64 msr = kvmppc_get_msr(vcpu);
-			msr |= shadow_srr1 & 0x58000000;
-			kvmppc_set_msr_fast(vcpu, msr);
-			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			kvmppc_core_queue_inst_storage(vcpu,
+						shadow_srr1 & 0x58000000);
 			r = RESUME_GUEST;
 		}
 		break;
@@ -1220,9 +1214,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
 			srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		} else {
-			kvmppc_set_dar(vcpu, dar);
-			kvmppc_set_dsisr(vcpu, fault_dsisr);
-			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr);
 			r = RESUME_GUEST;
 		}
 		break;
-- 
cgit v1.2.3


From a50623fb57002207b9b2636e1d3bd5cf17a3e74f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 7 Jun 2018 18:06:21 +1000
Subject: KVM: PPC: Book3S PR: Fix failure status setting in treclaim.
 emulation

The treclaim. emulation needs to record failure status in the TEXASR
register if the transaction had not previously failed.  However, the
current code first does kvmppc_save_tm_pr() (which does a treclaim.
itself) and then checks the failure summary bit in TEXASR after that.
Since treclaim. itself causes transaction failure, the FS bit is
always set, so we were never updating TEXASR with the failure cause
supplied by the guest as the RA parameter to the treclaim. instruction.
This caused the tm-unavailable test in tools/testing/selftests/powerpc/tm
to fail.

To fix this, we need to read TEXASR before calling kvmppc_save_tm_pr(),
and base the final value of TEXASR on that value.

Fixes: 03c81682a90b ("KVM: PPC: Book3S PR: Add emulation for treclaim.")
Reviewed-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index fdbc695038dc..05cac5ea79c5 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -138,6 +138,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
 {
 	unsigned long guest_msr = kvmppc_get_msr(vcpu);
 	int fc_val = ra_val ? ra_val : 1;
+	uint64_t texasr;
 
 	/* CR0 = 0 | MSR[TS] | 0 */
 	vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
@@ -145,25 +146,26 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
 		 << CR0_SHIFT);
 
 	preempt_disable();
+	tm_enable();
+	texasr = mfspr(SPRN_TEXASR);
 	kvmppc_save_tm_pr(vcpu);
 	kvmppc_copyfrom_vcpu_tm(vcpu);
 
-	tm_enable();
-	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
 	/* failure recording depends on Failure Summary bit */
-	if (!(vcpu->arch.texasr & TEXASR_FS)) {
-		vcpu->arch.texasr &= ~TEXASR_FC;
-		vcpu->arch.texasr |= ((u64)fc_val << TEXASR_FC_LG);
+	if (!(texasr & TEXASR_FS)) {
+		texasr &= ~TEXASR_FC;
+		texasr |= ((u64)fc_val << TEXASR_FC_LG) | TEXASR_FS;
 
-		vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+		texasr &= ~(TEXASR_PR | TEXASR_HV);
 		if (kvmppc_get_msr(vcpu) & MSR_PR)
-			vcpu->arch.texasr |= TEXASR_PR;
+			texasr |= TEXASR_PR;
 
 		if (kvmppc_get_msr(vcpu) & MSR_HV)
-			vcpu->arch.texasr |= TEXASR_HV;
+			texasr |= TEXASR_HV;
 
+		vcpu->arch.texasr = texasr;
 		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
-		mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+		mtspr(SPRN_TEXASR, texasr);
 		mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
 	}
 	tm_disable();
-- 
cgit v1.2.3


From 4f169d21181faad87a6cdb288742e08c808ec0ef Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 7 Jun 2018 18:07:06 +1000
Subject: KVM: PPC: Book3S PR: Don't let PAPR guest set MSR hypervisor bit

PAPR guests run in supervisor mode and should not be able to set the
MSR HV (hypervisor mode) bit or clear the ME (machine check enable)
bit by mtmsrd or any other means.  To enforce this, we force MSR_HV
off and MSR_ME on in kvmppc_set_msr_pr.  Without this, the guest
can appear to be in hypervisor mode to itself and to userspace.
This has been observed to cause a crash in QEMU when it tries to
deliver a system reset interrupt to the guest.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e8036ddeded1..5c99b84e0856 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -456,6 +456,10 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	ulong old_msr;
 
+	/* For PAPR guest, make sure MSR reflects guest mode */
+	if (vcpu->arch.papr_enabled)
+		msr = (msr & ~MSR_HV) | MSR_ME;
+
 #ifdef EXIT_DEBUG
 	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
-- 
cgit v1.2.3


From db96a04a86c73817b4584aa4fa2a3f60a9aa3c52 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 7 Jun 2018 18:08:02 +1000
Subject: KVM: PPC: Book3S PR: Enable use on POWER9 bare-metal hosts in HPT
 mode

It turns out that PR KVM has no dependency on the format of HPTEs,
because it uses functions pointed to by mmu_hash_ops which do all
the formatting and interpretation of HPTEs.  Thus we can allow PR
KVM to load on POWER9 bare-metal hosts as long as they are running
in HPT mode.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_pr.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5c99b84e0856..c3b8006f0eac 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -2041,13 +2041,9 @@ static int kvmppc_core_check_processor_compat_pr(void)
 	 * PR KVM can work on POWER9 inside a guest partition
 	 * running in HPT mode.  It can't work if we are using
 	 * radix translation (because radix provides no way for
-	 * a process to have unique translations in quadrant 3)
-	 * or in a bare-metal HPT-mode host (because POWER9
-	 * uses a modified HPTE format which the PR KVM code
-	 * has not been adapted to use).
+	 * a process to have unique translations in quadrant 3).
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
-	    (radix_enabled() || cpu_has_feature(CPU_FTR_HVMODE)))
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
 		return -EIO;
 	return 0;
 }
-- 
cgit v1.2.3


From f61e0d3cc4aee194014074471658a5a037e311ce Mon Sep 17 00:00:00 2001
From: Simon Guo <wei.guo.simon@gmail.com>
Date: Fri, 8 Jun 2018 01:40:03 -0400
Subject: KVM: PPC: Book3S PR: Fix failure status setting in tabort. emulation

tabort. will perform transaction failure recording and the recording
depends on TEXASR FS bit. Currently the TEXASR FS bit is retrieved
after tabort., when the TEXASR FS bit is already been updated by
tabort. itself.

This patch corrects this behavior by retrieving TEXASR val before
tabort.

tabort. will not immediately leads to transaction failure handling
in suspend state. So this patch also remove the mtspr on TEXASR/TFIAR
registers to avoid TM bad thing exception.

Fixes: 26798f88d58d ("KVM: PPC: Book3S PR: Add emulation for tabort. in privileged state")
Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_emulate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 05cac5ea79c5..36b11c5a0dbb 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -212,9 +212,11 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 	 * present.
 	 */
 	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+	uint64_t org_texasr;
 
 	preempt_disable();
 	tm_enable();
+	org_texasr = mfspr(SPRN_TEXASR);
 	tm_abort(ra_val);
 
 	/* CR0 = 0 | MSR[TS] | 0 */
@@ -227,7 +229,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 	 * and tabort will be treated as nops in non-transactional
 	 * state.
 	 */
-	if (!(vcpu->arch.texasr & TEXASR_FS) &&
+	if (!(org_texasr & TEXASR_FS) &&
 			MSR_TM_ACTIVE(guest_msr)) {
 		vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
 		if (guest_msr & MSR_PR)
@@ -237,8 +239,6 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 			vcpu->arch.texasr |= TEXASR_HV;
 
 		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
-		mtspr(SPRN_TEXASR, vcpu->arch.texasr);
-		mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
 	}
 	tm_disable();
 	preempt_enable();
-- 
cgit v1.2.3


From dbee3d02458b129b847c21f5fa60baba3eafc6f7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 12 Jun 2018 20:28:00 -0700
Subject: KVM: x86: VMX: fix build without hyper-v

Commit ceef7d10dfb6 ("KVM: x86: VMX: hyper-v: Enlightened MSR-Bitmap
support") broke the build with Hyper-V disabled, because it accesses
ms_hyperv.nested_features without checking if that exists.

This is the quick-and-hacky build fix.

I suspect the proper fix is to replace the

    static_branch_unlikely(&enable_evmcs)

tests with an inline helper function that also checks that CONFIG_HYPERV
is enabled, since without that, enable_evmcs makes no sense.

But I want a working build environment first and foremost, and I'm upset
this slipped through in the first place.  My primary build tests missed
it because I tend to build with everything enabled, but it should have
been caught in the kvm tree.

Fixes: ceef7d10dfb6 ("KVM: x86: VMX: hyper-v: Enlightened MSR-Bitmap support")
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kvm/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc61e25966e4..d0dd35d582da 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4429,6 +4429,7 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 			goto out_vmcs;
 		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
 
+#if IS_ENABLED(CONFIG_HYPERV)
 		if (static_branch_unlikely(&enable_evmcs) &&
 		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
 			struct hv_enlightened_vmcs *evmcs =
@@ -4436,6 +4437,8 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 
 			evmcs->hv_enlightenments_control.msr_bitmap = 1;
 		}
+#endif
+
 	}
 	return 0;
 
-- 
cgit v1.2.3


From 050e9baa9dc9fbd9ce2b27f0056990fc9e0a08a0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 14 Jun 2018 12:21:18 +0900
Subject: Kbuild: rename CC_STACKPROTECTOR[_STRONG] config variables

The changes to automatically test for working stack protector compiler
support in the Kconfig files removed the special STACKPROTECTOR_AUTO
option that picked the strongest stack protector that the compiler
supported.

That was all a nice cleanup - it makes no sense to have the AUTO case
now that the Kconfig phase can just determine the compiler support
directly.

HOWEVER.

It also meant that doing "make oldconfig" would now _disable_ the strong
stackprotector if you had AUTO enabled, because in a legacy config file,
the sane stack protector configuration would look like

  CONFIG_HAVE_CC_STACKPROTECTOR=y
  # CONFIG_CC_STACKPROTECTOR_NONE is not set
  # CONFIG_CC_STACKPROTECTOR_REGULAR is not set
  # CONFIG_CC_STACKPROTECTOR_STRONG is not set
  CONFIG_CC_STACKPROTECTOR_AUTO=y

and when you ran this through "make oldconfig" with the Kbuild changes,
it would ask you about the regular CONFIG_CC_STACKPROTECTOR (that had
been renamed from CONFIG_CC_STACKPROTECTOR_REGULAR to just
CONFIG_CC_STACKPROTECTOR), but it would think that the STRONG version
used to be disabled (because it was really enabled by AUTO), and would
disable it in the new config, resulting in:

  CONFIG_HAVE_CC_STACKPROTECTOR=y
  CONFIG_CC_HAS_STACKPROTECTOR_NONE=y
  CONFIG_CC_STACKPROTECTOR=y
  # CONFIG_CC_STACKPROTECTOR_STRONG is not set
  CONFIG_CC_HAS_SANE_STACKPROTECTOR=y

That's dangerously subtle - people could suddenly find themselves with
the weaker stack protector setup without even realizing.

The solution here is to just rename not just the old RECULAR stack
protector option, but also the strong one.  This does that by just
removing the CC_ prefix entirely for the user choices, because it really
is not about the compiler support (the compiler support now instead
automatially impacts _visibility_ of the options to users).

This results in "make oldconfig" actually asking the user for their
choice, so that we don't have any silent subtle security model changes.
The end result would generally look like this:

  CONFIG_HAVE_CC_STACKPROTECTOR=y
  CONFIG_CC_HAS_STACKPROTECTOR_NONE=y
  CONFIG_STACKPROTECTOR=y
  CONFIG_STACKPROTECTOR_STRONG=y
  CONFIG_CC_HAS_SANE_STACKPROTECTOR=y

where the "CC_" versions really are about internal compiler
infrastructure, not the user selections.

Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                          | 6 +++---
 arch/arm/kernel/asm-offsets.c         | 2 +-
 arch/arm/kernel/entry-armv.S          | 4 ++--
 arch/arm/kernel/process.c             | 2 +-
 arch/arm64/kernel/process.c           | 2 +-
 arch/mips/kernel/asm-offsets.c        | 2 +-
 arch/mips/kernel/octeon_switch.S      | 2 +-
 arch/mips/kernel/process.c            | 2 +-
 arch/mips/kernel/r2300_switch.S       | 2 +-
 arch/mips/kernel/r4k_switch.S         | 2 +-
 arch/sh/kernel/process.c              | 2 +-
 arch/sh/kernel/process_32.c           | 2 +-
 arch/x86/entry/entry_32.S             | 2 +-
 arch/x86/entry/entry_64.S             | 2 +-
 arch/x86/include/asm/processor.h      | 2 +-
 arch/x86/include/asm/segment.h        | 2 +-
 arch/x86/include/asm/stackprotector.h | 6 +++---
 arch/x86/kernel/asm-offsets.c         | 2 +-
 arch/x86/kernel/asm-offsets_32.c      | 2 +-
 arch/x86/kernel/asm-offsets_64.c      | 2 +-
 arch/x86/kernel/cpu/common.c          | 2 +-
 arch/x86/kernel/head_32.S             | 2 +-
 arch/xtensa/kernel/asm-offsets.c      | 2 +-
 arch/xtensa/kernel/entry.S            | 2 +-
 arch/xtensa/kernel/process.c          | 2 +-
 25 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index ebbb45096191..c302b3dd0058 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -558,7 +558,7 @@ config HAVE_CC_STACKPROTECTOR
 config CC_HAS_STACKPROTECTOR_NONE
 	def_bool $(cc-option,-fno-stack-protector)
 
-config CC_STACKPROTECTOR
+config STACKPROTECTOR
 	bool "Stack Protector buffer overflow detection"
 	depends on HAVE_CC_STACKPROTECTOR
 	depends on $(cc-option,-fstack-protector)
@@ -582,9 +582,9 @@ config CC_STACKPROTECTOR
 	  about 3% of all kernel functions, which increases kernel code size
 	  by about 0.3%.
 
-config CC_STACKPROTECTOR_STRONG
+config STACKPROTECTOR_STRONG
 	bool "Strong Stack Protector"
-	depends on CC_STACKPROTECTOR
+	depends on STACKPROTECTOR
 	depends on $(cc-option,-fstack-protector-strong)
 	default y
 	help
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 27c5381518d8..974d8d7d1bcd 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -61,7 +61,7 @@
 int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
   DEFINE(TSK_STACK_CANARY,	offsetof(struct task_struct, stack_canary));
 #endif
   BLANK();
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 1752033b0070..179a9f6bd1e3 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -791,7 +791,7 @@ ENTRY(__switch_to)
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 	switch_tls r1, r4, r5, r3, r7
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
 	.if (TSK_STACK_CANARY > IMM12_MASK)
@@ -807,7 +807,7 @@ ENTRY(__switch_to)
 	ldr	r0, =thread_notify_head
 	mov	r1, #THREAD_NOTIFY_SWITCH
 	bl	atomic_notifier_call_chain
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	str	r7, [r8]
 #endif
  THUMB(	mov	ip, r4			   )
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 1523cb18b109..225d1c58d2de 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,7 +39,7 @@
 #include <asm/tls.h>
 #include <asm/vdso.h>
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f08a2ed9db0d..e10bc363f533 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -59,7 +59,7 @@
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index c1cd41456d42..cbe4742d2fff 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -83,7 +83,7 @@ void output_task_defines(void)
 	OFFSET(TASK_FLAGS, task_struct, flags);
 	OFFSET(TASK_MM, task_struct, mm);
 	OFFSET(TASK_PID, task_struct, pid);
-#if defined(CONFIG_CC_STACKPROTECTOR)
+#if defined(CONFIG_STACKPROTECTOR)
 	OFFSET(TASK_STACK_CANARY, task_struct, stack_canary);
 #endif
 	DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct));
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index e42113fe2762..896080b445c2 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -61,7 +61,7 @@
 #endif
 3:
 
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	PTR_LA	t8, __stack_chk_guard
 	LONG_L	t9, TASK_STACK_CANARY(a1)
 	LONG_S	t9, 0(t8)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 3775a8d694fb..8d85046adcc8 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -180,7 +180,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 665897139f30..71b1aafae1bb 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -36,7 +36,7 @@ LEAF(resume)
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)
 
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	PTR_LA	t8, __stack_chk_guard
 	LONG_L	t9, TASK_STACK_CANARY(a1)
 	LONG_S	t9, 0(t8)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 17cf9341c1cf..58232ae6cfae 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -31,7 +31,7 @@
 	cpu_save_nonscratch a0
 	LONG_S	ra, THREAD_REG31(a0)
 
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	PTR_LA	t8, __stack_chk_guard
 	LONG_L	t9, TASK_STACK_CANARY(a1)
 	LONG_S	t9, 0(t8)
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index 68b1a67533ce..4d1bfc848dd3 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -12,7 +12,7 @@
 struct kmem_cache *task_xstate_cachep = NULL;
 unsigned int xstate_size;
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 93522069cb15..27fddb56b3e1 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -177,7 +177,7 @@ __switch_to(struct task_struct *prev, struct task_struct *next)
 {
 	struct thread_struct *next_t = &next->thread;
 
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	__stack_chk_guard = next->stack_canary;
 #endif
 
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bef8e2b202a8..2582881d19ce 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -239,7 +239,7 @@ ENTRY(__switch_to_asm)
 	movl	%esp, TASK_threadsp(%eax)
 	movl	TASK_threadsp(%edx), %esp
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	movl	TASK_stack_canary(%edx), %ebx
 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3166b9674429..73a522d53b53 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -357,7 +357,7 @@ ENTRY(__switch_to_asm)
 	movq	%rsp, TASK_threadsp(%rdi)
 	movq	TASK_threadsp(%rsi), %rsp
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e28add6b791f..cfd29ee8c3da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -412,7 +412,7 @@ extern asmlinkage void ignore_sysret(void);
 void save_fsgs_for_kvm(void);
 #endif
 #else	/* X86_64 */
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 /*
  * Make sure stack canary segment base is cached-aligned:
  *   "For Intel Atom processors, avoid non zero segment base address
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 8f09012b92e7..e293c122d0d5 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -146,7 +146,7 @@
 # define __KERNEL_PERCPU		0
 #endif
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 # define __KERNEL_STACK_CANARY		(GDT_ENTRY_STACK_CANARY*8)
 #else
 # define __KERNEL_STACK_CANARY		0
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 371b3a4af000..8ec97a62c245 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -34,7 +34,7 @@
 #ifndef _ASM_STACKPROTECTOR_H
 #define _ASM_STACKPROTECTOR_H 1
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 
 #include <asm/tsc.h>
 #include <asm/processor.h>
@@ -105,7 +105,7 @@ static inline void load_stack_canary_segment(void)
 #endif
 }
 
-#else	/* CC_STACKPROTECTOR */
+#else	/* STACKPROTECTOR */
 
 #define GDT_STACK_CANARY_INIT
 
@@ -121,5 +121,5 @@ static inline void load_stack_canary_segment(void)
 #endif
 }
 
-#endif	/* CC_STACKPROTECTOR */
+#endif	/* STACKPROTECTOR */
 #endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 76417a9aab73..dcb008c320fe 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,7 @@
 void common(void) {
 	BLANK();
 	OFFSET(TASK_threadsp, task_struct, thread.sp);
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	OFFSET(TASK_stack_canary, task_struct, stack_canary);
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index f91ba53e06c8..a4a3be399f4b 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -50,7 +50,7 @@ void foo(void)
 	DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
 	       offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	BLANK();
 	OFFSET(stack_canary_offset, stack_canary, canary);
 #endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bf51e51d808d..b2dcd161f514 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -69,7 +69,7 @@ int main(void)
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	BLANK();
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
 	BLANK();
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 910b47ee8078..0df7151cfef4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1599,7 +1599,7 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
 	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b59e4fb40fd9..abe6df15a8fb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -375,7 +375,7 @@ ENDPROC(startup_32_smp)
  */
 __INIT
 setup_once:
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	/*
 	 * Configure the stack canary. The linker can't handle this by
 	 * relocation.  Manually set base address in stack canary
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 022cf918ec20..67904f55f188 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -76,7 +76,7 @@ int main(void)
 	DEFINE(TASK_PID, offsetof (struct task_struct, pid));
 	DEFINE(TASK_THREAD, offsetof (struct task_struct, thread));
 	DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack));
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	DEFINE(TASK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
 #endif
 	DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct));
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 5caff0744f3c..9cbc380e9572 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1971,7 +1971,7 @@ ENTRY(_switch_to)
 	s32i	a1, a2, THREAD_SP	# save stack pointer
 #endif
 
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	movi	a6, __stack_chk_guard
 	l32i	a8, a3, TASK_STACK_CANARY
 	s32i	a8, a6, 0
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 8dd0593fb2c4..483dcfb6e681 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -58,7 +58,7 @@ void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
-- 
cgit v1.2.3


From 273ba45796c14b4a2b669098f13d576b9e233dd8 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 11 Jun 2018 14:12:10 -0300
Subject: KVM: x86: fix typo at kvm_arch_hardware_setup comment

Fix typo in sentence about min value calculation.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6bcecc325e7e..0046aa70205a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8567,7 +8567,7 @@ int kvm_arch_hardware_setup(void)
 		/*
 		 * Make sure the user can only configure tsc_khz values that
 		 * fit into a signed integer.
-		 * A min value is not calculated needed because it will always
+		 * A min value is not calculated because it will always
 		 * be 1 on all machines.
 		 */
 		u64 max = min(0x7fffffffULL,
-- 
cgit v1.2.3


From 1f008e114b1ba17e1d73e61149070c502174bb89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 25 May 2018 17:36:17 +0200
Subject: KVM: x86: VMX: redo fix for link error without CONFIG_HYPERV

Arnd had sent this patch to the KVM mailing list, but it slipped through
the cracks of maintainers hand-off, and therefore wasn't included in
the pull request.

The same issue had been fixed by Linus in commit dbee3d0 ("KVM: x86:
VMX: fix build without hyper-v", 2018-06-12) as a self-described
"quick-and-hacky build fix".  However, checking the compile-time
configuration symbol with IS_ENABLED is cleaner and it is enough to
avoid the link error, so switch to Arnd's solution.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[Rewritten commit message. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d0dd35d582da..559a12b6184d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4429,16 +4429,14 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 			goto out_vmcs;
 		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
 
-#if IS_ENABLED(CONFIG_HYPERV)
-		if (static_branch_unlikely(&enable_evmcs) &&
+		if (IS_ENABLED(CONFIG_HYPERV) &&
+		    static_branch_unlikely(&enable_evmcs) &&
 		    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
 			struct hv_enlightened_vmcs *evmcs =
 				(struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
 
 			evmcs->hv_enlightenments_control.msr_bitmap = 1;
 		}
-#endif
-
 	}
 	return 0;
 
-- 
cgit v1.2.3


From 8458f8c2d4e1f7362c65c37ccf2d1d5c00f77fa5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 14 Jun 2018 19:36:43 +0900
Subject: x86: fix dependency of X86_32_LAZY_GS

Commit 2a61f4747eea ("stack-protector: test compiler capability in
Kconfig and drop AUTO mode") replaced the 'choice' with two boolean
symbols, so CC_STACKPROTECTOR_NONE no longer exists.

Prior to commit 2bc2f688fdf8 ("Makefile: move stack-protector
availability out of Kconfig"), this line was like this:

  depends on X86_32 && !CC_STACKPROTECTOR

The CC_ prefix was dropped by commit 050e9baa9dc9 ("Kbuild: rename
CC_STACKPROTECTOR[_STRONG] config variables"), so the dependency now
should be:

  depends on X86_32 && !STACKPROTECTOR

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 455a670ab239..d6c6ee6af26b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -327,7 +327,7 @@ config X86_64_SMP
 
 config X86_32_LAZY_GS
 	def_bool y
-	depends on X86_32 && CC_STACKPROTECTOR_NONE
+	depends on X86_32 && !STACKPROTECTOR
 
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
-- 
cgit v1.2.3


From d148eac0e70f06485dbd4cce6ed01cb07c650cec Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 14 Jun 2018 19:36:45 +0900
Subject: Kbuild: rename HAVE_CC_STACKPROTECTOR config variable

HAVE_CC_STACKPROTECTOR should be selected by architectures with stack
canary implementation.  It is not about the compiler support.

For the consistency with commit 050e9baa9dc9 ("Kbuild: rename
CC_STACKPROTECTOR[_STRONG] config variables"), remove 'CC_' from the
config symbol.

I moved the 'select' lines to keep the alphabetical sorting.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig        | 4 ++--
 arch/arm/Kconfig    | 2 +-
 arch/arm64/Kconfig  | 2 +-
 arch/mips/Kconfig   | 2 +-
 arch/sh/Kconfig     | 2 +-
 arch/x86/Kconfig    | 2 +-
 arch/xtensa/Kconfig | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index c302b3dd0058..47b235d43909 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -549,7 +549,7 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
 	  in structures.  This reduces the performance hit of RANDSTRUCT
 	  at the cost of weakened randomization.
 
-config HAVE_CC_STACKPROTECTOR
+config HAVE_STACKPROTECTOR
 	bool
 	help
 	  An arch should select this symbol if:
@@ -560,7 +560,7 @@ config CC_HAS_STACKPROTECTOR_NONE
 
 config STACKPROTECTOR
 	bool "Stack Protector buffer overflow detection"
-	depends on HAVE_CC_STACKPROTECTOR
+	depends on HAVE_STACKPROTECTOR
 	depends on $(cc-option,-fstack-protector)
 	default y
 	help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a78bdef9a24..0be4397f3ccc 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -57,7 +57,6 @@ config ARM
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARM_SMCCC if CPU_V7
 	select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
-	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
@@ -92,6 +91,7 @@ config ARM
 	select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
+	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 14f204c45450..42c090cf0292 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -103,7 +103,6 @@ config ARM64
 	select HAVE_ARM_SMCCC
 	select HAVE_EBPF_JIT
 	select HAVE_C_RECORDMCOUNT
-	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING
@@ -128,6 +127,7 @@ config ARM64
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RCU_TABLE_FREE
+	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fe98e459a416..3f9deec70b92 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -41,7 +41,6 @@ config MIPS
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
 	select HAVE_CBPF_JIT if (!64BIT && !CPU_MICROMIPS)
 	select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS)
-	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
@@ -66,6 +65,7 @@ config MIPS
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
 	select IRQ_FORCED_THREADING
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4d61a085982b..4bedd1c97f29 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -77,7 +77,7 @@ config SUPERH32
 	select PERF_EVENTS
 	select ARCH_HIBERNATION_POSSIBLE if MMU
 	select SPARSE_IRQ
-	select HAVE_CC_STACKPROTECTOR
+	select HAVE_STACKPROTECTOR
 
 config SUPERH64
 	def_bool "$(ARCH)" = "sh64"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6c6ee6af26b..f1dbb4ee19d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -130,7 +130,6 @@ config X86
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
 	select HAVE_ARCH_VMAP_STACK		if X86_64
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
-	select HAVE_CC_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_CONTEXT_TRACKING		if X86_64
@@ -182,6 +181,7 @@ config X86
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
+	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_STACK_VALIDATION		if X86_64
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 17df332269b2..d575e8701955 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -17,7 +17,6 @@ config XTENSA
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_STRNCPY_FROM_USER if KASAN
 	select HAVE_ARCH_KASAN if MMU
-	select HAVE_CC_STACKPROTECTOR
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EXIT_THREAD
@@ -28,6 +27,7 @@ config XTENSA
 	select HAVE_MEMBLOCK
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
+	select HAVE_STACKPROTECTOR
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
-- 
cgit v1.2.3


From 2bdce74412c249ac01dfe36b6b0043ffd7a5361e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 14 Jun 2018 15:26:24 -0700
Subject: mm: fix devmem_is_allowed() for sub-page System RAM intersections

Hussam reports:

    I was poking around and for no real reason, I did cat /dev/mem and
    strings /dev/mem.  Then I saw the following warning in dmesg. I saved it
    and rebooted immediately.

     memremap attempted on mixed range 0x000000000009c000 size: 0x1000
     ------------[ cut here ]------------
     WARNING: CPU: 0 PID: 11810 at kernel/memremap.c:98 memremap+0x104/0x170
     [..]
     Call Trace:
      xlate_dev_mem_ptr+0x25/0x40
      read_mem+0x89/0x1a0
      __vfs_read+0x36/0x170

The memremap() implementation checks for attempts to remap System RAM
with MEMREMAP_WB and instead redirects those mapping attempts to the
linear map.  However, that only works if the physical address range
being remapped is page aligned.  In low memory we have situations like
the following:

    00000000-00000fff : Reserved
    00001000-0009fbff : System RAM
    0009fc00-0009ffff : Reserved

...where System RAM intersects Reserved ranges on a sub-page page
granularity.

Given that devmem_is_allowed() special cases any attempt to map System
RAM in the first 1MB of memory, replace page_is_ram() with the more
precise region_intersects() to trap attempts to map disallowed ranges.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199999
Link: http://lkml.kernel.org/r/152856436164.18127.2847888121707136898.stgit@dwillia2-desk3.amr.corp.intel.com
Fixes: 92281dee825f ("arch: introduce memremap()")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Hussam Al-Tayeb <me@hussam.eu.org>
Tested-by: Hussam Al-Tayeb <me@hussam.eu.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fec82b577c18..cee58a972cb2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -706,7 +706,9 @@ void __init init_mem_mapping(void)
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (page_is_ram(pagenr)) {
+	if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE,
+				IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+			!= REGION_DISJOINT) {
 		/*
 		 * For disallowed memory regions in the low 1MB range,
 		 * request that the page be shown as all zeros.
-- 
cgit v1.2.3


From 758517202bd2e427664857c9f2aa59da36848aca Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 14 Jun 2018 15:27:44 -0700
Subject: arm: port KCOV to arm

KCOV is code coverage collection facility used, in particular, by
syzkaller system call fuzzer.  There is some interest in using syzkaller
on arm devices.  So port KCOV to arm.

On implementation level this merely declares that KCOV is supported and
disables instrumentation of 3 special cases.  Reasons for disabling are
commented in code.

Tested with qemu-system-arm/vexpress-a15.

Link: http://lkml.kernel.org/r/20180511143248.112484-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Abbott Liu <liuwenliang@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Koguchi Takuo <takuo.koguchi.sw@hitachi.com>
Cc: <syzkaller@googlegroups.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig                  | 3 ++-
 arch/arm/boot/compressed/Makefile | 3 +++
 arch/arm/kvm/hyp/Makefile         | 8 ++++++++
 arch/arm/vdso/Makefile            | 3 +++
 4 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2a78bdef9a24..47eb26dc2427 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,9 +8,10 @@ config ARM
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_KCOV
 	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
-	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_PHYS_TO_DMA
+	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index a3c5fbcad4ab..1f5a5ffe7fcf 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -25,6 +25,9 @@ endif
 
 GCOV_PROFILE		:= n
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 #
 # Architecture dependencies
 #
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 7fc0638f263a..d2b5ec9c4b92 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -23,3 +23,11 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += switch.o
 CFLAGS_switch.o		   += $(CFLAGS_ARMV7VE)
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
+
+# KVM code is run at a different exception code with a different map, so
+# compiler instrumentation that inserts callbacks or checks into the code may
+# cause crashes. Just disable it.
+GCOV_PROFILE	:= n
+KASAN_SANITIZE	:= n
+UBSAN_SANITIZE	:= n
+KCOV_INSTRUMENT	:= n
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index bb4118213fee..f4efff9d3afb 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -30,6 +30,9 @@ CFLAGS_vgettimeofday.o = -O2
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
 # Force dependency
 $(obj)/vdso.o : $(obj)/vdso.so
 
-- 
cgit v1.2.3


From d7dc899abefb4412388a5d3ec690070197d07d20 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Thu, 14 Jun 2018 15:28:02 -0700
Subject: treewide: use PHYS_ADDR_MAX to avoid type casting ULLONG_MAX

With PHYS_ADDR_MAX there is now a type safe variant for all bits set.
Make use of it.

Patch created using a semantic patch as follows:

// <smpl>
@@
typedef phys_addr_t;
@@
-(phys_addr_t)ULLONG_MAX
+PHYS_ADDR_MAX
// </smpl>

Link: http://lkml.kernel.org/r/20180419214204.19322-1-stefan@agner.ch
Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>	[arm64]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/init.c     | 6 +++---
 arch/mips/kernel/setup.c | 4 ++--
 arch/powerpc/mm/mem.c    | 2 +-
 arch/sparc/mm/init_64.c  | 2 +-
 arch/x86/mm/init_32.c    | 2 +-
 arch/x86/mm/init_64.c    | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1b18b4722420..325cfb3b858a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -310,7 +310,7 @@ static void __init arm64_memory_present(void)
 }
 #endif
 
-static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX;
+static phys_addr_t memory_limit = PHYS_ADDR_MAX;
 
 /*
  * Limit the memory size that was specified via FDT.
@@ -401,7 +401,7 @@ void __init arm64_memblock_init(void)
 	 * high up in memory, add back the kernel region that must be accessible
 	 * via the linear mapping.
 	 */
-	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+	if (memory_limit != PHYS_ADDR_MAX) {
 		memblock_mem_limit_remove_map(memory_limit);
 		memblock_add(__pa_symbol(_text), (u64)(_end - _text));
 	}
@@ -666,7 +666,7 @@ __setup("keepinitrd", keepinitrd_setup);
  */
 static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
 {
-	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+	if (memory_limit != PHYS_ADDR_MAX) {
 		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
 	} else {
 		pr_emerg("Memory Limit: none\n");
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 563188ac6fa2..2c96c0c68116 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -93,7 +93,7 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type)
 	 * If the region reaches the top of the physical address space, adjust
 	 * the size slightly so that (start + size) doesn't overflow
 	 */
-	if (start + size - 1 == (phys_addr_t)ULLONG_MAX)
+	if (start + size - 1 == PHYS_ADDR_MAX)
 		--size;
 
 	/* Sanity check */
@@ -376,7 +376,7 @@ static void __init bootmem_init(void)
 	unsigned long reserved_end;
 	unsigned long mapstart = ~0UL;
 	unsigned long bootmap_size;
-	phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
+	phys_addr_t ramstart = PHYS_ADDR_MAX;
 	bool bootmap_valid = false;
 	int i;
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8cecda4bd66a..5c8530d0c611 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -215,7 +215,7 @@ void __init mem_topology_setup(void)
 	/* Place all memblock_regions in the same node and merge contiguous
 	 * memblock_regions
 	 */
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
 }
 
 void __init initmem_init(void)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8aeb1aabe76e..f396048a0d68 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1620,7 +1620,7 @@ static void __init bootmem_init_nonnuma(void)
 	       (top_of_ram - total_ram) >> 20);
 
 	init_node_masks_nonnuma();
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
 	allocate_node_data(0);
 	node_set_online(0);
 }
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c893c6a3d707..979e0a02cbe1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -692,7 +692,7 @@ void __init initmem_init(void)
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
 	sparse_memory_present_with_active_regions(0);
 
 #ifdef CONFIG_FLATMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 17383f9677fa..045f492d5f68 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -742,7 +742,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
 }
 #endif
 
-- 
cgit v1.2.3


From 2738f359b1dcae50b704efe4ab799ea4861fc490 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 14 Jun 2018 15:28:09 -0700
Subject: hexagon: fix printk format warning in setup.c

Fix printk format warning in hexagon/kernel/setup.c:

../arch/hexagon/kernel/setup.c: In function 'setup_arch':
../arch/hexagon/kernel/setup.c:69:2: warning: format '%x' expects argument of type 'unsigned int', but argument 2 has type 'long unsigned int' [-Wformat]

where:
extern unsigned long	__phys_offset;
#define PHYS_OFFSET	__phys_offset

Link: http://lkml.kernel.org/r/adce8db5-4b01-dc10-7fbb-6a64e0787eb5@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 6981949f5df3..dc8c7e75b5d1 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -66,7 +66,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	__vmsetvec(_K_VM_event_vector);
 
-	printk(KERN_INFO "PHYS_OFFSET=0x%08x\n", PHYS_OFFSET);
+	printk(KERN_INFO "PHYS_OFFSET=0x%08lx\n", PHYS_OFFSET);
 
 	/*
 	 * Simulator has a few differences from the hardware.
-- 
cgit v1.2.3


From 608dbdfb1f0299f4500e56d62b0d84c44dcfa3be Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Thu, 14 Jun 2018 15:28:12 -0700
Subject: hexagon: drop the unused variable zero_page_mask

Hexagon arch does not seem to have subscribed to _HAVE_COLOR_ZERO_PAGE
framework.  Hence zero_page_mask variable is not needed.

Link: http://lkml.kernel.org/r/20180517061105.30447-1-khandual@linux.vnet.ibm.com
Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/include/asm/pgtable.h | 1 -
 arch/hexagon/mm/init.c             | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index aef02f7ca8aa..65125d0b02dd 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -30,7 +30,6 @@
 
 /* A handy thing to have if one has the RAM. Declared in head.S */
 extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
 
 /*
  * The PTE model described here is that of the Hexagon Virtual Machine,
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index 192584d5ac2f..1495d45e472d 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -39,9 +39,6 @@ unsigned long __phys_offset;	/*  physical kernel offset >> 12  */
 /*  Set as variable to limit PMD copies  */
 int max_kernel_seg = 0x303;
 
-/*  think this should be (page_size-1) the way it's used...*/
-unsigned long zero_page_mask;
-
 /*  indicate pfn's of high memory  */
 unsigned long highstart_pfn, highend_pfn;
 
-- 
cgit v1.2.3


From ecf38679349f8f8ad4d67c69246e7f6c33460c95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 8 May 2018 23:44:08 -0300
Subject: arch/*: Kconfig: fix documentation for NMI watchdog

Changeset 9919cba7ff71 ("watchdog: Update documentation") updated
the documentation, removing the old nmi_watchdog.txt and adding
a file with a new content.

Update Kconfig files accordingly.

Fixes: 9919cba7ff71 ("watchdog: Update documentation")

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Jonathan Corbet <corbet@lwn.net>
---
 arch/arm/Kconfig    | 2 +-
 arch/parisc/Kconfig | 2 +-
 arch/sh/Kconfig     | 2 +-
 arch/sparc/Kconfig  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 483d2858e367..54eeb8d00bc6 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1302,7 +1302,7 @@ config SMP
 	  will run faster if you say N here.
 
 	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
-	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+	  <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
 	  <http://tldp.org/HOWTO/SMP-HOWTO.html>.
 
 	  If you don't know what to do here, say N.
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 4d8f64d48597..c480770fabcd 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -275,7 +275,7 @@ config SMP
 	  machines, but will use only one CPU of a multiprocessor machine.
 	  On a uniprocessor machine, the kernel will run faster if you say N.
 
-	  See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
+	  See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
 	  available at <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4bedd1c97f29..dd4f3d3e644f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -687,7 +687,7 @@ config SMP
 	  People using multiprocessor machines who say Y here should also say
 	  Y to "Enhanced Real Time Clock Support", below.
 
-	  See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
+	  See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
 	  available at <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 9a2b8877f174..0f535debf802 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -178,7 +178,7 @@ config SMP
 	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
 	  Management" code will be disabled if you say Y here.
 
-	  See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
+	  See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
 	  available at <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
-- 
cgit v1.2.3


From 5fb94e9ca333f0fe1d96de06704a79942b3832c3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 8 May 2018 15:14:57 -0300
Subject: docs: Fix some broken references

As we move stuff around, some doc references are broken. Fix some of
them via this script:
	./scripts/documentation-file-ref-check --fix

Manually checked if the produced result is valid, removing a few
false-positives.

Acked-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Acked-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Coly Li <colyli@suse.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Jonathan Corbet <corbet@lwn.net>
---
 arch/Kconfig                             | 2 +-
 arch/arm/include/asm/cacheflush.h        | 2 +-
 arch/arm64/include/asm/cacheflush.h      | 2 +-
 arch/microblaze/include/asm/cacheflush.h | 2 +-
 arch/um/Kconfig.um                       | 2 +-
 arch/unicore32/include/asm/cacheflush.h  | 2 +-
 arch/x86/entry/vsyscall/vsyscall_64.c    | 2 +-
 arch/xtensa/include/asm/cacheflush.h     | 4 ++--
 8 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index 47b235d43909..1aa59063f1fd 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -403,7 +403,7 @@ config SECCOMP_FILTER
 	  in terms of Berkeley Packet Filter programs which implement
 	  task-defined system call filtering polices.
 
-	  See Documentation/prctl/seccomp_filter.txt for details.
+	  See Documentation/userspace-api/seccomp_filter.rst for details.
 
 preferred-plugin-hostcc := $(if-success,[ $(gcc-version) -ge 40800 ],$(HOSTCXX),$(HOSTCC))
 
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 869080bedb89..ec1a5fd0d294 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -35,7 +35,7 @@
  *	Start addresses are inclusive and end addresses are exclusive;
  *	start addresses should be rounded down, end addresses up.
  *
- *	See Documentation/cachetlb.txt for more information.
+ *	See Documentation/core-api/cachetlb.rst for more information.
  *	Please note that the implementation of these, and the required
  *	effects are cache-type (VIVT/VIPT/PIPT) specific.
  *
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 0094c6653b06..d264a7274811 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -36,7 +36,7 @@
  *	Start addresses are inclusive and end addresses are exclusive; start
  *	addresses should be rounded down, end addresses up.
  *
- *	See Documentation/cachetlb.txt for more information. Please note that
+ *	See Documentation/core-api/cachetlb.rst for more information. Please note that
  *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
  *	VIPT I-cache.
  *
diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index ffea82a16d2c..b091de77b15b 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -19,7 +19,7 @@
 #include <linux/mm.h>
 #include <linux/io.h>
 
-/* Look at Documentation/cachetlb.txt */
+/* Look at Documentation/core-api/cachetlb.rst */
 
 /*
  * Cache handling functions.
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index 3e7f228b22e1..20da5a8ca949 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -80,7 +80,7 @@ config MAGIC_SYSRQ
 	  On UML, this is accomplished by sending a "sysrq" command with
 	  mconsole, followed by the letter for the requested command.
 
-	  The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+	  The keys are documented in <file:Documentation/admin-guide/sysrq.rst>. Don't say Y
 	  unless you really know what this hack does.
 
 config KERNEL_STACK_ORDER
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
index 1d9132b66039..1c8b9f13a9e1 100644
--- a/arch/unicore32/include/asm/cacheflush.h
+++ b/arch/unicore32/include/asm/cacheflush.h
@@ -33,7 +33,7 @@
  *	Start addresses are inclusive and end addresses are exclusive;
  *	start addresses should be rounded down, end addresses up.
  *
- *	See Documentation/cachetlb.txt for more information.
+ *	See Documentation/core-api/cachetlb.rst for more information.
  *	Please note that the implementation of these, and the required
  *	effects are cache-type (VIVT/VIPT/PIPT) specific.
  *
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 7782cdbcd67d..82ed001e8909 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -201,7 +201,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	/*
 	 * Handle seccomp.  regs->ip must be the original value.
-	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
+	 * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst.
 	 *
 	 * We could optimize the seccomp disabled case, but performance
 	 * here doesn't matter.
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index 397d6a1a4224..a0d50be5a8cb 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -88,7 +88,7 @@ static inline void __invalidate_icache_page_alias(unsigned long virt,
  *
  * Pages can get remapped. Because this might change the 'color' of that page,
  * we have to flush the cache before the PTE is changed.
- * (see also Documentation/cachetlb.txt)
+ * (see also Documentation/core-api/cachetlb.rst)
  */
 
 #if defined(CONFIG_MMU) && \
@@ -152,7 +152,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
 		__invalidate_icache_range(start,(end) - (start));	\
 	} while (0)
 
-/* This is not required, see Documentation/cachetlb.txt */
+/* This is not required, see Documentation/core-api/cachetlb.rst */
 #define	flush_icache_page(vma,page)			do { } while (0)
 
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
-- 
cgit v1.2.3