diff options
Diffstat (limited to 'arch/arm64')
90 files changed, 1145 insertions, 1758 deletions
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 13a0e63afeaf..2c64d834a2c4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -152,28 +152,12 @@ vcc-pg-supply = <&reg_aldo1>; }; -&r_ir { - linux,rc-map-name = "rc-beelink-gs1"; - status = "okay"; -}; - -&r_pio { - /* - * FIXME: We can't add that supply for now since it would - * create a circular dependency between pinctrl, the regulator - * and the RSB Bus. - * - * vcc-pl-supply = <&reg_aldo1>; - */ - vcc-pm-supply = <&reg_aldo1>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; @@ -291,6 +275,22 @@ }; }; +&r_ir { + linux,rc-map-name = "rc-beelink-gs1"; + status = "okay"; +}; + +&r_pio { + /* + * PL0 and PL1 are used for PMIC I2C + * don't enable the pl-supply else + * it will fail at boot + * + * vcc-pl-supply = <&reg_aldo1>; + */ + vcc-pm-supply = <&reg_aldo1>; +}; + &spdif { pinctrl-names = "default"; pinctrl-0 = <&spdif_tx_pin>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts index ab87c3447cd7..f005072c68a1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts @@ -176,16 +176,12 @@ vcc-pg-supply = <&reg_vcc_wifi_io>; }; -&r_ir { - status = "okay"; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; @@ -296,6 +292,10 @@ }; }; +&r_ir { + status = "okay"; +}; + &rtc { clocks = <&ext_osc32k>; }; 
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi index d05dc5d6e6b9..e34dbb992021 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi @@ -113,20 +113,12 @@ vcc-pg-supply = <&reg_aldo1>; }; -&r_ir { - status = "okay"; -}; - -&r_pio { - vcc-pm-supply = <&reg_bldo3>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; @@ -241,6 +233,14 @@ }; }; +&r_ir { + status = "okay"; +}; + +&r_pio { + vcc-pm-supply = <&reg_bldo3>; +}; + &rtc { clocks = <&ext_osc32k>; }; diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index da9de4986660..5a72f0b64247 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -151,7 +151,7 @@ al,msi-num-spis = <160>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 8b6156b5af65..dea60d136c2e 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -361,7 +361,7 @@ interrupt-parent = <&gic>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index ab2b3f15ef19..69834b49673d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2313,7 +2313,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x19000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the 
datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2325,7 +2325,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1a000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2337,7 +2337,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1b000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi index de35fa2d7a6d..8e3e3354ed67 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi @@ -116,6 +116,10 @@ status = "okay"; }; +&clkc_audio { + status = "okay"; +}; + &frddr_a { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 8ebce7114a60..6c134592c7bb 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -741,7 +741,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -752,14 +752,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc 
CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 2dc2fdaecf9f..19b8a39de6a0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -811,7 +811,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -822,14 +822,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index 2dfe7b895b2b..e2d9439397f7 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -77,6 +77,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. Mark the domain as always on until the driver can + * handle this. + */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index 3d73f9ee2f46..be86d34c6696 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -40,6 +40,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. 
Mark the domain as always on until the driver can + * handle this. + */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; diff --git a/arch/arm64/boot/dts/arm/morello.dtsi b/arch/arm64/boot/dts/arm/morello.dtsi index 0bab0b3ea969..5bc1c725dc86 100644 --- a/arch/arm64/boot/dts/arm/morello.dtsi +++ b/arch/arm64/boot/dts/arm/morello.dtsi @@ -44,7 +44,7 @@ next-level-cache = <&l2_0>; clocks = <&scmi_dvfs 0>; - l2_0: l2-cache-0 { + l2_0: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -53,13 +53,6 @@ cache-sets = <2048>; cache-unified; next-level-cache = <&l3_0>; - - l3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; - cache-size = <0x100000>; - cache-unified; - }; }; }; @@ -78,7 +71,7 @@ next-level-cache = <&l2_1>; clocks = <&scmi_dvfs 0>; - l2_1: l2-cache-1 { + l2_1: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -105,7 +98,7 @@ next-level-cache = <&l2_2>; clocks = <&scmi_dvfs 1>; - l2_2: l2-cache-2 { + l2_2: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -132,7 +125,7 @@ next-level-cache = <&l2_3>; clocks = <&scmi_dvfs 1>; - l2_3: l2-cache-3 { + l2_3: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -143,6 +136,13 @@ next-level-cache = <&l3_0>; }; }; + + l3_0: l3-cache { + compatible = "cache"; + cache-level = <3>; + cache-size = <0x100000>; + cache-unified; + }; }; firmware { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 7251ad3a0017..b46566f3ce20 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -144,6 +144,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + 
regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <&reg_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -269,7 +282,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -785,6 +798,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <&reg_usdhc2_vmmc>; + vqmmc-supply = <&reg_usdhc2_vqmmc>; }; &wdog1 { @@ -1206,13 +1220,17 @@ <MX8MM_IOMUXC_NAND_CLE_GPIO3_IO5 0x6>; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + <MX8MM_IOMUXC_GPIO1_IO04_GPIO1_IO4 0x10>; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>, <MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90>, /* SODIMM 78 */ <MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x90>, /* SODIMM 74 */ <MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x90>, /* SODIMM 80 */ @@ -1223,7 +1241,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>, <MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94>, <MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x94>, <MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x94>, @@ -1234,7 +1251,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>, <MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96>, <MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x96>, <MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x96>, @@ -1246,7 +1262,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x0>, <MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x0>, <MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x0>, <MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x0>, diff 
--git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index a1b75c9068b2..2ce1860b244d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -24,6 +24,20 @@ fsl,operating-mode = "nominal"; }; +&gpu2d { + assigned-clocks = <&clk IMX8MP_CLK_GPU2D_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>; +}; + +&gpu3d { + assigned-clocks = <&clk IMX8MP_CLK_GPU3D_CORE>, + <&clk IMX8MP_CLK_GPU3D_SHADER_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, <800000000>; +}; + &pgc_hdmimix { assigned-clocks = <&clk IMX8MP_CLK_HDMI_AXI>, <&clk IMX8MP_CLK_HDMI_APB>; @@ -46,6 +60,18 @@ assigned-clock-rates = <600000000>, <300000000>; }; +&pgc_mlmix { + assigned-clocks = <&clk IMX8MP_CLK_ML_CORE>, + <&clk IMX8MP_CLK_ML_AXI>, + <&clk IMX8MP_CLK_ML_AHB>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, + <800000000>, + <300000000>; +}; + &media_blk_ctrl { assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, <&clk IMX8MP_CLK_MEDIA_APB>, @@ -62,3 +88,5 @@ <0>, <0>, <400000000>, <1039500000>; }; + +/delete-node/ &{noc_opp_table/opp-1000000000}; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi index b2ac2583a592..b59da91fdd04 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi @@ -35,7 +35,6 @@ <0x1 0x00000000 0 0xc0000000>; }; - reg_usdhc2_vmmc: regulator-usdhc2-vmmc { compatible = "regulator-fixed"; regulator-name = "VSD_3V3"; @@ -46,6 +45,16 @@ startup-delay-us = <100>; off-on-delay-us = <12000>; }; + + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + regulator-name = "VSD_VSEL"; + 
regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + gpios = <&gpio2 12 GPIO_ACTIVE_HIGH>; + states = <3300000 0x0 1800000 0x1>; + vin-supply = <&ldo5>; + }; }; &A53_0 { @@ -205,6 +214,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>; cd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_usdhc2_vmmc>; + vqmmc-supply = <&reg_usdhc2_vqmmc>; bus-width = <4>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ce6793b2d57e..7c1c87eab54c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1645,6 +1645,12 @@ opp-hz = /bits/ 64 <200000000>; }; + /* Nominal drive mode maximum */ + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + }; + + /* Overdrive mode maximum */ opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 9bb26b466a06..59f057ba6fa7 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1626,7 +1626,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x60100000 0 0xfe00000>, <0 0x4c360000 0 0x10000>, - <0 0x4c340000 0 0x2000>; + <0 0x4c340000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0x0 0x00000000 0x0 0x6ff00000 0 0x00100000>, <0x82000000 0x0 0x10000000 0x9 0x10000000 0 0x10000000>; @@ -1673,7 +1673,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x4c360000 0 0x1000>, <0 0x4c320000 0 0x1000>, - <0 0x4c340000 0 0x2000>, + <0 0x4c340000 0 0x4000>, <0 0x4c370000 0 0x10000>, <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; @@ -1700,7 +1700,7 @@ reg = <0 0x4c380000 0 0x10000>, <8 0x80100000 0 0xfe00000>, <0 0x4c3e0000 0 0x10000>, - <0 0x4c3c0000 0 0x2000>; + <0 0x4c3c0000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0 0x00000000 0x8 0x8ff00000 0 0x00100000>, <0x82000000 0 
0x10000000 0xa 0x10000000 0 0x10000000>; @@ -1749,7 +1749,7 @@ reg = <0 0x4c380000 0 0x10000>, <0 0x4c3e0000 0 0x1000>, <0 0x4c3a0000 0 0x1000>, - <0 0x4c3c0000 0 0x2000>, + <0 0x4c3c0000 0 0x4000>, <0 0x4c3f0000 0 0x10000>, <0xa 0 1 0>; reg-names = "dbi", "atu", "dbi2", "app", "dma", "addr_space"; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi index 3a9b6907185d..242820845707 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi @@ -26,6 +26,8 @@ leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&spi_quad_pins>; led-power1 { label = "udpu:green:power"; @@ -82,8 +84,6 @@ &spi0 { status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&spi_quad_pins>; flash@0 { compatible = "jedec,spi-nor"; @@ -108,6 +108,10 @@ }; }; +&spi_quad_pins { + function = "gpio"; +}; + &pinctrl_nb { i2c2_recovery_pins: i2c2-recovery-pins { groups = "i2c2"; diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi index 1edfd643b25a..a334ef0629d1 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi @@ -31,7 +31,7 @@ }; vcc3v3_btreg: vcc3v3-btreg { - compatible = "regulator-gpio"; + compatible = "regulator-fixed"; enable-active-high; pinctrl-names = "default"; pinctrl-0 = <&bt_enable_h>; @@ -39,7 +39,6 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-always-on; - states = <3300000 0x0>; }; vcc3v3_rf_aux_mod: regulator-vcc3v3-rf-aux-mod { diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi index 80db778c9684..b60e68faa83a 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi @@ -26,5 +26,5 @@ }; &vcc3v3_btreg { - 
enable-gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts index 165d09ccb942..5886b802c520 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts @@ -39,5 +39,5 @@ }; &vcc3v3_btreg { - enable-gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 541dca12bf1a..046dbe329017 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -43,7 +43,7 @@ sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; clocks = <&rk808 1>; - clock-names = "lpo"; + clock-names = "ext_clock"; pinctrl-names = "default"; pinctrl-0 = <&wifi_enable_h>; reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index a48351471764..e7ba477e75f9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -775,7 +775,7 @@ rockchip,default-sample-phase = <90>; status = "okay"; - sdio-wifi@1 { + wifi@1 { compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts index 7bd32d230ad2..b80d628c426b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts @@ -619,6 +619,8 @@ bus-width = <8>; max-frequency = <200000000>; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd 
&emmc_datastrobe>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index 828bde7fab68..314067ba6f3c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -610,7 +610,7 @@ reg = <0x51>; clock-output-names = "hym8563"; interrupt-parent = <&gpio0>; - interrupts = <RK_PB0 IRQ_TYPE_LEVEL_LOW>; + interrupts = <RK_PA0 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; wakeup-source; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi index 1af0a30866f6..af431fdcbea7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi @@ -222,6 +222,10 @@ compatible = "realtek,rt5616"; reg = <0x1b>; #sound-dai-cells = <0>; + assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; + assigned-clock-rates = <12288000>; + clocks = <&cru I2S0_8CH_MCLKOUT>; + clock-names = "mclk"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 711ac4f2c7cb..60ad272982ad 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -214,6 +214,8 @@ }; &package_thermal { + polling-delay = <1000>; + trips { package_active1: trip-active1 { temperature = <45000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index bce72bac4503..3045cb3bd68c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -11,20 +11,15 @@ compatible = "operating-points-v2"; opp-shared; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <750000 750000 950000>; clock-latency-ns = 
<40000>; opp-suspend; }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; - opp-microvolt = <887500 887500 950000>; - clock-latency-ns = <40000>; - }; - opp-1704000000 { - opp-hz = /bits/ 64 <1704000000>; - opp-microvolt = <937500 937500 950000>; + opp-1296000000 { + opp-hz = /bits/ 64 <1296000000>; + opp-microvolt = <775000 775000 950000>; clock-latency-ns = <40000>; }; }; @@ -33,9 +28,14 @@ compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -43,25 +43,20 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; cluster2_opp_table: opp-table-cluster2 { compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -69,16 +64,6 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; gpu_opp_table: opp-table { @@ -104,10 +89,6 @@ opp-hz = /bits/ 64 <700000000>; opp-microvolt = <750000 750000 850000>; }; - 
opp-850000000 { - opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <787500 787500 850000>; - }; }; }; diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 6dd1377f3e1d..bf888d60cd4f 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -116,11 +116,11 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, - <0x4ac20000 0x0 0x2000>, - <0x4ac40000 0x0 0x2000>, - <0x4ac60000 0x0 0x2000>; + <0x4ac20000 0x0 0x20000>, + <0x4ac40000 0x0 0x20000>, + <0x4ac60000 0x0 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 8820d219a33e..75697acd1345 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1201,13 +1201,12 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, - <0x4ac20000 0x2000>, - <0x4ac40000 0x2000>, - <0x4ac60000 0x2000>; + <0x4ac20000 0x20000>, + <0x4ac40000 0x20000>, + <0x4ac60000 0x20000>; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; }; }; diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index f3c6cdfd7008..87110f91e489 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -115,14 +115,13 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { diff --git a/arch/arm64/configs/defconfig 
b/arch/arm64/configs/defconfig index 5bb8f09422a2..370ad70b4be8 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1729,15 +1729,14 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_SECURITY=y CONFIG_CRYPTO_USER=y -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CHACHA20=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_SHA1_ARM64_CE=y -CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_SHA512_ARM64_CE=m CONFIG_CRYPTO_SHA3_ARM64=m CONFIG_CRYPTO_SM3_ARM64_CE=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 3418c8d3c78d..c44b0f202a1f 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -25,18 +25,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_POLY1305_NEON - tristate - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - default CRYPTO_LIB_POLY1305_INTERNAL - help - Poly1305 authenticator algorithm (RFC7539) - - Architecture: arm64 using: - - NEON (Advanced SIMD) extensions - config CRYPTO_SHA1_ARM64_CE tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON @@ -48,25 +36,6 @@ config CRYPTO_SHA1_ARM64_CE Architecture: arm64 using: - ARMv8 Crypto Extensions -config CRYPTO_SHA256_ARM64 - tristate "Hash functions: SHA-224 and SHA-256" - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm64 - -config CRYPTO_SHA2_ARM64_CE - tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA256_ARM64 - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_SHA512_ARM64 tristate "Hash functions: SHA-384 and 
SHA-512" select CRYPTO_HASH @@ -101,7 +70,7 @@ config CRYPTO_SM3_NEON tristate "Hash functions: SM3 (NEON)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 help SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) @@ -112,7 +81,7 @@ config CRYPTO_SM3_ARM64_CE tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 help SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) @@ -143,7 +112,7 @@ config CRYPTO_AES_ARM64 config CRYPTO_AES_ARM64_CE tristate "Ciphers: AES (ARMv8 Crypto Extensions)" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_LIB_AES help @@ -186,20 +155,6 @@ config CRYPTO_AES_ARM64_NEON_BLK Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_CHACHA20_NEON - tristate - depends on KERNEL_MODE_NEON - select CRYPTO_SKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - default CRYPTO_LIB_CHACHA_INTERNAL - help - Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 - stream cipher algorithms - - Architecture: arm64 using: - - NEON (Advanced SIMD) extensions - config CRYPTO_AES_ARM64_BS tristate "Ciphers: AES, modes: ECB/CBC/CTR/XCTR/XTS modes (bit-sliced NEON)" depends on KERNEL_MODE_NEON @@ -267,7 +222,7 @@ config CRYPTO_SM4_ARM64_NEON_BLK config CRYPTO_AES_ARM64_CE_CCM tristate "AEAD cipher: AES in CCM mode (ARMv8 Crypto Extensions)" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_AES_ARM64_CE select CRYPTO_AES_ARM64_CE_BLK diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index e7139c4768ce..c231c980c514 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -8,9 +8,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o -obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += 
sha2-ce.o -sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o - obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o @@ -56,19 +53,9 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o -obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o -sha256-arm64-y := sha256-glue.o sha256-core.o - obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o -poly1305-neon-y := poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 - obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o @@ -81,10 +68,7 @@ aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) -$(obj)/%-core.S: $(src)/%-armv8.pl - $(call cmd,perlasm) - -$(obj)/sha256-core.S: $(src)/sha512-armv8.pl +$(obj)/sha512-core.S: $(src)/../lib/crypto/sha2-armv8.pl $(call cmd,perlasm) -clean-files += poly1305-core.S sha256-core.S sha512-core.S +clean-files += sha512-core.S diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index b0150999743f..81560f722b9d 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -5,19 +5,20 @@ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <asm/hwcap.h> -#include <asm/simd.h> +#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/ctr.h> -#include <crypto/sha2.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> -#include <linux/module.h> -#include <linux/cpufeature.h> +#include <crypto/sha2.h> 
+#include <crypto/utils.h> #include <crypto/xts.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "aes-ce-setkey.h" @@ -130,7 +131,6 @@ struct mac_tfm_ctx { }; struct mac_desc_ctx { - unsigned int len; u8 dg[AES_BLOCK_SIZE]; }; @@ -869,109 +869,64 @@ static int mac_init(struct shash_desc *desc) struct mac_desc_ctx *ctx = shash_desc_ctx(desc); memset(ctx->dg, 0, AES_BLOCK_SIZE); - ctx->len = 0; - return 0; } static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, - u8 dg[], int enc_before, int enc_after) + u8 dg[], int enc_before) { int rounds = 6 + ctx->key_length / 4; + int rem; - if (crypto_simd_usable()) { - int rem; - - do { - kernel_neon_begin(); - rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, - dg, enc_before, enc_after); - kernel_neon_end(); - in += (blocks - rem) * AES_BLOCK_SIZE; - blocks = rem; - enc_before = 0; - } while (blocks); - } else { - if (enc_before) - aes_encrypt(ctx, dg, dg); - - while (blocks--) { - crypto_xor(dg, in, AES_BLOCK_SIZE); - in += AES_BLOCK_SIZE; - - if (blocks || enc_after) - aes_encrypt(ctx, dg, dg); - } - } + do { + kernel_neon_begin(); + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, + dg, enc_before, !enc_before); + kernel_neon_end(); + in += (blocks - rem) * AES_BLOCK_SIZE; + blocks = rem; + } while (blocks); } static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); + int blocks = len / AES_BLOCK_SIZE; - while (len > 0) { - unsigned int l; - - if ((ctx->len % AES_BLOCK_SIZE) == 0 && - (ctx->len + len) > AES_BLOCK_SIZE) { - - int blocks = len / AES_BLOCK_SIZE; - - len %= AES_BLOCK_SIZE; - - mac_do_update(&tctx->key, p, blocks, ctx->dg, - (ctx->len != 0), (len != 0)); - - p += blocks * AES_BLOCK_SIZE; - - if (!len) { - ctx->len = AES_BLOCK_SIZE; - break; - } - ctx->len = 0; - } - - l = 
min(len, AES_BLOCK_SIZE - ctx->len); - - if (l <= AES_BLOCK_SIZE) { - crypto_xor(ctx->dg + ctx->len, p, l); - ctx->len += l; - len -= l; - p += l; - } - } - - return 0; + len %= AES_BLOCK_SIZE; + mac_do_update(&tctx->key, p, blocks, ctx->dg, 0); + return len; } -static int cbcmac_final(struct shash_desc *desc, u8 *out) +static int cbcmac_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); - mac_do_update(&tctx->key, NULL, 0, ctx->dg, (ctx->len != 0), 0); - + if (len) { + crypto_xor(ctx->dg, src, len); + mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1); + } memcpy(out, ctx->dg, AES_BLOCK_SIZE); - return 0; } -static int cmac_final(struct shash_desc *desc, u8 *out) +static int cmac_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); u8 *consts = tctx->consts; - if (ctx->len != AES_BLOCK_SIZE) { - ctx->dg[ctx->len] ^= 0x80; + crypto_xor(ctx->dg, src, len); + if (len != AES_BLOCK_SIZE) { + ctx->dg[len] ^= 0x80; consts += AES_BLOCK_SIZE; } - - mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1); - + mac_do_update(&tctx->key, consts, 1, ctx->dg, 0); memcpy(out, ctx->dg, AES_BLOCK_SIZE); - return 0; } @@ -979,6 +934,8 @@ static struct shash_alg mac_algs[] = { { .base.cra_name = "cmac(aes)", .base.cra_driver_name = "cmac-aes-" MODE, .base.cra_priority = PRIO, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINAL_NONZERO, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) + 2 * AES_BLOCK_SIZE, @@ -987,13 +944,15 @@ static struct shash_alg mac_algs[] = { { .digestsize = AES_BLOCK_SIZE, .init = mac_init, .update = mac_update, - .final = cmac_final, + .finup = cmac_finup, .setkey = cmac_setkey, .descsize = sizeof(struct mac_desc_ctx), }, { .base.cra_name = "xcbc(aes)", 
.base.cra_driver_name = "xcbc-aes-" MODE, .base.cra_priority = PRIO, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINAL_NONZERO, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) + 2 * AES_BLOCK_SIZE, @@ -1002,21 +961,22 @@ static struct shash_alg mac_algs[] = { { .digestsize = AES_BLOCK_SIZE, .init = mac_init, .update = mac_update, - .final = cmac_final, + .finup = cmac_finup, .setkey = xcbc_setkey, .descsize = sizeof(struct mac_desc_ctx), }, { .base.cra_name = "cbcmac(aes)", .base.cra_driver_name = "cbcmac-aes-" MODE, .base.cra_priority = PRIO, - .base.cra_blocksize = 1, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, + .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct mac_tfm_ctx), .base.cra_module = THIS_MODULE, .digestsize = AES_BLOCK_SIZE, .init = mac_init, .update = mac_update, - .final = cbcmac_final, + .finup = cbcmac_finup, .setkey = cbcmac_setkey, .descsize = sizeof(struct mac_desc_ctx), } }; diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c deleted file mode 100644 index 229876acfc58..000000000000 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include <crypto/algapi.h> -#include <crypto/internal/chacha.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <linux/jump_label.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, - int nrounds, int bytes); -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, - int bytes, int nrounds) -{ - while (bytes > 0) { - int l = min(bytes, CHACHA_BLOCK_SIZE * 5); - - if (l <= CHACHA_BLOCK_SIZE) { - u8 buf[CHACHA_BLOCK_SIZE]; - - memcpy(buf, src, l); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, l); - state[12] += 1; - break; - } - chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= l; - src += l; - dst += l; - state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } -} - -void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -{ - if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { - hchacha_block_generic(state, stream, nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, stream, nrounds); - kernel_neon_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, - int nrounds) -{ - if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} 
-EXPORT_SYMBOL(chacha_crypt_arch); - -static int chacha_neon_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - chacha_init(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = rounddown(nbytes, walk.stride); - - if (!static_branch_likely(&have_neon) || - !crypto_simd_usable()) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - } else { - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, ctx->nrounds); - kernel_neon_end(); - } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int chacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - return chacha_neon_stream_xor(req, ctx, req->iv); -} - -static int xchacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - chacha_init(state, ctx->key, req->iv); - hchacha_block_arch(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_neon_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, - 
.setkey = chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, - .setkey = chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } -}; - -static int __init chacha_simd_mod_init(void) -{ - if (!cpu_have_named_feature(ASIMD)) - return 0; - - static_branch_enable(&have_neon); - - return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? 
- crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; -} - -static void __exit chacha_simd_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD)) - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -module_init(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 071e122f9c37..4995b6e22335 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -6,30 +6,27 @@ */ #include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/aes.h> -#include <crypto/gcm.h> -#include <crypto/algapi.h> #include <crypto/b128ops.h> +#include <crypto/gcm.h> +#include <crypto/ghash.h> #include <crypto/gf128mul.h> #include <crypto/internal/aead.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/errno.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> +#include <linux/unaligned.h> MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ghash"); -#define GHASH_BLOCK_SIZE 16 -#define GHASH_DIGEST_SIZE 16 - #define RFC4106_NONCE_SIZE 4 struct ghash_key { @@ -37,10 +34,8 @@ struct ghash_key { u64 h[][2]; }; -struct ghash_desc_ctx { +struct arm_ghash_desc_ctx { u64 
digest[GHASH_DIGEST_SIZE/sizeof(u64)]; - u8 buf[GHASH_BLOCK_SIZE]; - u32 count; }; struct gcm_aes_ctx { @@ -65,36 +60,12 @@ asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], static int ghash_init(struct shash_desc *desc) { - struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); - *ctx = (struct ghash_desc_ctx){}; + *ctx = (struct arm_ghash_desc_ctx){}; return 0; } -static void ghash_do_update(int blocks, u64 dg[], const char *src, - struct ghash_key *key, const char *head) -{ - be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) }; - - do { - const u8 *in = src; - - if (head) { - in = head; - blocks++; - head = NULL; - } else { - src += GHASH_BLOCK_SIZE; - } - - crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE); - gf128mul_lle(&dst, &key->k); - } while (--blocks); - - dg[0] = be64_to_cpu(dst.b); - dg[1] = be64_to_cpu(dst.a); -} - static __always_inline void ghash_do_simd_update(int blocks, u64 dg[], const char *src, struct ghash_key *key, const char *head, @@ -103,13 +74,9 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src, u64 const h[][2], const char *head)) { - if (likely(crypto_simd_usable())) { - kernel_neon_begin(); - simd_update(blocks, dg, src, key->h, head); - kernel_neon_end(); - } else { - ghash_do_update(blocks, dg, src, key, head); - } + kernel_neon_begin(); + simd_update(blocks, dg, src, key->h, head); + kernel_neon_end(); } /* avoid hogging the CPU for too long */ @@ -118,61 +85,59 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src, static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int len) { - struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); - unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; - ctx->count += len; + blocks = len / GHASH_BLOCK_SIZE; + len -= blocks * GHASH_BLOCK_SIZE; - if ((partial 
+ len) >= GHASH_BLOCK_SIZE) { - struct ghash_key *key = crypto_shash_ctx(desc->tfm); - int blocks; - - if (partial) { - int p = GHASH_BLOCK_SIZE - partial; + do { + int chunk = min(blocks, MAX_BLOCKS); - memcpy(ctx->buf + partial, src, p); - src += p; - len -= p; - } + ghash_do_simd_update(chunk, ctx->digest, src, key, NULL, + pmull_ghash_update_p8); + blocks -= chunk; + src += chunk * GHASH_BLOCK_SIZE; + } while (unlikely(blocks > 0)); + return len; +} - blocks = len / GHASH_BLOCK_SIZE; - len %= GHASH_BLOCK_SIZE; +static int ghash_export(struct shash_desc *desc, void *out) +{ + struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); + u8 *dst = out; - do { - int chunk = min(blocks, MAX_BLOCKS); + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + return 0; +} - ghash_do_simd_update(chunk, ctx->digest, src, key, - partial ? ctx->buf : NULL, - pmull_ghash_update_p8); +static int ghash_import(struct shash_desc *desc, const void *in) +{ + struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); + const u8 *src = in; - blocks -= chunk; - src += chunk * GHASH_BLOCK_SIZE; - partial = 0; - } while (unlikely(blocks > 0)); - } - if (len) - memcpy(ctx->buf + partial, src, len); + ctx->digest[1] = get_unaligned_be64(src); + ctx->digest[0] = get_unaligned_be64(src + 8); return 0; } -static int ghash_final(struct shash_desc *desc, u8 *dst) +static int ghash_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *dst) { - struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); - unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; - - if (partial) { - struct ghash_key *key = crypto_shash_ctx(desc->tfm); + struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc); + struct ghash_key *key = crypto_shash_ctx(desc->tfm); - memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + if (len) { + u8 buf[GHASH_BLOCK_SIZE] = {}; - ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL, + memcpy(buf, src, len); + ghash_do_simd_update(1, 
ctx->digest, buf, key, NULL, pmull_ghash_update_p8); + memzero_explicit(buf, sizeof(buf)); } - put_unaligned_be64(ctx->digest[1], dst); - put_unaligned_be64(ctx->digest[0], dst + 8); - - memzero_explicit(ctx, sizeof(*ctx)); - return 0; + return ghash_export(desc, dst); } static void ghash_reflect(u64 h[], const be128 *k) @@ -205,6 +170,7 @@ static struct shash_alg ghash_alg = { .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", .base.cra_priority = 150, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]), .base.cra_module = THIS_MODULE, @@ -212,9 +178,12 @@ static struct shash_alg ghash_alg = { .digestsize = GHASH_DIGEST_SIZE, .init = ghash_init, .update = ghash_update, - .final = ghash_final, + .finup = ghash_finup, .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), + .export = ghash_export, + .import = ghash_import, + .descsize = sizeof(struct arm_ghash_desc_ctx), + .statesize = sizeof(struct ghash_desc_ctx), }; static int num_rounds(struct crypto_aes_ctx *ctx) diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c deleted file mode 100644 index 18883ea438f3..000000000000 --- a/arch/arm64/crypto/poly1305-glue.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 - * - * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> -#include <crypto/algapi.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/poly1305.h> -#include <crypto/internal/simd.h> -#include <linux/cpufeature.h> -#include <linux/crypto.h> -#include <linux/jump_label.h> -#include <linux/module.h> - -asmlinkage void poly1305_init_arm64(void *state, const u8 *key); -asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - poly1305_init_arm64(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; -} -EXPORT_SYMBOL(poly1305_init_arch); - -static int neon_poly1305_init(struct shash_desc *desc) -{ - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->buflen = 0; - dctx->rset = 0; - dctx->sset = false; - - return 0; -} - -static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, - u32 len, u32 hibit, bool do_neon) -{ - if (unlikely(!dctx->sset)) { - if (!dctx->rset) { - poly1305_init_arm64(&dctx->h, src); - src += POLY1305_BLOCK_SIZE; - len -= POLY1305_BLOCK_SIZE; - dctx->rset = 1; - } - if (len >= POLY1305_BLOCK_SIZE) { - dctx->s[0] = get_unaligned_le32(src + 0); - dctx->s[1] = get_unaligned_le32(src + 4); - dctx->s[2] = get_unaligned_le32(src + 8); - dctx->s[3] = get_unaligned_le32(src + 12); - src += POLY1305_BLOCK_SIZE; - len -= POLY1305_BLOCK_SIZE; - dctx->sset = true; - } - if (len < POLY1305_BLOCK_SIZE) - return; - } - - len &= 
~(POLY1305_BLOCK_SIZE - 1); - - if (static_branch_likely(&have_neon) && likely(do_neon)) - poly1305_blocks_neon(&dctx->h, src, len, hibit); - else - poly1305_blocks(&dctx->h, src, len, hibit); -} - -static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, - const u8 *src, u32 len, bool do_neon) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - len -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - neon_poly1305_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE, 1, false); - dctx->buflen = 0; - } - } - - if (likely(len >= POLY1305_BLOCK_SIZE)) { - neon_poly1305_blocks(dctx, src, len, 1, do_neon); - src += round_down(len, POLY1305_BLOCK_SIZE); - len %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(len)) { - dctx->buflen = len; - memcpy(dctx->buf, src, len); - } -} - -static int neon_poly1305_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - bool do_neon = crypto_simd_usable() && srclen > 128; - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (static_branch_likely(&have_neon) && do_neon) - kernel_neon_begin(); - neon_poly1305_do_update(dctx, src, srclen, do_neon); - if (static_branch_likely(&have_neon) && do_neon) - kernel_neon_end(); - return 0; -} - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && crypto_simd_usable()) { - do { - unsigned int 
todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, todo, 1); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else { - poly1305_blocks(&dctx->h, src, len, 1); - src += len; - } - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit(&dctx->h, dst, dctx->s); - memzero_explicit(dctx, sizeof(*dctx)); -} -EXPORT_SYMBOL(poly1305_final_arch); - -static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) -{ - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (unlikely(!dctx->sset)) - return -ENOKEY; - - poly1305_final_arch(dctx, dst); - return 0; -} - -static struct shash_alg neon_poly1305_alg = { - .init = neon_poly1305_init, - .update = neon_poly1305_update, - .final = neon_poly1305_final, - .digestsize = POLY1305_DIGEST_SIZE, - .descsize = sizeof(struct poly1305_desc_ctx), - - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-neon", - .base.cra_priority = 200, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}; - -static int __init neon_poly1305_mod_init(void) -{ - if (!cpu_have_named_feature(ASIMD)) - return 0; - - static_branch_enable(&have_neon); - - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 
- crypto_register_shash(&neon_poly1305_alg) : 0; -} - -static void __exit neon_poly1305_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) - crypto_unregister_shash(&neon_poly1305_alg); -} - -module_init(neon_poly1305_mod_init); -module_exit(neon_poly1305_mod_exit); - -MODULE_DESCRIPTION("Poly1305 transform using NEON instructions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("poly1305"); -MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c index 0a3b5718df85..c4e653688ea0 100644 --- a/arch/arm64/crypto/polyval-ce-glue.c +++ b/arch/arm64/crypto/polyval-ce-glue.c @@ -15,17 +15,15 @@ * ARMv8 Crypto Extensions instructions to implement the finite field operations. */ -#include <crypto/algapi.h> +#include <asm/neon.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/polyval.h> -#include <linux/crypto.h> -#include <linux/init.h> +#include <crypto/utils.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/cpufeature.h> -#include <asm/neon.h> -#include <asm/simd.h> +#include <linux/string.h> #define NUM_KEY_POWERS 8 @@ -38,7 +36,6 @@ struct polyval_tfm_ctx { struct polyval_desc_ctx { u8 buffer[POLYVAL_BLOCK_SIZE]; - u32 bytes; }; asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys, @@ -48,25 +45,16 @@ asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2); static void internal_polyval_update(const struct polyval_tfm_ctx *keys, const u8 *in, size_t nblocks, u8 *accumulator) { - if (likely(crypto_simd_usable())) { - kernel_neon_begin(); - pmull_polyval_update(keys, in, nblocks, accumulator); - kernel_neon_end(); - } else { - polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in, - nblocks, accumulator); - } + kernel_neon_begin(); + pmull_polyval_update(keys, in, nblocks, accumulator); + kernel_neon_end(); } static void 
internal_polyval_mul(u8 *op1, const u8 *op2) { - if (likely(crypto_simd_usable())) { - kernel_neon_begin(); - pmull_polyval_mul(op1, op2); - kernel_neon_end(); - } else { - polyval_mul_non4k(op1, op2); - } + kernel_neon_begin(); + pmull_polyval_mul(op1, op2); + kernel_neon_end(); } static int polyval_arm64_setkey(struct crypto_shash *tfm, @@ -103,49 +91,27 @@ static int polyval_arm64_update(struct shash_desc *desc, { struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - u8 *pos; unsigned int nblocks; - unsigned int n; - - if (dctx->bytes) { - n = min(srclen, dctx->bytes); - pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes; - - dctx->bytes -= n; - srclen -= n; - while (n--) - *pos++ ^= *src++; - - if (!dctx->bytes) - internal_polyval_mul(dctx->buffer, - tctx->key_powers[NUM_KEY_POWERS-1]); - } - - while (srclen >= POLYVAL_BLOCK_SIZE) { + do { /* allow rescheduling every 4K bytes */ nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; internal_polyval_update(tctx, src, nblocks, dctx->buffer); srclen -= nblocks * POLYVAL_BLOCK_SIZE; src += nblocks * POLYVAL_BLOCK_SIZE; - } + } while (srclen >= POLYVAL_BLOCK_SIZE); - if (srclen) { - dctx->bytes = POLYVAL_BLOCK_SIZE - srclen; - pos = dctx->buffer; - while (srclen--) - *pos++ ^= *src++; - } - - return 0; + return srclen; } -static int polyval_arm64_final(struct shash_desc *desc, u8 *dst) +static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *dst) { struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - if (dctx->bytes) { + if (len) { + crypto_xor(dctx->buffer, src, len); internal_polyval_mul(dctx->buffer, tctx->key_powers[NUM_KEY_POWERS-1]); } @@ -159,13 +125,14 @@ static struct shash_alg polyval_alg = { .digestsize = POLYVAL_DIGEST_SIZE, .init = polyval_arm64_init, .update = polyval_arm64_update, - .final = polyval_arm64_final, + .finup = 
polyval_arm64_finup, .setkey = polyval_arm64_setkey, .descsize = sizeof(struct polyval_desc_ctx), .base = { .cra_name = "polyval", .cra_driver_name = "polyval-ce", .cra_priority = 200, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = POLYVAL_BLOCK_SIZE, .cra_ctxsize = sizeof(struct polyval_tfm_ctx), .cra_module = THIS_MODULE, diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index cbd14f208f83..65b6980817e5 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -7,14 +7,14 @@ #include <asm/neon.h> #include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); @@ -56,79 +56,49 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, { struct sha1_ce_state *sctx = shash_desc_ctx(desc); - if (!crypto_simd_usable()) - return crypto_sha1_update(desc, data, len); - sctx->finalize = 0; - sha1_base_do_update(desc, data, len, sha1_ce_transform); - - return 0; + return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); } static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct sha1_ce_state *sctx = shash_desc_ctx(desc); - bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len; - - if (!crypto_simd_usable()) - return crypto_sha1_finup(desc, data, len, out); + bool finalized = false; /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. 
*/ - sctx->finalize = finalize; - - sha1_base_do_update(desc, data, len, sha1_ce_transform); - if (!finalize) - sha1_base_do_finalize(desc, sha1_ce_transform); - return sha1_base_finish(desc, out); -} - -static int sha1_ce_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - - if (!crypto_simd_usable()) - return crypto_sha1_finup(desc, NULL, 0, out); - - sctx->finalize = 0; - sha1_base_do_finalize(desc, sha1_ce_transform); + if (len >= SHA1_BLOCK_SIZE) { + unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); + + finalized = !remain; + sctx->finalize = finalized; + sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); + data += len - remain; + len = remain; + } + if (!finalized) { + sctx->finalize = 0; + sha1_base_do_finup(desc, data, len, sha1_ce_transform); + } return sha1_base_finish(desc, out); } -static int sha1_ce_export(struct shash_desc *desc, void *out) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - - memcpy(out, &sctx->sst, sizeof(struct sha1_state)); - return 0; -} - -static int sha1_ce_import(struct shash_desc *desc, const void *in) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - - memcpy(&sctx->sst, in, sizeof(struct sha1_state)); - sctx->finalize = 0; - return 0; -} - static struct shash_alg alg = { .init = sha1_base_init, .update = sha1_ce_update, - .final = sha1_ce_final, .finup = sha1_ce_finup, - .import = sha1_ce_import, - .export = sha1_ce_export, .descsize = sizeof(struct sha1_ce_state), - .statesize = sizeof(struct sha1_state), + .statesize = SHA1_STATE_SIZE, .digestsize = SHA1_DIGEST_SIZE, .base = { .cra_name = "sha1", .cra_driver_name = "sha1-ce", .cra_priority = 200, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c deleted file mode 100644 index 6b4866a88ded..000000000000 --- 
a/arch/arm64/crypto/sha2-ce-glue.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <crypto/sha2.h> -#include <crypto/sha256_base.h> -#include <linux/cpufeature.h> -#include <linux/crypto.h> -#include <linux/module.h> - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha256"); - -struct sha256_ce_state { - struct sha256_state sst; - u32 finalize; -}; - -extern const u32 sha256_ce_offsetof_count; -extern const u32 sha256_ce_offsetof_finalize; - -asmlinkage int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src, - int blocks); - -static void sha256_ce_transform(struct sha256_state *sst, u8 const *src, - int blocks) -{ - while (blocks) { - int rem; - - kernel_neon_begin(); - rem = __sha256_ce_transform(container_of(sst, - struct sha256_ce_state, - sst), src, blocks); - kernel_neon_end(); - src += (blocks - rem) * SHA256_BLOCK_SIZE; - blocks = rem; - } -} - -const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, - sst.count); -const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, - finalize); - -asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks); - -static void sha256_arm64_transform(struct sha256_state *sst, u8 const *src, - int blocks) -{ - sha256_block_data_order(sst->state, src, blocks); -} - -static int sha256_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - - if (!crypto_simd_usable()) - return 
sha256_base_do_update(desc, data, len, - sha256_arm64_transform); - - sctx->finalize = 0; - sha256_base_do_update(desc, data, len, sha256_ce_transform); - - return 0; -} - -static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len; - - if (!crypto_simd_usable()) { - if (len) - sha256_base_do_update(desc, data, len, - sha256_arm64_transform); - sha256_base_do_finalize(desc, sha256_arm64_transform); - return sha256_base_finish(desc, out); - } - - /* - * Allow the asm code to perform the finalization if there is no - * partial data and the input is a round multiple of the block size. - */ - sctx->finalize = finalize; - - sha256_base_do_update(desc, data, len, sha256_ce_transform); - if (!finalize) - sha256_base_do_finalize(desc, sha256_ce_transform); - return sha256_base_finish(desc, out); -} - -static int sha256_ce_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - - if (!crypto_simd_usable()) { - sha256_base_do_finalize(desc, sha256_arm64_transform); - return sha256_base_finish(desc, out); - } - - sctx->finalize = 0; - sha256_base_do_finalize(desc, sha256_ce_transform); - return sha256_base_finish(desc, out); -} - -static int sha256_ce_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha256_base_init(desc); - return sha256_ce_finup(desc, data, len, out); -} - -static int sha256_ce_export(struct shash_desc *desc, void *out) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - - memcpy(out, &sctx->sst, sizeof(struct sha256_state)); - return 0; -} - -static int sha256_ce_import(struct shash_desc *desc, const void *in) -{ - struct sha256_ce_state *sctx = shash_desc_ctx(desc); - - memcpy(&sctx->sst, in, sizeof(struct sha256_state)); - sctx->finalize = 0; - return 0; -} - -static struct shash_alg algs[] = { { - .init = 
sha224_base_init, - .update = sha256_ce_update, - .final = sha256_ce_final, - .finup = sha256_ce_finup, - .export = sha256_ce_export, - .import = sha256_ce_import, - .descsize = sizeof(struct sha256_ce_state), - .statesize = sizeof(struct sha256_state), - .digestsize = SHA224_DIGEST_SIZE, - .base = { - .cra_name = "sha224", - .cra_driver_name = "sha224-ce", - .cra_priority = 200, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .init = sha256_base_init, - .update = sha256_ce_update, - .final = sha256_ce_final, - .finup = sha256_ce_finup, - .digest = sha256_ce_digest, - .export = sha256_ce_export, - .import = sha256_ce_import, - .descsize = sizeof(struct sha256_ce_state), - .statesize = sizeof(struct sha256_state), - .digestsize = SHA256_DIGEST_SIZE, - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-ce", - .cra_priority = 200, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha2_ce_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha2_ce_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA2, sha2_ce_mod_init); -module_exit(sha2_ce_mod_fini); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c deleted file mode 100644 index 35356987cc1e..000000000000 --- a/arch/arm64/crypto/sha256-glue.c +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64 - * - * Copyright (c) 2016 Linaro Ltd. 
<ard.biesheuvel@linaro.org> - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <crypto/sha2.h> -#include <crypto/sha256_base.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/types.h> - -MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64"); -MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_ALIAS_CRYPTO("sha256"); - -asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); -EXPORT_SYMBOL(sha256_block_data_order); - -static void sha256_arm64_transform(struct sha256_state *sst, u8 const *src, - int blocks) -{ - sha256_block_data_order(sst->state, src, blocks); -} - -asmlinkage void sha256_block_neon(u32 *digest, const void *data, - unsigned int num_blks); - -static void sha256_neon_transform(struct sha256_state *sst, u8 const *src, - int blocks) -{ - sha256_block_neon(sst->state, src, blocks); -} - -static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - return sha256_base_do_update(desc, data, len, sha256_arm64_transform); -} - -static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - if (len) - sha256_base_do_update(desc, data, len, sha256_arm64_transform); - sha256_base_do_finalize(desc, sha256_arm64_transform); - - return sha256_base_finish(desc, out); -} - -static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out) -{ - return crypto_sha256_arm64_finup(desc, NULL, 0, out); -} - -static struct shash_alg algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = crypto_sha256_arm64_update, - .final = crypto_sha256_arm64_final, - .finup = crypto_sha256_arm64_finup, - .descsize = sizeof(struct sha256_state), - 
.base.cra_name = "sha256", - .base.cra_driver_name = "sha256-arm64", - .base.cra_priority = 125, - .base.cra_blocksize = SHA256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = crypto_sha256_arm64_update, - .final = crypto_sha256_arm64_final, - .finup = crypto_sha256_arm64_finup, - .descsize = sizeof(struct sha256_state), - .base.cra_name = "sha224", - .base.cra_driver_name = "sha224-arm64", - .base.cra_priority = 125, - .base.cra_blocksize = SHA224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -} }; - -static int sha256_update_neon(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - if (!crypto_simd_usable()) - return sha256_base_do_update(desc, data, len, - sha256_arm64_transform); - - while (len > 0) { - unsigned int chunk = len; - - /* - * Don't hog the CPU for the entire time it takes to process all - * input when running on a preemptible kernel, but process the - * data block by block instead. 
- */ - if (IS_ENABLED(CONFIG_PREEMPTION) && - chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) - chunk = SHA256_BLOCK_SIZE - - sctx->count % SHA256_BLOCK_SIZE; - - kernel_neon_begin(); - sha256_base_do_update(desc, data, chunk, sha256_neon_transform); - kernel_neon_end(); - data += chunk; - len -= chunk; - } - return 0; -} - -static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - if (!crypto_simd_usable()) { - if (len) - sha256_base_do_update(desc, data, len, - sha256_arm64_transform); - sha256_base_do_finalize(desc, sha256_arm64_transform); - } else { - if (len) - sha256_update_neon(desc, data, len); - kernel_neon_begin(); - sha256_base_do_finalize(desc, sha256_neon_transform); - kernel_neon_end(); - } - return sha256_base_finish(desc, out); -} - -static int sha256_final_neon(struct shash_desc *desc, u8 *out) -{ - return sha256_finup_neon(desc, NULL, 0, out); -} - -static struct shash_alg neon_algs[] = { { - .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_base_init, - .update = sha256_update_neon, - .final = sha256_final_neon, - .finup = sha256_finup_neon, - .descsize = sizeof(struct sha256_state), - .base.cra_name = "sha256", - .base.cra_driver_name = "sha256-arm64-neon", - .base.cra_priority = 150, - .base.cra_blocksize = SHA256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}, { - .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_base_init, - .update = sha256_update_neon, - .final = sha256_final_neon, - .finup = sha256_finup_neon, - .descsize = sizeof(struct sha256_state), - .base.cra_name = "sha224", - .base.cra_driver_name = "sha224-arm64-neon", - .base.cra_priority = 150, - .base.cra_blocksize = SHA224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -} }; - -static int __init sha256_mod_init(void) -{ - int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs)); - if (ret) - return ret; - - if (cpu_have_named_feature(ASIMD)) { - ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs)); - if 
(ret) - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); - } - return ret; -} - -static void __exit sha256_mod_fini(void) -{ - if (cpu_have_named_feature(ASIMD)) - crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs)); - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(sha256_mod_init); -module_exit(sha256_mod_fini); diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 5662c3ac49e9..b4f1001046c9 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -12,13 +12,13 @@ #include <asm/hwcap.h> #include <asm/neon.h> #include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/sha3.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> +#include <linux/unaligned.h> MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); @@ -35,74 +35,55 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha3_state *sctx = shash_desc_ctx(desc); - unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - - if (!crypto_simd_usable()) - return crypto_sha3_update(desc, data, len); - - if ((sctx->partial + len) >= sctx->rsiz) { - int blocks; - - if (sctx->partial) { - int p = sctx->rsiz - sctx->partial; - - memcpy(sctx->buf + sctx->partial, data, p); - kernel_neon_begin(); - sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size); - kernel_neon_end(); - - data += p; - len -= p; - sctx->partial = 0; - } - - blocks = len / sctx->rsiz; - len %= sctx->rsiz; - - while (blocks) { - int rem; - - kernel_neon_begin(); - rem = sha3_ce_transform(sctx->st, data, blocks, - digest_size); - kernel_neon_end(); - data += (blocks - rem) * sctx->rsiz; - blocks = rem; - } - } - - if (len) { - memcpy(sctx->buf + sctx->partial, data, len); - 
sctx->partial += len; - } - return 0; + struct crypto_shash *tfm = desc->tfm; + unsigned int bs, ds; + int blocks; + + ds = crypto_shash_digestsize(tfm); + bs = crypto_shash_blocksize(tfm); + blocks = len / bs; + len -= blocks * bs; + do { + int rem; + + kernel_neon_begin(); + rem = sha3_ce_transform(sctx->st, data, blocks, ds); + kernel_neon_end(); + data += (blocks - rem) * bs; + blocks = rem; + } while (blocks); + return len; } -static int sha3_final(struct shash_desc *desc, u8 *out) +static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out) { struct sha3_state *sctx = shash_desc_ctx(desc); - unsigned int digest_size = crypto_shash_digestsize(desc->tfm); + struct crypto_shash *tfm = desc->tfm; __le64 *digest = (__le64 *)out; + u8 block[SHA3_224_BLOCK_SIZE]; + unsigned int bs, ds; int i; - if (!crypto_simd_usable()) - return crypto_sha3_final(desc, out); + ds = crypto_shash_digestsize(tfm); + bs = crypto_shash_blocksize(tfm); + memcpy(block, src, len); - sctx->buf[sctx->partial++] = 0x06; - memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial); - sctx->buf[sctx->rsiz - 1] |= 0x80; + block[len++] = 0x06; + memset(block + len, 0, bs - len); + block[bs - 1] |= 0x80; kernel_neon_begin(); - sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size); + sha3_ce_transform(sctx->st, block, 1, ds); kernel_neon_end(); + memzero_explicit(block , sizeof(block)); - for (i = 0; i < digest_size / 8; i++) + for (i = 0; i < ds / 8; i++) put_unaligned_le64(sctx->st[i], digest++); - if (digest_size & 4) + if (ds & 4) put_unaligned_le32(sctx->st[i], (__le32 *)digest); - memzero_explicit(sctx, sizeof(*sctx)); return 0; } @@ -110,10 +91,11 @@ static struct shash_alg algs[] = { { .digestsize = SHA3_224_DIGEST_SIZE, .init = crypto_sha3_init, .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), + .finup = sha3_finup, + .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-224", .base.cra_driver_name = "sha3-224-ce", + 
.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_224_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -121,10 +103,11 @@ static struct shash_alg algs[] = { { .digestsize = SHA3_256_DIGEST_SIZE, .init = crypto_sha3_init, .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), + .finup = sha3_finup, + .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-256", .base.cra_driver_name = "sha3-256-ce", + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_256_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -132,10 +115,11 @@ static struct shash_alg algs[] = { { .digestsize = SHA3_384_DIGEST_SIZE, .init = crypto_sha3_init, .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), + .finup = sha3_finup, + .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-384", .base.cra_driver_name = "sha3-384-ce", + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_384_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, @@ -143,10 +127,11 @@ static struct shash_alg algs[] = { { .digestsize = SHA3_512_DIGEST_SIZE, .init = crypto_sha3_init, .update = sha3_update, - .final = sha3_final, - .descsize = sizeof(struct sha3_state), + .finup = sha3_finup, + .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-512", .base.cra_driver_name = "sha3-512-ce", + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index 071f64293227..6fb3001fa2c9 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -10,14 +10,11 @@ */ #include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/sha2.h> #include 
<crypto/sha512_base.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/module.h> MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions"); @@ -29,12 +26,10 @@ MODULE_ALIAS_CRYPTO("sha512"); asmlinkage int __sha512_ce_transform(struct sha512_state *sst, u8 const *src, int blocks); -asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks); - static void sha512_ce_transform(struct sha512_state *sst, u8 const *src, int blocks) { - while (blocks) { + do { int rem; kernel_neon_begin(); @@ -42,67 +37,47 @@ static void sha512_ce_transform(struct sha512_state *sst, u8 const *src, kernel_neon_end(); src += (blocks - rem) * SHA512_BLOCK_SIZE; blocks = rem; - } -} - -static void sha512_arm64_transform(struct sha512_state *sst, u8 const *src, - int blocks) -{ - sha512_block_data_order(sst->state, src, blocks); + } while (blocks); } static int sha512_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - sha512_block_fn *fn = crypto_simd_usable() ? sha512_ce_transform - : sha512_arm64_transform; - - sha512_base_do_update(desc, data, len, fn); - return 0; + return sha512_base_do_update_blocks(desc, data, len, + sha512_ce_transform); } static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha512_block_fn *fn = crypto_simd_usable() ? sha512_ce_transform - : sha512_arm64_transform; - - sha512_base_do_update(desc, data, len, fn); - sha512_base_do_finalize(desc, fn); - return sha512_base_finish(desc, out); -} - -static int sha512_ce_final(struct shash_desc *desc, u8 *out) -{ - sha512_block_fn *fn = crypto_simd_usable() ? 
sha512_ce_transform - : sha512_arm64_transform; - - sha512_base_do_finalize(desc, fn); + sha512_base_do_finup(desc, data, len, sha512_ce_transform); return sha512_base_finish(desc, out); } static struct shash_alg algs[] = { { .init = sha384_base_init, .update = sha512_ce_update, - .final = sha512_ce_final, .finup = sha512_ce_finup, - .descsize = sizeof(struct sha512_state), + .descsize = SHA512_STATE_SIZE, .digestsize = SHA384_DIGEST_SIZE, .base.cra_name = "sha384", .base.cra_driver_name = "sha384-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { .init = sha512_base_init, .update = sha512_ce_update, - .final = sha512_ce_final, .finup = sha512_ce_finup, - .descsize = sizeof(struct sha512_state), + .descsize = SHA512_STATE_SIZE, .digestsize = SHA512_DIGEST_SIZE, .base.cra_name = "sha512", .base.cra_driver_name = "sha512-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 62f129dea83d..15aa9d8b7b2c 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -6,11 +6,10 @@ */ #include <crypto/internal/hash.h> -#include <linux/types.h> -#include <linux/string.h> #include <crypto/sha2.h> #include <crypto/sha512_base.h> -#include <asm/neon.h> +#include <linux/kernel.h> +#include <linux/module.h> MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash for arm64"); MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>"); @@ -19,59 +18,53 @@ MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("sha384"); MODULE_ALIAS_CRYPTO("sha512"); -asmlinkage void sha512_block_data_order(u64 *digest, const void *data, - unsigned int num_blks); -EXPORT_SYMBOL(sha512_block_data_order); +asmlinkage void sha512_blocks_arch(u64 
*digest, const void *data, + unsigned int num_blks); static void sha512_arm64_transform(struct sha512_state *sst, u8 const *src, int blocks) { - sha512_block_data_order(sst->state, src, blocks); + sha512_blocks_arch(sst->state, src, blocks); } static int sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha512_base_do_update(desc, data, len, sha512_arm64_transform); + return sha512_base_do_update_blocks(desc, data, len, + sha512_arm64_transform); } static int sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (len) - sha512_base_do_update(desc, data, len, sha512_arm64_transform); - sha512_base_do_finalize(desc, sha512_arm64_transform); - + sha512_base_do_finup(desc, data, len, sha512_arm64_transform); return sha512_base_finish(desc, out); } -static int sha512_final(struct shash_desc *desc, u8 *out) -{ - return sha512_finup(desc, NULL, 0, out); -} - static struct shash_alg algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_update, - .final = sha512_final, .finup = sha512_finup, - .descsize = sizeof(struct sha512_state), + .descsize = SHA512_STATE_SIZE, .base.cra_name = "sha512", .base.cra_driver_name = "sha512-arm64", .base.cra_priority = 150, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SHA512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_base_init, .update = sha512_update, - .final = sha512_final, .finup = sha512_finup, - .descsize = sizeof(struct sha512_state), + .descsize = SHA512_STATE_SIZE, .base.cra_name = "sha384", .base.cra_driver_name = "sha384-arm64", .base.cra_priority = 150, + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SHA384_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index 
1a71788c4cda..eac6f5fa0abe 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -6,14 +6,11 @@ */ #include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/module.h> MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions"); @@ -26,50 +23,20 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src, static int sm3_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) { - sm3_update(shash_desc_ctx(desc), data, len); - return 0; - } + int remain; kernel_neon_begin(); - sm3_base_do_update(desc, data, len, sm3_ce_transform); + remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); kernel_neon_end(); - - return 0; -} - -static int sm3_ce_final(struct shash_desc *desc, u8 *out) -{ - if (!crypto_simd_usable()) { - sm3_final(shash_desc_ctx(desc), out); - return 0; - } - - kernel_neon_begin(); - sm3_base_do_finalize(desc, sm3_ce_transform); - kernel_neon_end(); - - return sm3_base_finish(desc, out); + return remain; } static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) { - struct sm3_state *sctx = shash_desc_ctx(desc); - - if (len) - sm3_update(sctx, data, len); - sm3_final(sctx, out); - return 0; - } - kernel_neon_begin(); - if (len) - sm3_base_do_update(desc, data, len, sm3_ce_transform); - sm3_base_do_finalize(desc, sm3_ce_transform); + sm3_base_do_finup(desc, data, len, sm3_ce_transform); kernel_neon_end(); - return sm3_base_finish(desc, out); } @@ -77,11 +44,12 @@ static struct shash_alg sm3_alg = { .digestsize = SM3_DIGEST_SIZE, .init = sm3_base_init, .update = sm3_ce_update, - .final = sm3_ce_final, .finup = sm3_ce_finup, - .descsize = 
sizeof(struct sm3_state), + .descsize = SM3_STATE_SIZE, .base.cra_name = "sm3", .base.cra_driver_name = "sm3-ce", + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SM3_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 400, diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 8dd71ce79b69..6c4611a503a3 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -6,14 +6,11 @@ */ #include <asm/neon.h> -#include <asm/simd.h> -#include <linux/unaligned.h> #include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> #include <linux/cpufeature.h> -#include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/module.h> @@ -23,50 +20,20 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, static int sm3_neon_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) { - sm3_update(shash_desc_ctx(desc), data, len); - return 0; - } + int remain; kernel_neon_begin(); - sm3_base_do_update(desc, data, len, sm3_neon_transform); + remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform); kernel_neon_end(); - - return 0; -} - -static int sm3_neon_final(struct shash_desc *desc, u8 *out) -{ - if (!crypto_simd_usable()) { - sm3_final(shash_desc_ctx(desc), out); - return 0; - } - - kernel_neon_begin(); - sm3_base_do_finalize(desc, sm3_neon_transform); - kernel_neon_end(); - - return sm3_base_finish(desc, out); + return remain; } static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) { - struct sm3_state *sctx = shash_desc_ctx(desc); - - if (len) - sm3_update(sctx, data, len); - sm3_final(sctx, out); - return 0; - } - kernel_neon_begin(); - if (len) - sm3_base_do_update(desc, data, len, sm3_neon_transform); - sm3_base_do_finalize(desc, 
sm3_neon_transform); + sm3_base_do_finup(desc, data, len, sm3_neon_transform); kernel_neon_end(); - return sm3_base_finish(desc, out); } @@ -74,11 +41,12 @@ static struct shash_alg sm3_alg = { .digestsize = SM3_DIGEST_SIZE, .init = sm3_base_init, .update = sm3_neon_update, - .final = sm3_neon_final, .finup = sm3_neon_finup, - .descsize = sizeof(struct sm3_state), + .descsize = SM3_STATE_SIZE, .base.cra_name = "sm3", .base.cra_driver_name = "sm3-neon", + .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .base.cra_blocksize = SM3_BLOCK_SIZE, .base.cra_module = THIS_MODULE, .base.cra_priority = 200, diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 43741bed874e..7a60e7b559dc 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -8,19 +8,18 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <linux/module.h> -#include <linux/crypto.h> -#include <linux/kernel.h> -#include <linux/cpufeature.h> #include <asm/neon.h> -#include <asm/simd.h> #include <crypto/b128ops.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> -#include <crypto/xts.h> #include <crypto/sm4.h> +#include <crypto/utils.h> +#include <crypto/xts.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #define BYTES2BLKS(nbytes) ((nbytes) >> 4) @@ -64,7 +63,6 @@ struct sm4_mac_tfm_ctx { }; struct sm4_mac_desc_ctx { - unsigned int len; u8 digest[SM4_BLOCK_SIZE]; }; @@ -591,8 +589,6 @@ static int sm4_mac_init(struct shash_desc *desc) struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); memset(ctx->digest, 0, SM4_BLOCK_SIZE); - ctx->len = 0; - return 0; } @@ -601,87 +597,50 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p, { struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); 
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); - unsigned int l, nblocks; - - if (len == 0) - return 0; - - if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) { - l = min(len, SM4_BLOCK_SIZE - ctx->len); - - crypto_xor(ctx->digest + ctx->len, p, l); - ctx->len += l; - len -= l; - p += l; - } - - if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) { - kernel_neon_begin(); - - if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) { - sm4_ce_crypt_block(tctx->key.rkey_enc, - ctx->digest, ctx->digest); - ctx->len = 0; - } else { - nblocks = len / SM4_BLOCK_SIZE; - len %= SM4_BLOCK_SIZE; + unsigned int nblocks = len / SM4_BLOCK_SIZE; - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, - nblocks, (ctx->len == SM4_BLOCK_SIZE), - (len != 0)); - - p += nblocks * SM4_BLOCK_SIZE; - - if (len == 0) - ctx->len = SM4_BLOCK_SIZE; - } - - kernel_neon_end(); - - if (len) { - crypto_xor(ctx->digest, p, len); - ctx->len = len; - } - } - - return 0; + len %= SM4_BLOCK_SIZE; + kernel_neon_begin(); + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, + nblocks, false, true); + kernel_neon_end(); + return len; } -static int sm4_cmac_final(struct shash_desc *desc, u8 *out) +static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); const u8 *consts = tctx->consts; - if (ctx->len != SM4_BLOCK_SIZE) { - ctx->digest[ctx->len] ^= 0x80; + crypto_xor(ctx->digest, src, len); + if (len != SM4_BLOCK_SIZE) { + ctx->digest[len] ^= 0x80; consts += SM4_BLOCK_SIZE; } - kernel_neon_begin(); sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, false, true); kernel_neon_end(); - memcpy(out, ctx->digest, SM4_BLOCK_SIZE); - return 0; } -static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out) +static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { struct sm4_mac_tfm_ctx *tctx = 
crypto_shash_ctx(desc->tfm); struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); - if (ctx->len) { + if (len) { + crypto_xor(ctx->digest, src, len); kernel_neon_begin(); sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, ctx->digest); kernel_neon_end(); } - memcpy(out, ctx->digest, SM4_BLOCK_SIZE); - return 0; } @@ -691,6 +650,8 @@ static struct shash_alg sm4_mac_algs[] = { .cra_name = "cmac(sm4)", .cra_driver_name = "cmac-sm4-ce", .cra_priority = 400, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINAL_NONZERO, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + SM4_BLOCK_SIZE * 2, @@ -699,7 +660,7 @@ static struct shash_alg sm4_mac_algs[] = { .digestsize = SM4_BLOCK_SIZE, .init = sm4_mac_init, .update = sm4_mac_update, - .final = sm4_cmac_final, + .finup = sm4_cmac_finup, .setkey = sm4_cmac_setkey, .descsize = sizeof(struct sm4_mac_desc_ctx), }, { @@ -707,6 +668,8 @@ static struct shash_alg sm4_mac_algs[] = { .cra_name = "xcbc(sm4)", .cra_driver_name = "xcbc-sm4-ce", .cra_priority = 400, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINAL_NONZERO, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + SM4_BLOCK_SIZE * 2, @@ -715,7 +678,7 @@ static struct shash_alg sm4_mac_algs[] = { .digestsize = SM4_BLOCK_SIZE, .init = sm4_mac_init, .update = sm4_mac_update, - .final = sm4_cmac_final, + .finup = sm4_cmac_finup, .setkey = sm4_xcbc_setkey, .descsize = sizeof(struct sm4_mac_desc_ctx), }, { @@ -723,14 +686,15 @@ static struct shash_alg sm4_mac_algs[] = { .cra_name = "cbcmac(sm4)", .cra_driver_name = "cbcmac-sm4-ce", .cra_priority = 400, - .cra_blocksize = 1, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, + .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx), .cra_module = THIS_MODULE, }, .digestsize = SM4_BLOCK_SIZE, .init = sm4_mac_init, .update = sm4_mac_update, - .final = sm4_cbcmac_final, + .finup = sm4_cbcmac_finup, .setkey = sm4_cbcmac_setkey, 
.descsize = sizeof(struct sm4_mac_desc_ctx), } diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index d1cc0571798b..dffff6763812 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -168,6 +169,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ebceaae3c749..d40e427ddad9 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -52,7 +52,7 @@ mrs x0, id_aa64mmfr1_el1 ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ - mov_q x0, HCRX_HOST_FLAGS + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d1b1a33f9a8b..e4f77757937e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,15 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_ADDRSZ (0x00) + +/* + * Annoyingly, the negative 
levels for Address size faults aren't laid out + * contiguously (or in the desired order) + */ +#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C) +#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \ + (ESR_ELx_FSC_ADDRSZ + (n))) /* Status codes for individual page table levels */ #define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) @@ -161,8 +170,6 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ -#define ESR_ELx_FSC_ADDRSZ (0x00) -#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -464,6 +471,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) +{ + esr &= ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(2)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(1)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(0)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); +} + +static inline bool esr_fsc_is_sea_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || + (esr == ESR_ELx_FSC_SEA_TTW(2)) || + (esr == ESR_ELx_FSC_SEA_TTW(1)) || + (esr == ESR_ELx_FSC_SEA_TTW(0)) || + (esr == ESR_ELx_FSC_SEA_TTW(-1)); +} + +static inline bool esr_fsc_is_secc_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SECC_TTW(3)) || + (esr == ESR_ELx_FSC_SECC_TTW(2)) || + (esr == ESR_ELx_FSC_SECC_TTW(1)) || + (esr == ESR_ELx_FSC_SECC_TTW(0)) || + (esr == ESR_ELx_FSC_SECC_TTW(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39577f1d079a..18c7811774d3 100644 --- a/arch/arm64/include/asm/insn.h +++ 
b/arch/arm64/include/asm/insn.h @@ -706,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 974d72b5905b..e9c8a581e16f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -100,9 +100,8 @@ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d7cf66573aca..bd020fc28aa9 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { - return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; + u64 hpfar = vcpu->arch.fault.hpfar_el2; + + if (unlikely(!(hpfar & HPFAR_EL2_NS))) + return INVALID_GPA; + + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a6..08ba91e6fb03 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 
reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 87e10d9a635b..9398ade632aa 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -14,7 +14,7 @@ * Was this synchronous external abort a RAS notification? * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. */ -static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) +static inline int kvm_handle_guest_sea(void) { /* apei_claim_sea(NULL) expects to mask interrupts itself */ lockdep_assert_irqs_enabled(); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994b..6e8aa8e72601 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. 
- */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h index 5b80785324b6..9ea0a74e5892 100644 --- a/arch/arm64/include/asm/rqspinlock.h +++ b/arch/arm64/include/asm/rqspinlock.h @@ -86,7 +86,7 @@ #endif -#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) +#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) #include <asm-generic/rqspinlock.h> diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f1524cdeacf1..8fef12626090 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +extern bool __nospectre_bhb; +u8 get_spectre_bhb_loop_value(void); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 92a2b59a9f3d..3322c7047d84 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -99,6 +99,19 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } +#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + const struct vdso_time_data *ret = &vdso_u_time_data; + + /* Work around invalid absolute relocations */ + OPTIMIZER_HIDE_VAR(ret); + + return ret; +} +#define 
__arch_get_vdso_u_time_data __arch_get_vdso_u_time_data +#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f705750..6b0ad5070d3e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9c4d6d552b25..4c46d80aa64b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -114,7 +114,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); -bool arm64_use_ng_mappings = false; +/* + * arm64_use_ng_mappings must be placed in the .data section, otherwise it + * ends up in the .bss section where it is initialized in early_map_kernel() + * after the MMU (with the idmap) was enabled. create_init_idmap() - which + * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG - + * may end up generating incorrect idmap page table attributes. 
+ */ +bool arm64_use_ng_mappings __read_mostly = false; EXPORT_SYMBOL(arm64_use_ng_mappings); DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f279..2004b4f41ade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b..c6650cfe706c 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Erratum 27456 detection, which makes it + * impossible to use non-global mappings. 
+ */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b198dde79e59..edf1783ffc81 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,16 +879,19 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), @@ -997,6 +1000,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, return true; } +u8 get_spectre_bhb_loop_value(void) +{ + return 
max_bhb_k; +} + static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) { const char *v = arm64_get_bp_hardening_vector(slot); @@ -1014,7 +1022,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } -static bool __read_mostly __nospectre_bhb; +bool __read_mostly __nospectre_bhb; static int __init parse_spectre_bhb_param(char *str) { __nospectre_bhb = true; @@ -1092,6 +1100,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) update_mitigation_state(&spectre_bhb_state, state); } +bool is_spectre_bhb_fw_mitigated(void) +{ + return test_bit(BHB_FW, &system_bhb_mitigations); +} + /* Patched to NOP when enabled */ void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95fee..19ca57def629 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 17df94570f03..fc573fc767b0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -12,6 +12,16 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +static inline bool __fault_safe_to_translate(u64 esr) +{ + u64 fsc = esr & ESR_ELx_FSC; + + if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr)) + return false; + + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); +} + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { int ret; @@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +/* 
+ * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR. + */ +static inline bool __hpfar_valid(u64 esr) { - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 + * CPUs affected by ARM erratum #834220 may incorrectly report a + * stage-2 translation fault when a stage-1 permission fault occurs. * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. + * Re-walk the page tables to determine if a stage-1 fault actually + * occurred. */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - esr_fsc_is_permission_fault(esr))) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { + if (cpus_have_final_cap(ARM64_WORKAROUND_834220) && + esr_fsc_is_translation_fault(esr)) + return false; + + if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr)) + return true; + + if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr)) + return true; + + return esr_fsc_is_addr_sz_fault(esr); +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar; + + fault->far_el2 = read_sysreg_el2(SYS_FAR); + fault->hpfar_el2 = 0; + + if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - } + else if (unlikely(!__fault_safe_to_translate(esr))) + return true; + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) + return false; - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; + /* + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid + * HPFAR value. 
+ */ + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; return true; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b741ea6aefa5..96f625dc7256 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -235,6 +235,8 @@ static inline void __deactivate_traps_mpam(void) static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -245,11 +247,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -269,6 +268,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -278,19 +278,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); __deactivate_traps_mpam(); diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c 
b/arch/arm64/kvm/hyp/nvhe/ffa.c index e433dfab882a..3369dd0c4009 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + !smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto out_handled; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f34f11c720d7..e80f3ebd3e2a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -503,7 +503,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { int ret; - if (!addr_is_memory(addr)) + if (!range_is_memory(addr, addr + size)) return -EPERM; ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, @@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + + /* + * Yikes, we couldn't resolve the fault IPA. This should reinject an + * abort into the host when we figure out how to do that. 
+ */ + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); + addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; + ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index ed363aa3027e..50aa8dbcae75 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void) /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register - * view. To do that safely, we have to prevent any interrupt - * from firing (which would be deadly). + * view. * - * Note that this only makes sense on VHE, as interrupts are - * already masked for nVHE as part of the exception entry to - * EL2. - */ - if (has_vhe()) - flags = local_daif_save(); - - /* * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates * that to be able to set ICC_SRE_EL1.SRE to 0, all the * interrupt overrides must be set. You've got to love this. + * + * As we always run VHE with HCR_xMO set, no extra xMO + * manipulation is required in that case. + * + * To safely disable SRE, we have to prevent any interrupt + * from firing (which would be deadly). This only makes sense + * on VHE, as interrupts are already masked for nVHE as part + * of the exception entry to EL2. */ - sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); - isb(); + if (has_vhe()) { + flags = local_daif_save(); + } else { + sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); + isb(); + } + write_gicreg(0, ICC_SRE_EL1); isb(); @@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void) write_gicreg(sre, ICC_SRE_EL1); isb(); - sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); - isb(); - if (has_vhe()) + if (has_vhe()) { local_daif_restore(flags); + } else { + sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); + isb(); + } val = (val & ICC_SRE_EL1_SRE) ? 
0 : (1ULL << 63); val |= read_gicreg(ICH_VTR_EL2); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2feb6c6b63af..eeda92330ade 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1501,6 +1501,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + if (!is_protected_kvm_enabled()) + memcache = &vcpu->arch.mmu_page_cache; + else + memcache = &vcpu->arch.pkvm_memcache; + /* * Permission faults just need to update the existing leaf entry, * and so normally don't require allocations from the memcache. The @@ -1510,13 +1515,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!fault_is_perm || (logging_active && write_fault)) { int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - if (!is_protected_kvm_enabled()) { - memcache = &vcpu->arch.mmu_page_cache; + if (!is_protected_kvm_enabled()) ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - } else { - memcache = &vcpu->arch.pkvm_memcache; + else ret = topup_hyp_memcache(memcache, min_pages); - } + if (ret) return ret; } @@ -1794,9 +1797,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; + /* Synchronous External Abort? */ + if (kvm_vcpu_abt_issea(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (kvm_handle_guest_sea()) + kvm_inject_vabt(vcpu); + + return 1; + } + esr = kvm_vcpu_get_esr(vcpu); + /* + * The fault IPA should be reliable at this point as we're not dealing + * with an SEA. + */ ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) + return -EFAULT; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1818,18 +1840,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } } - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. 
- * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - kvm_inject_vabt(vcpu); - - return 1; - } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 005ad28f7306..5dde9285afc8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1945,6 +1945,12 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */ + if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) || + !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) || + (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 4d49dff721a8..027bfa9689c6 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 + +obj-y += crypto/ + lib-y := clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_page.o \ clear_page.o csum.o insn.o memchr.o memcpy.o \ @@ -14,10 +17,10 @@ endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o -crc32-arm64-y := crc32.o crc32-glue.o +crc32-arm64-y := crc32.o crc32-core.o obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm64.o -crc-t10dif-arm64-y := crc-t10dif-glue.o crc-t10dif-core.o +crc-t10dif-arm64-y := crc-t10dif.o crc-t10dif-core.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif.c index bacd18f23168..c2ffe4fdb59d 100644 --- a/arch/arm64/lib/crc-t10dif-glue.c +++ b/arch/arm64/lib/crc-t10dif.c @@ -17,8 +17,8 @@ #include <asm/neon.h> #include <asm/simd.h> -static DEFINE_STATIC_KEY_FALSE(have_asimd); 
-static DEFINE_STATIC_KEY_FALSE(have_pmull); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); #define CRC_T10DIF_PMULL_CHUNK_SIZE 16U @@ -61,7 +61,7 @@ static int __init crc_t10dif_arm64_init(void) } return 0; } -arch_initcall(crc_t10dif_arm64_init); +subsys_initcall(crc_t10dif_arm64_init); static void __exit crc_t10dif_arm64_exit(void) { diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32-core.S index 68825317460f..68825317460f 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32-core.S diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32.c index ed3acd71178f..ed3acd71178f 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32.c diff --git a/arch/arm64/lib/crypto/.gitignore b/arch/arm64/lib/crypto/.gitignore new file mode 100644 index 000000000000..12d74d8b03d0 --- /dev/null +++ b/arch/arm64/lib/crypto/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +poly1305-core.S +sha256-core.S diff --git a/arch/arm64/lib/crypto/Kconfig b/arch/arm64/lib/crypto/Kconfig new file mode 100644 index 000000000000..129a7685cb4c --- /dev/null +++ b/arch/arm64/lib/crypto/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config CRYPTO_CHACHA20_NEON + tristate + depends on KERNEL_MODE_NEON + default CRYPTO_LIB_CHACHA + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_POLY1305_NEON + tristate + depends on KERNEL_MODE_NEON + default CRYPTO_LIB_POLY1305 + select CRYPTO_ARCH_HAVE_LIB_POLY1305 + +config CRYPTO_SHA256_ARM64 + tristate + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD diff --git a/arch/arm64/lib/crypto/Makefile b/arch/arm64/lib/crypto/Makefile new file mode 100644 index 000000000000..946c09903711 --- /dev/null +++ b/arch/arm64/lib/crypto/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0-only + 
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o + +obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o +poly1305-neon-y := poly1305-core.o poly1305-glue.o +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch +AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch + +obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o +sha256-arm64-y := sha256.o sha256-core.o +sha256-arm64-$(CONFIG_KERNEL_MODE_NEON) += sha256-ce.o + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $(<) void $(@) + +$(obj)/%-core.S: $(src)/%-armv8.pl + $(call cmd,perlasm) + +$(obj)/sha256-core.S: $(src)/sha2-armv8.pl + $(call cmd,perlasm) + +clean-files += poly1305-core.S sha256-core.S diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/lib/crypto/chacha-neon-core.S index b70ac76f2610..80079586ecc7 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/lib/crypto/chacha-neon-core.S @@ -1,5 +1,5 @@ /* - * ChaCha/XChaCha NEON helper functions + * ChaCha/HChaCha NEON helper functions * * Copyright (C) 2016-2018 Linaro, Ltd. <ard.biesheuvel@linaro.org> * diff --git a/arch/arm64/lib/crypto/chacha-neon-glue.c b/arch/arm64/lib/crypto/chacha-neon-glue.c new file mode 100644 index 000000000000..d0188f974ca5 --- /dev/null +++ b/arch/arm64/lib/crypto/chacha-neon-glue.c @@ -0,0 +1,119 @@ +/* + * ChaCha and HChaCha functions (ARM64 optimized) + * + * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/chacha.h> +#include <crypto/internal/simd.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, int bytes); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + int bytes, int nrounds) +{ + while (bytes > 0) { + int l = min(bytes, CHACHA_BLOCK_SIZE * 5); + + if (l <= CHACHA_BLOCK_SIZE) { + u8 buf[CHACHA_BLOCK_SIZE]; + + memcpy(buf, src, l); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, l); + state->x[12] += 1; + break; + } + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } +} + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { + hchacha_block_generic(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + 
unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + return static_key_enabled(&have_neon); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +static int __init chacha_simd_mod_init(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); + return 0; +} +subsys_initcall(chacha_simd_mod_init); + +static void __exit chacha_simd_mod_exit(void) +{ +} +module_exit(chacha_simd_mod_exit); + +MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/lib/crypto/poly1305-armv8.pl index 22c9069c0650..22c9069c0650 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/lib/crypto/poly1305-armv8.pl diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c new file mode 100644 index 000000000000..6a661cf04821 --- /dev/null +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <crypto/internal/poly1305.h> +#include <linux/cpufeature.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/unaligned.h> + +asmlinkage void poly1305_block_init_arch( + struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +EXPORT_SYMBOL_GPL(poly1305_emit_arch); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + len = round_down(len, POLY1305_BLOCK_SIZE); + if (static_branch_likely(&have_neon)) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, 1); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks(state, src, len, 1); +} +EXPORT_SYMBOL_GPL(poly1305_blocks_arch); + +bool poly1305_is_arch_optimized(void) +{ + /* We always can use at least the ARM64 scalar implementation. 
*/ + return true; +} +EXPORT_SYMBOL(poly1305_is_arch_optimized); + +static int __init neon_poly1305_mod_init(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); + return 0; +} +subsys_initcall(neon_poly1305_mod_init); + +static void __exit neon_poly1305_mod_exit(void) +{ +} +module_exit(neon_poly1305_mod_exit); + +MODULE_DESCRIPTION("Poly1305 authenticator (ARM64 optimized)"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/lib/crypto/sha2-armv8.pl index 35ec9ae99fe1..4aebd20c498b 100644 --- a/arch/arm64/crypto/sha512-armv8.pl +++ b/arch/arm64/lib/crypto/sha2-armv8.pl @@ -95,7 +95,7 @@ if ($output =~ /512/) { $reg_t="w"; } -$func="sha${BITS}_block_data_order"; +$func="sha${BITS}_blocks_arch"; ($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/lib/crypto/sha256-ce.S index fce84d88ddb2..f3e21c6d87d2 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/lib/crypto/sha256-ce.S @@ -71,8 +71,8 @@ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 /* - * int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src, - * int blocks) + * size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + * const u8 *data, size_t nblocks); */ .text SYM_FUNC_START(__sha256_ce_transform) @@ -86,20 +86,16 @@ SYM_FUNC_START(__sha256_ce_transform) /* load state */ ld1 {dgav.4s, dgbv.4s}, [x0] - /* load sha256_ce_state::finalize */ - ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] - /* load input */ 0: ld1 {v16.4s-v19.4s}, [x1], #64 - sub w2, w2, #1 + sub x2, x2, #1 CPU_LE( rev32 v16.16b, v16.16b ) CPU_LE( rev32 v17.16b, v17.16b ) CPU_LE( rev32 v18.16b, v18.16b ) CPU_LE( rev32 v19.16b, v19.16b ) -1: add t0.4s, v16.4s, v0.4s + add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b mov dg1v.16b, dgbv.16b @@ -127,31 +123,14 @@ CPU_LE( rev32 v19.16b, v19.16b ) add dgav.4s, dgav.4s, dg0v.4s add dgbv.4s, dgbv.4s, dg1v.4s - /* handled all input 
blocks? */ - cbz w2, 2f - cond_yield 3f, x5, x6 - b 0b + /* return early if voluntary preemption is needed */ + cond_yield 1f, x5, x6 - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -2: cbz x4, 3f - ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x0, x4] - movi v17.2d, #0 - mov x8, #0x80000000 - movi v18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d16, x8 - mov x4, #0 - mov v19.d[0], xzr - mov v19.d[1], x7 - b 1b + /* handled all input blocks? */ + cbnz x2, 0b /* store new state */ -3: st1 {dgav.4s, dgbv.4s}, [x0] - mov w0, w2 +1: st1 {dgav.4s, dgbv.4s}, [x0] + mov x0, x2 ret SYM_FUNC_END(__sha256_ce_transform) diff --git a/arch/arm64/lib/crypto/sha256.c b/arch/arm64/lib/crypto/sha256.c new file mode 100644 index 000000000000..bcf7a3adc0c4 --- /dev/null +++ b/arch/arm64/lib/crypto/sha256.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized for ARM64 + * + * Copyright 2025 Google LLC + */ +#include <asm/neon.h> +#include <crypto/internal/sha2.h> +#include <linux/kernel.h> +#include <linux/module.h> + +asmlinkage void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +EXPORT_SYMBOL_GPL(sha256_blocks_arch); +asmlinkage void sha256_block_neon(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +asmlinkage size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + +void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon)) { + if (static_branch_likely(&have_ce)) { + do { + size_t rem; + + kernel_neon_begin(); + rem = __sha256_ce_transform(state, + data, nblocks); + 
kernel_neon_end(); + data += (nblocks - rem) * SHA256_BLOCK_SIZE; + nblocks = rem; + } while (nblocks); + } else { + kernel_neon_begin(); + sha256_block_neon(state, data, nblocks); + kernel_neon_end(); + } + } else { + sha256_blocks_arch(state, data, nblocks); + } +} +EXPORT_SYMBOL_GPL(sha256_blocks_simd); + +bool sha256_is_arch_optimized(void) +{ + /* We always can use at least the ARM64 scalar implementation. */ + return true; +} +EXPORT_SYMBOL_GPL(sha256_is_arch_optimized); + +static int __init sha256_arm64_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + cpu_have_named_feature(ASIMD)) { + static_branch_enable(&have_neon); + if (cpu_have_named_feature(SHA2)) + static_branch_enable(&have_ce); + } + return 0; +} +subsys_initcall(sha256_arm64_mod_init); + +static void __exit sha256_arm64_mod_exit(void) +{ +} +module_exit(sha256_arm64_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 optimized for ARM64"); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 9bef696e2230..4e298baddc2e 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -5,6 +5,7 @@ * * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> */ +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/printk.h> @@ -1500,43 +1501,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } -u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type) { - u32 opt; - u32 insn; - switch (type) { case AARCH64_INSN_MB_SY: - opt = 0xf; - break; + return 0xf; case AARCH64_INSN_MB_ST: - opt = 0xe; - break; + return 0xe; case AARCH64_INSN_MB_LD: - opt = 0xd; - break; + return 0xd; case AARCH64_INSN_MB_ISH: - opt = 0xb; - break; + return 0xb; case AARCH64_INSN_MB_ISHST: - opt = 0xa; - break; + return 0xa; case AARCH64_INSN_MB_ISHLD: - opt = 0x9; - break; + return 0x9; case 
AARCH64_INSN_MB_NSH: - opt = 0x7; - break; + return 0x7; case AARCH64_INSN_MB_NSHST: - opt = 0x6; - break; + return 0x6; case AARCH64_INSN_MB_NSHLD: - opt = 0x5; - break; + return 0x5; default: - pr_err("%s: unknown dmb type %d\n", __func__, type); + pr_err("%s: unknown barrier type %d\n", __func__, type); return AARCH64_BREAK_FAULT; } +} + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; insn = aarch64_insn_get_dmb_value(); insn &= ~GENMASK(11, 8); @@ -1545,6 +1544,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type) +{ + u32 opt, insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_dsb_base_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} + u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg) { diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 70d7c89d3ac9..634d78422adb 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include <linux/arm-smccc.h> #include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> @@ -17,6 +18,7 @@ #include <asm/asm-extable.h> #include <asm/byteorder.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/text-patching.h> @@ -939,7 +941,51 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = 
get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (capable(CAP_SYS_ADMIN)) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 ptr = bpf2a64[TCCNT_PTR]; @@ -952,10 +998,13 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_POP(A64_ZR, ptr, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); + /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1898,7 +1947,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1961,7 +2010,7 @@ skip_init_ctx: goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); /* Extra pass to validate JITed code. 
*/ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index f9476848a2ed..bdf044c5d11b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3536,3 +3536,10 @@ Field 5 F Field 4 P Field 3:0 Align EndSysreg + +Sysreg HPFAR_EL2 3 4 6 0 4 +Field 63 NS +Res0 62:48 +Field 47:4 FIPA +Res0 3:0 +EndSysreg |