diff options
1191 files changed, 13455 insertions, 6501 deletions
@@ -27,6 +27,7 @@ Alan Cox <alan@lxorguk.ukuu.org.uk> Alan Cox <root@hraefn.swansea.linux.org.uk> Aleksandar Markovic <aleksandar.markovic@mips.com> <aleksandar.markovic@imgtec.com> Aleksey Gorelov <aleksey_gorelov@phoenix.com> +Alex Williamson <alex@shazbot.org> <alex.williamson@redhat.com> Alexander Lobakin <alobakin@pm.me> <alobakin@dlink.ru> Alexander Lobakin <alobakin@pm.me> <alobakin@marvell.com> Alexander Lobakin <alobakin@pm.me> <bloodyreaper@yandex.ru> @@ -205,6 +206,7 @@ Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com> David Brownell <david-b@pacbell.net> David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org> David Heidelberg <david@ixit.cz> <d.okias@gmail.com> +David Hildenbrand <david@kernel.org> <david@redhat.com> David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com> David Rheinsberg <david@readahead.eu> <dh.herrmann@googlemail.com> David Rheinsberg <david@readahead.eu> <david.rheinsberg@gmail.com> @@ -425,7 +427,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com> Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org> Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org> Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com> -Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com> +Kiryl Shutsemau <kas@kernel.org> <kirill.shutemov@linux.intel.com> Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org> @@ -604,7 +606,8 @@ Oleksij Rempel <o.rempel@pengutronix.de> Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de> Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de> Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net> -Oliver Upton <oliver.upton@linux.dev> <oupton@google.com> +Oliver Upton <oupton@kernel.org> <oupton@google.com> +Oliver Upton <oupton@kernel.org> <oliver.upton@linux.dev> OndÅ™ej Jirman <megi@xff.cz> <megous@megous.com> Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org> Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com> @@ -643,6 +646,7 @@ Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com> Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com> Quentin Monnet <qmo@kernel.org> <quentin@isovalent.com> Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com> +Rae Moar <raemoar63@gmail.com> <rmoar@google.com> Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl> Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org> Rajendra Nayak <quic_rjendra@quicinc.com> <rnayak@codeaurora.org> @@ -2036,6 +2036,10 @@ S: Botanicka' 68a S: 602 00 Brno S: Czech Republic +N: Karsten Keil +E: isdn@linux-pingi.de +D: ISDN subsystem maintainer + N: Jakob Kemi E: jakob.kemi@telia.com D: V4L W9966 Webcam driver diff --git a/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml b/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml index 5e3e199fd9a4..96d50d14c071 100644 --- a/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/ti,twl4030-gpio.yaml# +$id: http://devicetree.org/schemas/gpio/ti,twl4030-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: TI TWL4030 GPIO controller diff --git a/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml new file mode 100644 index 000000000000..9460c64071f2 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/apm,xgene-slimpro-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: APM X-Gene SLIMpro Mailbox I2C + +maintainers: + - Khuong Dinh <khuong@os.amperecomputing.com> + +description: + An I2C controller accessed over the "SLIMpro" mailbox. + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: apm,xgene-slimpro-i2c + + mboxes: + maxItems: 1 + +required: + - compatible + - mboxes + +unevaluatedProperties: false + +examples: + - | + i2c { + compatible = "apm,xgene-slimpro-i2c"; + mboxes = <&mailbox 0>; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt b/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt deleted file mode 100644 index f6b2c20cfbf6..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt +++ /dev/null @@ -1,15 +0,0 @@ -APM X-Gene SLIMpro Mailbox I2C Driver - -An I2C controller accessed over the "SLIMpro" mailbox. - -Required properties : - - - compatible : should be "apm,xgene-slimpro-i2c" - - mboxes : use the label reference for the mailbox as the first parameter. - The second parameter is the channel number. - -Example : - i2cslimpro { - compatible = "apm,xgene-slimpro-i2c"; - mboxes = <&mailbox 0>; - }; diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index 5caa3779660d..5491d0775ede 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -180,9 +180,9 @@ allOf: then: properties: reg: - minItems: 2 + maxItems: 2 reg-names: - minItems: 2 + maxItems: 2 else: properties: reg: diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 6a47e08e0e97..f9cffbb2df07 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -142,7 +142,9 @@ allOf: required: - orientation-switch then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml index a58370a6a5d3..fba7b2549dde 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -24,6 +24,10 @@ properties: - enum: - qcom,qcs8300-qmp-ufs-phy - const: qcom,sa8775p-qmp-ufs-phy + - items: + - enum: + - qcom,kaanapali-qmp-ufs-phy + - const: qcom,sm8750-qmp-ufs-phy - enum: - qcom,msm8996-qmp-ufs-phy - qcom,msm8998-qmp-ufs-phy diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index e906403208c0..ea1135c91fb7 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -125,7 +125,9 @@ allOf: contains: const: google,gs101-usb31drd-phy then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# properties: clocks: diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index e925cd4c3ac8..72483bc3274d 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -197,6 +197,7 @@ allOf: - renesas,rcar-gen2-scif - renesas,rcar-gen3-scif - renesas,rcar-gen4-scif + - renesas,rcar-gen5-scif then: properties: interrupts: diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml b/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml index 92aa47ec72c7..88eb20bb008f 100644 --- a/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml +++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml @@ -79,6 +79,7 @@ properties: - fsl,imx-audio-nau8822 - fsl,imx-audio-sgtl5000 - fsl,imx-audio-si476x + - fsl,imx-audio-tlv320 - fsl,imx-audio-tlv320aic31xx - fsl,imx-audio-tlv320aic32x4 - fsl,imx-audio-wm8524 diff --git a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml index 23624f32ac30..769e4cb5b99b 100644 --- a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml @@ -32,7 +32,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 4 items: enum: [1, 2, 3, 4] @@ -48,7 +48,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 5 items: enum: [1, 2, 3, 4, 5] diff --git a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml index 8ac91625dce5..b49a920af704 100644 --- a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml @@ -33,6 +33,7 @@ properties: - qcom,apq8096-sndcard - qcom,glymur-sndcard - qcom,qcm6490-idp-sndcard + - qcom,qcs615-sndcard - qcom,qcs6490-rb3gen2-sndcard - qcom,qcs8275-sndcard - qcom,qcs9075-sndcard diff --git a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml index bd00afa47d62..7f84f506013c 100644 --- a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml +++ b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml @@ -24,10 +24,10 @@ description: | Instruments Smart Amp speaker protection algorithm. The integrated speaker voltage and current sense provides for real time monitoring of loudspeaker behavior. - The TAS5825/TAS5827 is a stereo, digital input Class-D audio - amplifier optimized for efficiently driving high peak power into - small loudspeakers. An integrated on-chip DSP supports Texas - Instruments Smart Amp speaker protection algorithm. + The TAS5802/TAS5815/TAS5825/TAS5827/TAS5828 is a stereo, digital input + Class-D audio amplifier optimized for efficiently driving high peak + power into small loudspeakers. An integrated on-chip DSP supports + Texas Instruments Smart Amp speaker protection algorithm. Specifications about the audio amplifier can be found at: https://www.ti.com/lit/gpn/tas2120 @@ -35,8 +35,10 @@ description: | https://www.ti.com/lit/gpn/tas2563 https://www.ti.com/lit/gpn/tas2572 https://www.ti.com/lit/gpn/tas2781 + https://www.ti.com/lit/gpn/tas5815 https://www.ti.com/lit/gpn/tas5825m https://www.ti.com/lit/gpn/tas5827 + https://www.ti.com/lit/gpn/tas5828m properties: compatible: @@ -65,11 +67,21 @@ properties: Protection and Audio Processing, 16/20/24/32bit stereo I2S or multichannel TDM. + ti,tas5802: 22-W, Inductor-Less, Digital Input, Closed-Loop Class-D + Audio Amplifier with 96-Khz Extended Processing and Low Idle Power + Dissipation. + + ti,tas5815: 30-W, Digital Input, Stereo, Closed-loop Class-D Audio + Amplifier with 96 kHz Enhanced Processing + ti,tas5825: 38-W Stereo, Inductor-Less, Digital Input, Closed-Loop 4.5V to 26.4V Class-D Audio Amplifier with 192-kHz Extended Audio Processing. - ti,tas5827: 47-W Stereo, Digital Input, High Efficiency Closed-Loop Class-D - Amplifier with Class-H Algorithm + ti,tas5827: 47-W Stereo, Digital Input, High Efficiency Closed-Loop + Class-D Amplifier with Class-H Algorithm + + ti,tas5828: 50-W Stereo, Digital Input, High Efficiency Closed-Loop + Class-D Amplifier with Hybrid-Pro Algorithm oneOf: - items: - enum: @@ -80,8 +92,11 @@ properties: - ti,tas2563 - ti,tas2570 - ti,tas2572 + - ti,tas5802 + - ti,tas5815 - ti,tas5825 - ti,tas5827 + - ti,tas5828 - const: ti,tas2781 - enum: - ti,tas2781 @@ -182,7 +197,23 @@ allOf: compatible: contains: enum: + - ti,tas5802 + - ti,tas5815 + then: + properties: + reg: + maxItems: 4 + items: + minimum: 0x54 + maximum: 0x57 + + - if: + properties: + compatible: + contains: + enum: - ti,tas5827 + - ti,tas5828 then: properties: reg: diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml index 8de96abe9da1..27414b78d61d 100644 --- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml +++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml @@ -14,9 +14,14 @@ allOf: properties: compatible: - enum: - - cdns,spi-r1p6 - - xlnx,zynq-spi-r1p6 + oneOf: + - enum: + - xlnx,zynq-spi-r1p6 + - items: + - enum: + - xlnx,zynqmp-spi-r1p6 + - xlnx,versal-net-spi-r1p6 + - const: cdns,spi-r1p6 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 748faf7f7081..ce6762c92fda 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -34,6 +34,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3506-spi - rockchip,rk3528-spi - rockchip,rk3562-spi - rockchip,rk3568-spi diff --git a/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml b/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml index aaa0bbb5bfe1..cea84ab2204f 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml @@ -15,6 +15,7 @@ select: compatible: contains: enum: + - qcom,kaanapali-ufshc - qcom,sm8650-ufshc - qcom,sm8750-ufshc required: @@ -24,6 +25,7 @@ properties: compatible: items: - enum: + - qcom,kaanapali-ufshc - qcom,sm8650-ufshc - qcom,sm8750-ufshc - const: qcom,ufshc diff --git a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml index e3a7df91f7f1..89b1fb90aeeb 100644 --- a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml +++ b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml @@ -76,6 +76,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml index baf130669c38..73e7a60a0060 100644 --- a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml @@ -89,13 +89,21 @@ required: - reg - "#address-cells" - "#size-cells" - - dma-ranges - ranges - clocks - clock-names - interrupts - power-domains +allOf: + - if: + properties: + compatible: + const: fsl,imx8mp-dwc3 + then: + required: + - dma-ranges + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml index e588514fab2d..793662f6f3bf 100644 --- a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml +++ b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml @@ -52,6 +52,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# - if: required: - mode-switch diff --git a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml index d805dde80796..4d2fcaa71870 100644 --- a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml +++ b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml @@ -46,6 +46,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml index 589914d22bf2..25fab5fdc2cd 100644 --- a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml +++ b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml @@ -91,6 +91,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml index aeb33667818e..eaeab1c01a59 100644 --- a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml +++ b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml @@ -81,6 +81,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml index dfd084ed9024..d49a58d5478f 100644 --- a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml @@ -68,6 +68,7 @@ properties: - qcom,sm8550-dwc3 - qcom,sm8650-dwc3 - qcom,x1e80100-dwc3 + - qcom,x1e80100-dwc3-mp - const: qcom,snps-dwc3 reg: @@ -460,8 +461,10 @@ allOf: then: properties: interrupts: + minItems: 4 maxItems: 5 interrupt-names: + minItems: 4 items: - const: dwc_usb3 - const: pwr_event diff --git a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml index 96346723f3e9..96dcec9b7620 100644 --- a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml @@ -60,6 +60,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml index f713cac4a8ac..e1501ea6b50b 100644 --- a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml +++ b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml new file mode 100644 index 000000000000..6bf0c97e30ae --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/usb-switch-ports.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: USB Orientation and Mode Switches Ports Graph Properties + +maintainers: + - Greg Kroah-Hartman <gregkh@linuxfoundation.org> + +description: + Ports Graph properties for devices handling USB mode and orientation switching. + +properties: + port: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + A port node to link the device to a TypeC controller for the purpose of + handling altmode muxing and orientation switching. + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Super Speed (SS) Output endpoint to the Type-C connector + + port@1: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + Super Speed (SS) Input endpoint from the Super-Speed PHY + unevaluatedProperties: false + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + +oneOf: + - required: + - port + - required: + - ports + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/usb/usb-switch.yaml b/Documentation/devicetree/bindings/usb/usb-switch.yaml index 896201912630..f77731493dc4 100644 --- a/Documentation/devicetree/bindings/usb/usb-switch.yaml +++ b/Documentation/devicetree/bindings/usb/usb-switch.yaml @@ -25,56 +25,4 @@ properties: description: Possible handler of SuperSpeed signals retiming type: boolean - port: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - A port node to link the device to a TypeC controller for the purpose of - handling altmode muxing and orientation switching. - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - - ports: - $ref: /schemas/graph.yaml#/properties/ports - properties: - port@0: - $ref: /schemas/graph.yaml#/properties/port - description: - Super Speed (SS) Output endpoint to the Type-C connector - - port@1: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - Super Speed (SS) Input endpoint from the Super-Speed PHY - unevaluatedProperties: false - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - -oneOf: - - required: - - port - - required: - - ports - additionalProperties: true diff --git a/Documentation/firmware-guide/acpi/i2c-muxes.rst b/Documentation/firmware-guide/acpi/i2c-muxes.rst index f366539acd79..96ef4012d78f 100644 --- a/Documentation/firmware-guide/acpi/i2c-muxes.rst +++ b/Documentation/firmware-guide/acpi/i2c-muxes.rst @@ -37,8 +37,8 @@ which corresponds to the following ASL (in the scope of \_SB):: Name (_HID, ...) Name (_CRS, ResourceTemplate () { I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, - AddressingMode7Bit, "\\_SB.SMB1.CH00", 0x00, - ResourceConsumer,,) + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH00", + 0x00, ResourceConsumer,,) } } } @@ -52,8 +52,8 @@ which corresponds to the following ASL (in the scope of \_SB):: Name (_HID, ...) Name (_CRS, ResourceTemplate () { I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, - AddressingMode7Bit, "\\_SB.SMB1.CH01", 0x00, - ResourceConsumer,,) + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH01", + 0x00, ResourceConsumer,,) } } } diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index cafb4ec20447..80728f6f9bc8 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -605,6 +605,8 @@ operations: reply: &pin-attrs attributes: - id + - module-name + - clock-id - board-label - panel-label - package-label diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst index 605e72c6c877..89c79dd6c6f9 100644 --- a/Documentation/networking/ax25.rst +++ b/Documentation/networking/ax25.rst @@ -11,6 +11,7 @@ found on https://linux-ax25.in-berlin.de. There is a mailing list for discussing Linux amateur radio matters called linux-hams@vger.kernel.org. To subscribe to it, send a message to -majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body -of the message, the subject field is ignored. You don't need to be -subscribed to post but of course that means you might miss an answer. +linux-hams+subscribe@vger.kernel.org or use the web interface at +https://vger.kernel.org. The subject and body of the message are +ignored. You don't need to be subscribed to post but of course that +means you might miss an answer. diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst index 289c146a8291..6877a3260582 100644 --- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -137,16 +137,20 @@ d. Checksum offload header v5 Checksum offload header fields are in big endian format. +Packet format:: + Bit 0 - 6 7 8-15 16-31 Function Header Type Next Header Checksum Valid Reserved Header Type is to indicate the type of header, this usually is set to CHECKSUM Header types -= ========================================== + += =============== 0 Reserved 1 Reserved 2 checksum header += =============== Checksum Valid is to indicate whether the header checksum is valid. Value of 1 implies that checksum is calculated on this packet and is valid, value of 0 @@ -183,9 +187,11 @@ rmnet in a single linear skb. rmnet will process the individual packets and either ACK the MAP command or deliver the IP packet to the network stack as needed -MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... +Packet format:: + + MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... -MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... + MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... 3. Userspace configuration ========================== diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst index f4e1b4e07adc..2f776e90d318 100644 --- a/Documentation/networking/net_failover.rst +++ b/Documentation/networking/net_failover.rst @@ -96,9 +96,8 @@ needed to these network configuration daemons to make sure that an IP is received only on the 'failover' device. Below is the patch snippet used with 'cloud-ifupdown-helper' script found on -Debian cloud images: +Debian cloud images:: -:: @@ -27,6 +27,8 @@ do_setup() { local working="$cfgdir/.$INTERFACE" local final="$cfgdir/$INTERFACE" @@ -172,9 +171,8 @@ appropriate FDB entry is added. The following script is executed on the destination hypervisor once migration completes, and it reattaches the VF to the VM and brings down the virtio-net -interface. +interface:: -:: # reattach-vf.sh #!/bin/bash diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 59cb9982afe6..2555e75e5cc1 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -19,9 +19,6 @@ Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024 Sysdata append support by Breno Leitao <leitao@debian.org>, Jan 15 2025 -Please send bug reports to Matt Mackall <mpm@selenic.com> -Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com> - Introduction: ============= diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst index 6ff9e754755d..3198be3a6d63 100644 --- a/Documentation/rust/coding-guidelines.rst +++ b/Documentation/rust/coding-guidelines.rst @@ -38,6 +38,81 @@ Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on individual files, and does not require a kernel configuration. Sometimes it may even work with broken code. +Imports +~~~~~~~ + +``rustfmt``, by default, formats imports in a way that is prone to conflicts +while merging and rebasing, since in some cases it condenses several items into +the same line. For instance: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5}, + example6, example7, + example8::example9, + }; + +Instead, the kernel uses a vertical layout that looks like this: + +.. code-block:: rust + + use crate::{ + example1, + example2::{ + example3, + example4, + example5, // + }, + example6, + example7, + example8::example9, // + }; + +That is, each item goes into its own line, and braces are used as soon as there +is more than one item in a list. + +The trailing empty comment allows to preserve this formatting. Not only that, +``rustfmt`` will actually reformat imports vertically when the empty comment is +added. That is, it is possible to easily reformat the original example into the +expected style by running ``rustfmt`` on an input like: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5, // + }, + example6, example7, + example8::example9, // + }; + +The trailing empty comment works for nested imports, as shown above, as well as +for single item imports -- this can be useful to minimize diffs within patch +series: + +.. code-block:: rust + + use crate::{ + example1, // + }; + +The trailing empty comment works in any of the lines within the braces, but it +is preferred to keep it in the last item, since it is reminiscent of the +trailing comma in other formatters. Sometimes it may be simpler to avoid moving +the comment several times within a patch series due to changes in the list. + +There may be cases where exceptions may need to be made, i.e. none of this is +a hard rule. There is also code that is not migrated to this style yet, but +please do not introduce code in other styles. + +Eventually, the goal is to get ``rustfmt`` to support this formatting style (or +a similar one) automatically in a stable release without requiring the trailing +empty comment. Thus, at some point, the goal is to remove those comments. + Comments -------- diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst index 57d1964453e1..d5363b08f515 100644 --- a/Documentation/sound/codecs/cs35l56.rst +++ b/Documentation/sound/codecs/cs35l56.rst @@ -105,10 +105,10 @@ In this example the SSID is 10280c63. The format of the firmware file names is: -SoundWire (except CS35L56 Rev B0): +SoundWire: cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u? -SoundWire CS35L56 Rev B0: +SoundWire CS35L56 Rev B0 firmware released before kernel version 6.16: cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN Non-SoundWire (HDA and I2S): @@ -127,9 +127,8 @@ Where: * spkidX is an optional part, used for laptops that have firmware configurations for different makes and models of internal speakers. -The CS35L56 Rev B0 continues to use the old filename scheme because a -large number of firmware files have already been published with these -names. +Early firmware for CS35L56 Rev B0 used the ALSA prefix (ampN) as the +filename qualifier. Support for the l?u? qualifier was added in kernel 6.16. Sound Open Firmware and ALSA topology files ------------------------------------------- diff --git a/Documentation/userspace-api/netlink/intro-specs.rst b/Documentation/userspace-api/netlink/intro-specs.rst index a4435ae4628d..e5ebc617754a 100644 --- a/Documentation/userspace-api/netlink/intro-specs.rst +++ b/Documentation/userspace-api/netlink/intro-specs.rst @@ -13,10 +13,10 @@ Simple CLI Kernel comes with a simple CLI tool which should be useful when developing Netlink related code. The tool is implemented in Python and can use a YAML specification to issue Netlink requests -to the kernel. Only Generic Netlink is supported. +to the kernel. The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts -a handul of arguments, the most important ones are: +a handful of arguments, the most important ones are: - ``--spec`` - point to the spec file - ``--do $name`` / ``--dump $name`` - issue request ``$name`` diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6ae24c5ca559..57061fa29e6a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered directly to the virtual CPU). +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. + :: struct kvm_vcpu_events { @@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. See KVM_GET_VCPU_EVENTS for the data structure. +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. 4.33 KVM_GET_DEBUGREGS ---------------------- @@ -6432,9 +6437,18 @@ most one mapping per page, i.e. binding multiple memory regions to a single guest_memfd range is not allowed (any number of memory regions can be bound to a single guest_memfd file, but the bound ranges must not overlap). -When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field -supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation -enables mmap() and faulting of guest_memfd memory to host userspace. +The capability KVM_CAP_GUEST_MEMFD_FLAGS enumerates the `flags` that can be +specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags: + + ============================ ================================================ + GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file + descriptor. + GUEST_MEMFD_FLAG_INIT_SHARED Make all memory in the file shared during + KVM_CREATE_GUEST_MEMFD (memory files created + without INIT_SHARED will be marked private). + Shared memory can be faulted into host userspace + page tables. Private memory cannot. + ============================ ================================================ When the KVM MMU performs a PFN lookup to service a guest fault and the backing guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index ff02102f7141..5395ee66fc32 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. It is not possible to create both a GICv3 and GICv2 on the same VM. -Creating a guest GICv3 device requires a host GICv3 as well. +Creating a guest GICv3 device requires a host GICv3 host, or a GICv5 host with +support for FEAT_GCIE_LEGACY. Groups: diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..e64b94e6b5a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -915,6 +915,7 @@ F: drivers/staging/media/sunxi/cedrus/ ALPHA PORT M: Richard Henderson <richard.henderson@linaro.org> M: Matt Turner <mattst88@gmail.com> +M: Magnus Lindholm <linmag7@gmail.com> L: linux-alpha@vger.kernel.org S: Odd Fixes F: arch/alpha/ @@ -1997,6 +1998,10 @@ F: include/uapi/linux/if_arcnet.h ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS) M: Arnd Bergmann <arnd@arndb.de> +M: Krzysztof Kozlowski <krzk@kernel.org> +M: Alexandre Belloni <alexandre.belloni@bootlin.com> +M: Linus Walleij <linus.walleij@linaro.org> +R: Drew Fustini <fustini@kernel.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: soc@lists.linux.dev S: Maintained @@ -3841,6 +3846,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary <corentin.chary@gmail.com> M: Luke D. Jones <luke@ljones.dev> +M: Denis Benato <benato.denis96@gmail.com> L: platform-driver-x86@vger.kernel.org S: Maintained W: https://asus-linux.org/ @@ -4393,7 +4399,7 @@ BLOCK LAYER M: Jens Axboe <axboe@kernel.dk> L: linux-block@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git F: Documentation/ABI/stable/sysfs-block F: Documentation/block/ F: block/ @@ -4813,6 +4819,7 @@ F: drivers/net/dsa/b53/* F: drivers/net/dsa/bcm_sf2* F: include/linux/dsa/brcm.h F: include/linux/platform_data/b53.h +F: net/dsa/tag_brcm.c BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Florian Fainelli <florian.fainelli@broadcom.com> @@ -9202,6 +9209,7 @@ R: Yue Hu <zbestahu@gmail.com> R: Jeffle Xu <jefflexu@linux.alibaba.com> R: Sandeep Dhavale <dhavale@google.com> R: Hongbo Li <lihongbo22@huawei.com> +R: Chunhai Guo <guochunhai@vivo.com> L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org @@ -11520,7 +11528,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song <muchun.song@linux.dev> M: Oscar Salvador <osalvador@suse.de> -R: David Hildenbrand <david@redhat.com> +R: David Hildenbrand <david@kernel.org> L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -12516,6 +12524,7 @@ F: include/linux/avf/virtchnl.h F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA +M: Krzysztof Czurylo <krzysztof.czurylo@intel.com> M: Tatyana Nikolova <tatyana.e.nikolova@intel.com> L: linux-rdma@vger.kernel.org S: Supported @@ -12856,7 +12865,8 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER -M: Daniel Scally <djrscally@gmail.com> +M: Daniel Scally <dan.scally@ideasonboard.com> +M: Sakari Ailus <sakari.ailus@linux.intel.com> S: Maintained F: drivers/platform/x86/intel/int3472/ F: include/linux/platform_data/x86/int3472.h @@ -13111,6 +13121,15 @@ F: include/uapi/linux/io_uring.h F: include/uapi/linux/io_uring/ F: io_uring/ +IO_URING ZCRX +M: Pavel Begunkov <asml.silence@gmail.com> +L: io-uring@vger.kernel.org +L: netdev@vger.kernel.org +T: git https://github.com/isilence/linux.git zcrx/for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git +S: Maintained +F: io_uring/zcrx.* + IPMI SUBSYSTEM M: Corey Minyard <corey@minyard.net> L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers) @@ -13246,10 +13265,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git mast F: drivers/infiniband/ulp/isert ISDN/CMTP OVER BLUETOOTH -M: Karsten Keil <isdn@linux-pingi.de> -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Odd Fixes +S: Orphan W: http://www.isdn4linux.de F: Documentation/isdn/ F: drivers/isdn/capi/ @@ -13258,10 +13275,8 @@ F: include/uapi/linux/isdn/ F: net/bluetooth/cmtp/ ISDN/mISDN SUBSYSTEM -M: Karsten Keil <isdn@linux-pingi.de> -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Maintained +S: Orphan W: http://www.isdn4linux.de F: drivers/isdn/Kconfig F: drivers/isdn/Makefile @@ -13415,9 +13430,12 @@ F: mm/kasan/ F: scripts/Makefile.kasan KCONFIG +M: Nathan Chancellor <nathan@kernel.org> +M: Nicolas Schier <nsc@kernel.org> L: linux-kbuild@vger.kernel.org -S: Orphan +S: Odd Fixes Q: https://patchwork.kernel.org/project/linux-kbuild/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux.git F: Documentation/kbuild/kconfig* F: scripts/Kconfig.include F: scripts/kconfig/ @@ -13602,7 +13620,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins <brendan.higgins@linux.dev> M: David Gow <davidgow@google.com> -R: Rae Moar <rmoar@google.com> +R: Rae Moar <raemoar63@gmail.com> L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained @@ -13643,7 +13661,7 @@ F: virt/kvm/* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Marc Zyngier <maz@kernel.org> -M: Oliver Upton <oliver.upton@linux.dev> +M: Oliver Upton <oupton@kernel.org> R: Joey Gouly <joey.gouly@arm.com> R: Suzuki K Poulose <suzuki.poulose@arm.com> R: Zenghui Yu <yuzenghui@huawei.com> @@ -13717,7 +13735,7 @@ KERNEL VIRTUAL MACHINE for s390 (KVM/s390) M: Christian Borntraeger <borntraeger@linux.ibm.com> M: Janosch Frank <frankja@linux.ibm.com> M: Claudio Imbrenda <imbrenda@linux.ibm.com> -R: David Hildenbrand <david@redhat.com> +R: David Hildenbrand <david@kernel.org> L: kvm@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git @@ -14394,6 +14412,7 @@ F: tools/memory-model/ LINUX-NEXT TREE M: Stephen Rothwell <sfr@canb.auug.org.au> +M: Mark Brown <broonie@kernel.org> L: linux-next@vger.kernel.org S: Supported B: mailto:linux-next@vger.kernel.org and the appropriate development tree @@ -16203,7 +16222,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git F: drivers/devfreq/tegra30-devfreq.c MEMORY HOT(UN)PLUG -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> M: Oscar Salvador <osalvador@suse.de> L: linux-mm@kvack.org S: Maintained @@ -16228,7 +16247,7 @@ F: tools/mm/ MEMORY MANAGEMENT - CORE M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@suse.cz> @@ -16284,7 +16303,7 @@ F: mm/execmem.c MEMORY MANAGEMENT - GUP (GET USER PAGES) M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Jason Gunthorpe <jgg@nvidia.com> R: John Hubbard <jhubbard@nvidia.com> R: Peter Xu <peterx@redhat.com> @@ -16300,7 +16319,7 @@ F: tools/testing/selftests/mm/gup_test.c MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Xu Xin <xu.xin16@zte.com.cn> R: Chengming Zhou <chengming.zhou@linux.dev> L: linux-mm@kvack.org @@ -16316,7 +16335,7 @@ F: mm/mm_slot.h MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Zi Yan <ziy@nvidia.com> R: Matthew Brost <matthew.brost@intel.com> R: Joshua Hahn <joshua.hahnjy@gmail.com> @@ -16356,7 +16375,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - MISC M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@suse.cz> @@ -16444,7 +16463,7 @@ F: mm/shuffle.h MEMORY MANAGEMENT - RECLAIM M: Andrew Morton <akpm@linux-foundation.org> M: Johannes Weiner <hannes@cmpxchg.org> -R: David Hildenbrand <david@redhat.com> +R: David Hildenbrand <david@kernel.org> R: Michal Hocko <mhocko@kernel.org> R: Qi Zheng <zhengqi.arch@bytedance.com> R: Shakeel Butt <shakeel.butt@linux.dev> @@ -16457,7 +16476,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Rik van Riel <riel@surriel.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> @@ -16481,12 +16500,12 @@ F: mm/secretmem.c MEMORY MANAGEMENT - SWAP M: Andrew Morton <akpm@linux-foundation.org> +M: Chris Li <chrisl@kernel.org> +M: Kairui Song <kasong@tencent.com> R: Kemeng Shi <shikemeng@huaweicloud.com> -R: Kairui Song <kasong@tencent.com> R: Nhat Pham <nphamcs@gmail.com> R: Baoquan He <bhe@redhat.com> R: Barry Song <baohua@kernel.org> -R: Chris Li <chrisl@kernel.org> L: linux-mm@kvack.org S: Maintained F: Documentation/mm/swap-table.rst @@ -16502,7 +16521,7 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton <akpm@linux-foundation.org> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Zi Yan <ziy@nvidia.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> @@ -16604,7 +16623,7 @@ MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton <akpm@linux-foundation.org> M: Liam R. Howlett <Liam.Howlett@oracle.com> M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> R: Vlastimil Babka <vbabka@suse.cz> R: Jann Horn <jannh@google.com> L: linux-mm@kvack.org @@ -20146,6 +20165,7 @@ R: Alexander Shishkin <alexander.shishkin@linux.intel.com> R: Jiri Olsa <jolsa@kernel.org> R: Ian Rogers <irogers@google.com> R: Adrian Hunter <adrian.hunter@intel.com> +R: James Clark <james.clark@linaro.org> L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported @@ -21317,6 +21337,7 @@ F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER M: Loic Poulain <loic.poulain@oss.qualcomm.com> L: wcn36xx@lists.infradead.org +L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx F: drivers/net/wireless/ath/wcn36xx/ @@ -26885,7 +26906,7 @@ S: Maintained F: drivers/vfio/cdx/* VFIO DRIVER -M: Alex Williamson <alex.williamson@redhat.com> +M: Alex Williamson <alex@shazbot.org> L: kvm@vger.kernel.org S: Maintained T: git https://github.com/awilliam/linux-vfio.git @@ -27048,7 +27069,7 @@ T: git git://linuxtv.org/media.git F: drivers/media/test-drivers/vimc/* VIRT LIB -M: Alex Williamson <alex.williamson@redhat.com> +M: Alex Williamson <alex@shazbot.org> M: Paolo Bonzini <pbonzini@redhat.com> L: kvm@vger.kernel.org S: Supported @@ -27069,7 +27090,7 @@ F: net/vmw_vsock/virtio_transport_common.c VIRTIO BALLOON M: "Michael S. Tsirkin" <mst@redhat.com> -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> L: virtualization@lists.linux.dev S: Maintained F: drivers/virtio/virtio_balloon.c @@ -27224,7 +27245,7 @@ F: drivers/iommu/virtio-iommu.c F: include/uapi/linux/virtio_iommu.h VIRTIO MEM DRIVER -M: David Hildenbrand <david@redhat.com> +M: David Hildenbrand <david@kernel.org> L: virtualization@lists.linux.dev S: Maintained W: https://virtio-mem.gitlab.io/ @@ -27830,7 +27851,7 @@ F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c X86 TRUST DOMAIN EXTENSIONS (TDX) -M: Kirill A. Shutemov <kas@kernel.org> +M: Kiryl Shutsemau <kas@kernel.org> R: Dave Hansen <dave.hansen@linux.intel.com> R: Rick Edgecombe <rick.p.edgecombe@intel.com> L: x86@kernel.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index ebe08b9186ad..61130b88964b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -917,6 +917,13 @@ config ARCH_USES_CFI_TRAPS An architecture should select this option if it requires the .kcfi_traps section for KCFI trap handling. +config ARCH_USES_CFI_GENERIC_LLVM_PASS + bool + help + An architecture should select this option if it uses the generic + KCFIPass in LLVM to expand kCFI bundles instead of architecture-specific + lowering. + config CFI bool "Use Kernel Control Flow Integrity (kCFI)" default CFI_CLANG @@ -965,6 +972,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 + depends on ARM64 || X86_64 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2e3f93b690f4..4fb985b76e97 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -44,6 +44,8 @@ config ARM select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST + # https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de + select ARCH_USES_CFI_GENERIC_LLVM_PASS if CLANG_VERSION < 220000 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi index c78ed064d166..1eb6406449d1 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi @@ -77,6 +77,14 @@ /delete-property/ pinctrl-0; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &rmem { /* * RPi4's co-processor will copy the board's bootloader configuration diff --git a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi index 8b3c21d9f333..fa9d784c88b6 100644 --- a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi @@ -13,7 +13,16 @@ clock-names = "pixel", "hdmi"; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &v3d { + clocks = <&firmware_clocks 5>; power-domains = <&power RPI_POWER_DOMAIN_V3D>; }; diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index e77a66adc22a..205b87f557d6 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -326,6 +326,8 @@ <0x7fffe000 0x2000>; interrupt-controller; #address-cells = <0>; + interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | + IRQ_TYPE_LEVEL_HIGH)>; #interrupt-cells = <3>; }; diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..51746005239b 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -26,9 +26,12 @@ void __init apply_alternatives_all(void); bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length); +int apply_alternatives_module(void *start, size_t length); #else -static inline void apply_alternatives_module(void *start, size_t length) { } +static inline int apply_alternatives_module(void *start, size_t length) +{ + return 0; +} #endif void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..99a7c0235e6d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -24,22 +24,48 @@ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - * * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH cmp x1, #0 - b.ge .LnVHE_\@ + b.lt .LnE2H0_\@ + /* + * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised + * as such via ID_AA64MMFR4_EL1.E2H0: + * + * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to + * have HCR_EL2.E2H implemented as RAO/WI. + * + * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest + * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV + * guests on these hosts can write to HCR_EL2.E2H without + * trapping to the hypervisor, but these writes have no + * functional effect. + * + * Handle both cases by checking for an essential VHE property + * (system register remapping) to decide whether we're + * effectively VHE-only or not. + */ + msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE + isb + mov x1, #1 // Write something to FAR_EL1 + msr far_el1, x1 + isb + mov x1, #2 // Try to overwrite it via FAR_EL2 + msr far_el2, x1 + isb + mrs x1, far_el1 // If we see the latest write in FAR_EL1, + cmp x1, #2 // we can safely assume we are VHE only. + b.ne .LnVHE_\@ // Otherwise, we know that nVHE works. + +.LnE2H0_\@: orr x0, x0, #HCR_E2H -.LnVHE_\@: msr_hcr_el2 x0 isb +.LnVHE_\@: .endm .macro __init_el2_sctlr diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index a81937fae9f6..21dbc9dda747 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -10,8 +10,6 @@ #include <asm/set_memory.h> -static inline bool arch_kfence_init_pool(void) { return true; } - static inline bool kfence_protect_page(unsigned long addr, bool protect) { set_memory_valid(addr, 1, !protect); @@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void) { return !kfence_early_init; } +bool arch_kfence_init_pool(void); #else /* CONFIG_KFENCE */ static inline bool arm64_kfence_can_set_direct_map(void) { return false; } #endif /* CONFIG_KFENCE */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b763293281c8..64302c438355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -816,6 +816,11 @@ struct kvm_vcpu_arch { u64 hcrx_el2; u64 mdcr_el2; + struct { + u64 r; + u64 w; + } fgt[__NR_FGT_GROUP_IDS__]; + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void) void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); void check_feature_map(void); +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); + +static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + case HFGWTR_EL2: + return HFGRTR_GROUP; + case HFGITR_EL2: + return HFGITR_GROUP; + case HDFGRTR_EL2: + case HDFGWTR_EL2: + return HDFGRTR_GROUP; + case HAFGRTR_EL2: + return HAFGRTR_GROUP; + case HFGRTR2_EL2: + case HFGWTR2_EL2: + return HFGRTR2_GROUP; + case HFGITR2_EL2: + return HFGITR2_GROUP; + case HDFGRTR2_EL2: + case HDFGWTR2_EL2: + return HDFGRTR2_GROUP; + default: + BUILD_BUG_ON(1); + } +} +#define vcpu_fgt(vcpu, reg) \ + ({ \ + enum fgt_group_id id = __fgt_reg_to_group_id(reg); \ + u64 *p; \ + switch (reg) { \ + case HFGWTR_EL2: \ + case HDFGWTR_EL2: \ + case HFGWTR2_EL2: \ + case HDFGWTR2_EL2: \ + p = &(vcpu)->arch.fgt[id].w; \ + break; \ + default: \ + p = &(vcpu)->arch.fgt[id].r; \ + break; \ + } \ + \ + p; \ + }) #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9abcc8ef3087..b57b2bb00967 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ " cbnz %w[loop], 1b", \ /* LSE atomics */ \ - #op_lse "\t%" #w "[val], %[ptr]\n" \ + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ __nops(3)) \ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ [ptr] "+Q"(*(u##sz *)ptr) \ @@ -124,9 +124,16 @@ PERCPU_RW_OPS(8) PERCPU_RW_OPS(16) PERCPU_RW_OPS(32) PERCPU_RW_OPS(64) -PERCPU_OP(add, add, stadd) -PERCPU_OP(andnot, bic, stclr) -PERCPU_OP(or, orr, stset) + +/* + * Use value-returning atomics for CPU-local ops as they are more likely + * to execute "near" to the CPU (e.g. in L1$). + * + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop + */ +PERCPU_OP(add, add, ldadd) +PERCPU_OP(andnot, bic, ldclr) +PERCPU_OP(or, orr, ldset) PERCPU_RET_OP(add, add, ldadd) #undef PERCPU_RW_OPS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..0944e296dd4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -293,7 +293,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (pte_sw_dirty(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); return pte; } diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..c59f6324f2bb 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -53,7 +53,7 @@ enum { EDYNSCS_INVALID_CFA_OPCODE = 4, }; -int __pi_scs_patch(const u8 eh_frame[], int size); +int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..900454aaa292 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_print_disabled_mitigations(void); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..c231d2a3e515 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1220,10 +1220,19 @@ __val; \ }) +/* + * The "Z" constraint combined with the "%x0" template should be enough + * to force XZR generation if (v) is a constant 0 value but LLVM does not + * yet understand that modifier/constraint combo so a conditional is required + * to nudge the compiler into using XZR as a source for a 0 constant value. + */ #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ u32 __maybe_unused __check_r = (u32)(r); \ - asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ + if (__builtin_constant_p(__val) && __val == 0) \ + asm volatile(__msr_s(r, "xzr")); \ + else \ + asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \ } while (0) /* diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..c022c1acb8c7 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,8 +197,6 @@ out: */ void __init acpi_boot_table_init(void) { - int ret; - /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -252,12 +250,8 @@ done: * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, + acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) - pr_info("Use ACPI SPCR as default console: No\n"); - else - pr_info("Use ACPI SPCR as default console: Yes\n"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8ff6610af496..f5ec7e7c1d3f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(const struct alt_region *region, - bool is_module, - unsigned long *cpucap_mask) +static int __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (ALT_HAS_CB(alt)) + if (ALT_HAS_CB(alt)) { alt_cb = ALT_REPL_PTR(alt); - else + if (is_module && !core_kernel_text((unsigned long)alt_cb)) + return -ENOEXEC; + } else { alt_cb = patch_alternative; + } alt_cb(alt, origptr, updptr, nr_inst); @@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region, bitmap_and(applied_alternatives, applied_alternatives, system_cpucaps, ARM64_NCAPS); } + + return 0; } static void __init apply_alternatives_vdso(void) @@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void) } #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length) +int apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, @@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length) bitmap_fill(all_capabilities, ARM64_NCAPS); - __apply_alternatives(®ion, true, &all_capabilities[0]); + return __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..e25b0f84a22d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -95,6 +95,7 @@ #include <asm/vectors.h> #include <asm/virt.h> +#include <asm/spectre.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void) */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + + /* + * Report Spectre mitigations status. + */ + spectre_print_disabled_mitigations(); } void __init setup_system_features(void) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..a9c81715ce59 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) { + bool step_done; + if (!is_ttbr0_addr(regs->pc)) arm64_apply_bp_hardening(); @@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete. */ - if (!try_step_suspended_breakpoints(regs)) { - local_daif_restore(DAIF_PROCCTX); + step_done = try_step_suspended_breakpoints(regs); + local_daif_restore(DAIF_PROCCTX); + if (!step_done) do_el0_softstep(esr, regs); - } arm64_exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d6d443c4a01a..24adb581af0e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -489,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr, int ret; s = find_section(hdr, sechdrs, ".altinstructions"); - if (s) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (s) { + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (ret < 0) { + pr_err("module %s: error occurred when applying alternatives\n", me->name); + return ret; + } + } if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); if (s) { - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); - if (ret) + /* + * Because we can reject modules that are malformed + * so SCS patching fails, skip dry run and try to patch + * it in place. If patching fails, the module would not + * be loaded anyway. + */ + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); + if (ret) { pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", me->name, ret); + return -ENOEXEC; + } } } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 43f7a2f39403..32148bf09c1d 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, folio = page_folio(page); if (folio_test_hugetlb(folio)) - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) && + !is_huge_zero_folio(folio)); else WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..659297f87cfa 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) if (enable_scs) { scs_patch(__eh_frame_start + va_offset, - __eh_frame_end - __eh_frame_start); + __eh_frame_end - __eh_frame_start, false); asm("ic ialluis"); dynamic_scs_is_enabled = true; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 55d0cd64ef71..bbe7d30ed12b 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, return 0; } -int scs_patch(const u8 eh_frame[], int size) +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) { int code_alignment_factor = 1; bool fde_use_sdata8 = false; @@ -277,11 +277,13 @@ int scs_patch(const u8 eh_frame[], int size) } } else { ret = scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, true); + fde_use_sdata8, !skip_dry_run); if (ret) return ret; - scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, false); + + if (!skip_dry_run) + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 08ef9f80456b..aec3172d4003 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); void relocate_kernel(u64 offset); -int scs_patch(const u8 eh_frame[], int size); +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 8ab6104a4883..43a0361a8bf0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -49,7 +49,10 @@ void *alloc_insn_page(void) addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!addr) return NULL; - set_memory_rox((unsigned long)addr, 1); + if (set_memory_rox((unsigned long)addr, 1)) { + execmem_free(addr); + return NULL; + } return addr; } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f9a32dfde006..c7d70d04c164 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param); static bool spectre_v2_mitigations_off(void) { - bool ret = __nospectre_v2 || cpu_mitigations_off(); - - if (ret) - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); - - return ret; + return __nospectre_v2 || cpu_mitigations_off(); } static const char *get_bhb_affected_string(enum mitigation_state bhb_state) @@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param); */ static bool spectre_v4_mitigations_off(void) { - bool ret = cpu_mitigations_off() || - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; - - if (ret) - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); - - return ret; + return cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; } /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ @@ -1042,10 +1032,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ - } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); - } else if (cpu_mitigations_off() || __nospectre_bhb) { - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; set_bit(BHB_HW, &system_bhb_mitigations); @@ -1199,3 +1185,18 @@ void unpriv_ebpf_notify(int new_state) pr_err("WARNING: %s", EBPF_WARN); } #endif + +void spectre_print_disabled_mitigations(void) +{ + /* Keep a single copy of the common message suffix to avoid duplication. */ + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; + + if (spectre_v2_mitigations_off()) + pr_info("spectre-v2 %s", spectre_disabled_suffix); + + if (spectre_v4_mitigations_off()) + pr_info("spectre-v4 %s", spectre_disabled_suffix); + + if (__nospectre_bhb || cpu_mitigations_off()) + pr_info("spectre-bhb %s", spectre_disabled_suffix); +} diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..3f675875abea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu) u32 timer_get_ctl(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) u64 timer_get_cval(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } -static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) -{ - if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); - return; - } - - WRITE_ONCE(*ctxt->offset.vm_offset, offset); -} - u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) u64 ns; ctx = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ctx->vcpu; + vcpu = timer_context_to_vcpu(ctx); trace_kvm_timer_hrtimer_expire(ctx); @@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) * * But hey, it's fast, right? */ - if (is_hyp_ctxt(ctx->vcpu) && - (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + if (is_hyp_ctxt(vcpu) && + (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { unsigned long val = timer_get_ctl(ctx); __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); timer_set_ctl(ctx, val); @@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); if (should_fire != ctx->irq.level) - kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); kvm_timer_update_status(ctx, should_fire); @@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff) static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct kvm_vcpu *vcpu = ctx->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); bool phys_active = false; /* @@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); @@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); struct kvm *kvm = vcpu->kvm; - ctxt->vcpu = vcpu; + ctxt->timer_id = timerid; if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; @@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void) disable_percpu_irq(host_ptimer_irq); } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_context *timer; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_TIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_TIMER_CVAL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - case KVM_REG_ARM_PTIMER_CTL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_PTIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_ptimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_PTIMER_CVAL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - - default: - return -1; - } - - return 0; -} - static u64 read_timer_ctl(struct arch_timer_context *timer) { /* @@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) return ctl; } -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_TIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_TIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL); - case KVM_REG_ARM_PTIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_PTIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_PTIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL); - } - return (u64)-1; -} - static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f21d1b7f20f8..870953b4a8a7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -642,6 +642,7 @@ nommu: vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); + kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, @@ -1794,6 +1795,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (kvm_arm_vcpu_get_events(vcpu, &events)) return -EINVAL; @@ -1805,6 +1809,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (copy_from_user(&events, argp, sizeof(events))) return -EFAULT; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 20bb9af125b1..be26d5aa668c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -91,7 +91,6 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o case OP_AT_S1E2W: case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; - break; default: return (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; @@ -1602,13 +1601,17 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) .fn = match_s1_desc, .priv = &dm, }, - .regime = TR_EL10, .as_el0 = false, .pan = false, }; struct s1_walk_result wr = {}; int ret; + if (is_hyp_ctxt(vcpu)) + wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; + else + wi.regime = TR_EL10; + ret = setup_s1_walk(vcpu, &wi, &wr, va); if (ret) return ret; diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index fbd8944a3dea..24bb3f36e9d5 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -5,6 +5,8 @@ */ #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_nested.h> #include <asm/sysreg.h> /* @@ -1428,3 +1430,91 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r break; } } + +static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + return &hfgrtr_masks; + case HFGWTR_EL2: + return &hfgwtr_masks; + case HFGITR_EL2: + return &hfgitr_masks; + case HDFGRTR_EL2: + return &hdfgrtr_masks; + case HDFGWTR_EL2: + return &hdfgwtr_masks; + case HAFGRTR_EL2: + return &hafgrtr_masks; + case HFGRTR2_EL2: + return &hfgrtr2_masks; + case HFGWTR2_EL2: + return &hfgwtr2_masks; + case HFGITR2_EL2: + return &hfgitr2_masks; + case HDFGRTR2_EL2: + return &hdfgrtr2_masks; + case HDFGWTR2_EL2: + return &hdfgwtr2_masks; + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)]; + struct fgt_masks *m = __fgt_reg_to_masks(reg); + u64 clear = 0, set = 0, val = m->nmask; + + set |= fgu & m->mask; + clear |= fgu & m->nmask; + + if (is_nested_ctxt(vcpu)) { + u64 nested = __vcpu_sys_reg(vcpu, reg); + set |= nested & m->mask; + clear |= ~nested & m->nmask; + } + + val |= set; + val &= ~clear; + *vcpu_fgt(vcpu, reg) = val; +} + +static void __compute_hfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HFGWTR_EL2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; +} + +static void __compute_hdfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HDFGWTR_EL2); + + if (is_hyp_ctxt(vcpu)) + *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1; +} + +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + __compute_fgt(vcpu, HFGRTR_EL2); + __compute_hfgwtr(vcpu); + __compute_fgt(vcpu, HFGITR_EL2); + __compute_fgt(vcpu, HDFGRTR_EL2); + __compute_hdfgwtr(vcpu); + __compute_fgt(vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __compute_fgt(vcpu, HFGRTR2_EL2); + __compute_fgt(vcpu, HFGWTR2_EL2); + __compute_fgt(vcpu, HFGITR2_EL2); + __compute_fgt(vcpu, HDFGRTR2_EL2); + __compute_fgt(vcpu, HDFGWTR2_EL2); +} diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3515a273eaa2..3ad6b7c6e4ba 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -15,6 +15,12 @@ #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +static int cpu_has_spe(u64 dfr0) +{ + return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P); +} + /** * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value * @@ -77,13 +83,12 @@ void kvm_init_host_debug_data(void) *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); + if (cpu_has_spe(dfr0)) + host_data_set_flag(HAS_SPE); + if (has_vhe()) return; - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) - host_data_set_flag(HAS_SPE); - /* Check if we have BRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) host_data_set_flag(HAS_BRBE); @@ -102,7 +107,7 @@ void kvm_init_host_debug_data(void) void kvm_debug_init_vhe(void) { /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ - if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + if (host_data_test_flag(HAS_SPE)) write_sysreg_el1(0, SYS_PMSCR); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 16ba5e9ac86c..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -591,64 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CTL, - KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_TIMER_CVAL, - KVM_REG_ARM_PTIMER_CTL, - KVM_REG_ARM_PTIMER_CNT, - KVM_REG_ARM_PTIMER_CVAL, -}; - -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - -static bool is_timer_reg(u64 index) -{ - switch (index) { - case KVM_REG_ARM_TIMER_CTL: - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: - case KVM_REG_ARM_PTIMER_CTL: - case KVM_REG_ARM_PTIMER_CNT: - case KVM_REG_ARM_PTIMER_CVAL: - return true; - } - return false; -} - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - for (int i = 0; i < NUM_TIMER_REGS; i++) { - if (put_user(timer_reg_list[i], uindices)) - return -EFAULT; - uindices++; - } - - return 0; -} - -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int ret; - - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) - return -EFAULT; - - return kvm_arm_timer_set_reg(vcpu, reg->id, val); -} - -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; -} - static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); @@ -724,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; return res; } @@ -755,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += kvm_arm_get_fw_num_regs(vcpu); - ret = copy_timer_indices(vcpu, uindices); - if (ret < 0) - return ret; - uindices += NUM_TIMER_REGS; - return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -777,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -797,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bca8c80e11da..cc7d5d1709cb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) if (esr & ESR_ELx_WFx_ISS_RV) { u64 val, now; - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + now = kvm_phys_timer_read(); + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + now -= timer_get_offset(vcpu_hvtimer(vcpu)); + else + now -= timer_get_offset(vcpu_vtimer(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); if (now >= val) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b6682202edf3..c5d5e5b86eaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) __deactivate_cptr_traps_nvhe(vcpu); } -#define reg_to_fgt_masks(reg) \ - ({ \ - struct fgt_masks *m; \ - switch(reg) { \ - case HFGRTR_EL2: \ - m = &hfgrtr_masks; \ - break; \ - case HFGWTR_EL2: \ - m = &hfgwtr_masks; \ - break; \ - case HFGITR_EL2: \ - m = &hfgitr_masks; \ - break; \ - case HDFGRTR_EL2: \ - m = &hdfgrtr_masks; \ - break; \ - case HDFGWTR_EL2: \ - m = &hdfgwtr_masks; \ - break; \ - case HAFGRTR_EL2: \ - m = &hafgrtr_masks; \ - break; \ - case HFGRTR2_EL2: \ - m = &hfgrtr2_masks; \ - break; \ - case HFGWTR2_EL2: \ - m = &hfgwtr2_masks; \ - break; \ - case HFGITR2_EL2: \ - m = &hfgitr2_masks; \ - break; \ - case HDFGRTR2_EL2: \ - m = &hdfgrtr2_masks; \ - break; \ - case HDFGWTR2_EL2: \ - m = &hdfgwtr2_masks; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - m; \ - }) - -#define compute_clr_set(vcpu, reg, clr, set) \ - do { \ - u64 hfg = __vcpu_sys_reg(vcpu, reg); \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= ~hfg & m->nmask; \ - } while(0) - -#define reg_to_fgt_group_id(reg) \ - ({ \ - enum fgt_group_id id; \ - switch(reg) { \ - case HFGRTR_EL2: \ - case HFGWTR_EL2: \ - id = HFGRTR_GROUP; \ - break; \ - case HFGITR_EL2: \ - id = HFGITR_GROUP; \ - break; \ - case HDFGRTR_EL2: \ - case HDFGWTR_EL2: \ - id = HDFGRTR_GROUP; \ - break; \ - case HAFGRTR_EL2: \ - id = HAFGRTR_GROUP; \ - break; \ - case HFGRTR2_EL2: \ - case HFGWTR2_EL2: \ - id = HFGRTR2_GROUP; \ - break; \ - case HFGITR2_EL2: \ - id = HFGITR2_GROUP; \ - break; \ - case HDFGRTR2_EL2: \ - case HDFGWTR2_EL2: \ - id = HDFGRTR2_GROUP; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - id; \ - }) - -#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ - do { \ - u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= hfg & m->nmask; \ - } while(0) - -#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ - do { \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - u64 c = clr, s = set; \ - u64 val; \ - \ - ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (is_nested_ctxt(vcpu)) \ - compute_clr_set(vcpu, reg, c, s); \ - \ - compute_undef_clr_set(vcpu, kvm, reg, c, s); \ - \ - val = m->nmask; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } while(0) - -#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ - update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void) ID_AA64PFR0_EL1_AMU_SHIFT); } +#define __activate_fgt(hctxt, vcpu, reg) \ + do { \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \ + } while (0) + static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); - update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, - cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGWTR_EL2_TCR_EL1_MASK : 0); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGITR_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGITR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } #define __deactivate_fgt(htcxt, vcpu, reg) \ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4e16f9b96f63..58b7d0c477d7 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id, struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; - u32 offset, nr_ranges; + u32 offset, nr_ranges, checked_offset; int ret = 0; if (addr_mbz || npages_mbz || fraglen > len || @@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } - if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { + if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + if (fraglen < checked_offset) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ddc8beb55eee..49db32f3ddf7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +/* + * Ensure the PFN range is contained within PA-range. + * + * This check is also robust to overflows and is therefore a requirement before + * using a pfn/nr_pages pair from an untrusted source. + */ +static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) +{ + u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); + + return pfn < limit && ((limit - pfn) >= nr_pages); +} + struct kvm_mem_range { u64 start; u64 end; @@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) void *virt = __hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) u64 virt = (u64)__hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (!ret) @@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (!ret) @@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size); if (ret) return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 05774aed09cb..43bde061b65d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. */ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; + memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7a045cad6bdf..f04cda40545b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1859,13 +1859,16 @@ void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) { u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + if (is_nested_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); /* * In yet another example where FEAT_NV2 is fscking broken, accesses * to MDSCR_EL1 are redirected to the VNCR despite having an effect * at EL2. Use a big hammer to apply sanity. + * + * Unless of course we have FEAT_FGT, in which case we can precisely + * trap MDSCR_EL1. */ - if (is_hyp_ctxt(vcpu)) + else if (!cpus_have_final_cap(ARM64_HAS_FGT)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; - else - vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 91053aa832d0..8ae2bca81614 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); case CNTHCTL_EL2: @@ -1595,14 +1594,47 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -static bool access_hv_timer(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) { - if (!vcpu_el2_e2h_is_set(vcpu)) - return undef_access(vcpu, p, r); + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; + } - return access_arch_timer(vcpu, p, r); + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + +static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } + + return 0; } static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, @@ -2507,15 +2539,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + #define EL2_REG(name, acc, rst, v) \ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) @@ -2526,6 +2563,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + arch_timer_get_user, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. @@ -2554,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, .val = 0, \ } -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define AA32_ID_SANITISED(name) { \ - ID_DESC(name), \ - .visibility = aa32_id_visibility, \ - .val = 0, \ -} - /* sys_reg_desc initialiser for writable ID registers */ #define ID_WRITABLE(name, mask) { \ ID_DESC(name), \ .val = mask, \ } +/* + * 32bit ID regs are fully writable when the guest is 32bit + * capable. Nothing in the KVM code should rely on 32bit features + * anyway, only 64bit, so let the VMM do its worse. + */ +#define AA32_ID_WRITABLE(name) { \ + ID_DESC(name), \ + .visibility = aa32_id_visibility, \ + .val = GENMASK(31, 0), \ +} + /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ #define ID_FILTERED(sysreg, name, mask) { \ ID_DESC(sysreg), \ @@ -2705,18 +2750,17 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); - + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } @@ -2833,6 +2877,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3078,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - AA32_ID_SANITISED(ID_PFR0_EL1), - AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_WRITABLE(ID_PFR0_EL1), + AA32_ID_WRITABLE(ID_PFR1_EL1), { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK | - ID_DFR0_EL1_CopDbg_MASK, }, + .val = GENMASK(31, 0) }, ID_HIDDEN(ID_AFR0_EL1), - AA32_ID_SANITISED(ID_MMFR0_EL1), - AA32_ID_SANITISED(ID_MMFR1_EL1), - AA32_ID_SANITISED(ID_MMFR2_EL1), - AA32_ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_WRITABLE(ID_MMFR0_EL1), + AA32_ID_WRITABLE(ID_MMFR1_EL1), + AA32_ID_WRITABLE(ID_MMFR2_EL1), + AA32_ID_WRITABLE(ID_MMFR3_EL1), /* CRm=2 */ - AA32_ID_SANITISED(ID_ISAR0_EL1), - AA32_ID_SANITISED(ID_ISAR1_EL1), - AA32_ID_SANITISED(ID_ISAR2_EL1), - AA32_ID_SANITISED(ID_ISAR3_EL1), - AA32_ID_SANITISED(ID_ISAR4_EL1), - AA32_ID_SANITISED(ID_ISAR5_EL1), - AA32_ID_SANITISED(ID_MMFR4_EL1), - AA32_ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_WRITABLE(ID_ISAR0_EL1), + AA32_ID_WRITABLE(ID_ISAR1_EL1), + AA32_ID_WRITABLE(ID_ISAR2_EL1), + AA32_ID_WRITABLE(ID_ISAR3_EL1), + AA32_ID_WRITABLE(ID_ISAR4_EL1), + AA32_ID_WRITABLE(ID_ISAR5_EL1), + AA32_ID_WRITABLE(ID_MMFR4_EL1), + AA32_ID_WRITABLE(ID_ISAR6_EL1), /* CRm=3 */ - AA32_ID_SANITISED(MVFR0_EL1), - AA32_ID_SANITISED(MVFR1_EL1), - AA32_ID_SANITISED(MVFR2_EL1), + AA32_ID_WRITABLE(MVFR0_EL1), + AA32_ID_WRITABLE(MVFR1_EL1), + AA32_ID_WRITABLE(MVFR2_EL1), ID_UNALLOCATED(3,3), - AA32_ID_SANITISED(ID_PFR2_EL1), + AA32_ID_WRITABLE(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - AA32_ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_WRITABLE(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -3482,17 +3535,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), + TIMER_REG(CNTP_CVAL_EL0, NULL), { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -3690,12 +3745,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, - EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, @@ -5233,15 +5288,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5274,13 +5342,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5340,10 +5409,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; @@ -5527,11 +5609,13 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && - irqchip_in_kernel(kvm) && - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; + if (!irqchip_in_kernel(kvm)) { + u64 val; + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); } if (vcpu_has_nv(vcpu)) { diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 317abc490368..b3f904472fac 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu); (val); \ }) +#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 4c1209261b65..bb92853d1fd3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) static int iter_mark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; int nr_lpis = 0; + xa_lock_irqsave(&dist->lpi_xa, flags); + xa_for_each(&dist->lpi_xa, intid, irq) { if (!vgic_try_get_irq_ref(irq)) continue; - xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); nr_lpis++; } + xa_unlock_irqrestore(&dist->lpi_xa, flags); + return nr_lpis; } static void iter_unmark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + /* vgic_put_irq() expects to be called outside of the xa_lock */ vgic_put_irq(kvm, irq); } } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1796b1a22a72..da62edbc1205 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init(&dist->lpi_xa); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ @@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type) { struct kvm_vcpu *vcpu; + u64 aa64pfr0, pfr1; unsigned long i; int ret; @@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - else + } else { INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index ce3e3ed3f29f..3f1c4b10fed9 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -114,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - - goto out_unlock; + } else { + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); } - ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + if (ret) { xa_release(&dist->lpi_xa, intid); kfree(irq); - } - -out_unlock: - xa_unlock(&dist->lpi_xa); - if (ret) return ERR_PTR(ret); + } /* * We "cache" the configuration table entries in our struct vgic_irq's. diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index f1c153106c56..2f75ef14d339 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -297,8 +297,12 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + if (!vgic_is_v3(vcpu->kvm)) + return; + /* Hide GICv3 sysreg if necessary */ - if (!kvm_has_gicv3(vcpu->kvm)) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || + !irqchip_in_kernel(vcpu->kvm)) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 6dd5a10081e2..8d20c53faef0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_dist->lpi_xa.xa_lock + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * @@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; - if (irq->intid >= VGIC_MIN_LPI) - might_lock(&dist->lpi_xa.xa_lock); + /* + * Normally the lock is only taken when the refcount drops to 0. + * Acquire/release it early on lockdep kernels to make locking issues + * in rare release paths a bit more obvious. + */ + if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { + guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); + } if (!__vgic_put_irq(kvm, irq)) return; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); vgic_release_lpi_locked(dist, irq); - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } static void vgic_release_deleted_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid; + unsigned long flags, intid; struct vgic_irq *irq; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); xa_for_each(&dist->lpi_xa, intid, irq) { if (irq->pending_release) vgic_release_lpi_locked(dist, irq); } - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index a86c897017df..cd5912ba617b 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -35,7 +35,7 @@ void copy_highpage(struct page *to, struct page *from) from != folio_page(src, 0)) return; - WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + folio_try_hugetlb_mte_tagging(dst); /* * Populate tags for all subpages. @@ -51,8 +51,13 @@ void copy_highpage(struct page *to, struct page *from) } folio_set_hugetlb_mte_tagged(dst); } else if (page_mte_tagged(from)) { - /* It's a new page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(to)); + /* + * Most of the time it's a new page that shouldn't have been + * tagged yet. However, folio migration can end up reusing the + * same page without untagging it. Ignore the warning if the + * page is already tagged. + */ + try_page_mte_tagging(to); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d816ff44faff..125dfa6c613b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -969,6 +969,16 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { + /* + * Check if MTE is supported and fall back to clear_highpage(). + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and + * post_alloc_hook() will invoke tag_clear_highpage(). + */ + if (!system_supports_mte()) { + clear_highpage(page); + return; + } + /* Newly allocated page, shouldn't have been tagged yet */ WARN_ON_ONCE(!try_page_mte_tagging(page)); mte_zero_clear_page_tags(page_address(page)); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..2ba01dc8ef82 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -708,6 +708,30 @@ out: return ret; } +static inline bool force_pte_mapping(void) +{ + const bool bbml2 = system_capabilities_finalized() ? + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); + + if (debug_pagealloc_enabled()) + return true; + if (bbml2) + return false; + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); +} + +static inline bool split_leaf_mapping_possible(void) +{ + /* + * !BBML2_NOABORT systems should never run into scenarios where we would + * have to split. So exit early and let calling code detect it and raise + * a warning. + */ + if (!system_supports_bbml2_noabort()) + return false; + return !force_pte_mapping(); +} + static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -715,12 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. + * Exit early if the region is within a pte-mapped area or if we can't + * split. For the latter case, the permission change code will raise a + * warning if not already pte-mapped. */ - if (!system_supports_bbml2_noabort()) + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) return 0; /* @@ -758,30 +781,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) return ret; } -static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pud_t pud = pudp_get(pudp); int ret = 0; if (pud_leaf(pud)) - ret = split_pud(pudp, pud, GFP_ATOMIC, false); + ret = split_pud(pudp, pud, gfp, false); return ret; } -static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pmd_t pmd = pmdp_get(pmdp); int ret = 0; if (pmd_leaf(pmd)) { if (pmd_cont(pmd)) split_contpmd(pmdp); - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); + ret = split_pmd(pmdp, pmd, gfp, false); /* * We have split the pmd directly to ptes so there is no need to @@ -793,9 +816,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, return ret; } -static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) { pte_t pte = __ptep_get(ptep); @@ -805,12 +827,24 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, return 0; } -static const struct mm_walk_ops split_to_ptes_ops __initconst = { +static const struct mm_walk_ops split_to_ptes_ops = { .pud_entry = split_to_ptes_pud_entry, .pmd_entry = split_to_ptes_pmd_entry, .pte_entry = split_to_ptes_pte_entry, }; +static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) +{ + int ret; + + arch_enter_lazy_mmu_mode(); + ret = walk_kernel_page_table_range_lockless(start, end, + &split_to_ptes_ops, NULL, &gfp); + arch_leave_lazy_mmu_mode(); + + return ret; +} + static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; @@ -847,11 +881,9 @@ static int __init linear_map_split_to_ptes(void *__unused) * PTE. The kernel alias remains static throughout runtime so * can continue to be safely mapped with large mappings. */ - ret = walk_kernel_page_table_range_lockless(lstart, kstart, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); if (!ret) - ret = walk_kernel_page_table_range_lockless(kend, lend, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); if (ret) panic("Failed to split linear map\n"); flush_tlb_kernel_range(lstart, lend); @@ -1002,6 +1034,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); __kfence_pool = phys_to_virt(kfence_pool); } + +bool arch_kfence_init_pool(void) +{ + unsigned long start = (unsigned long)__kfence_pool; + unsigned long end = start + KFENCE_POOL_SIZE; + int ret; + + /* Exit early if we know the linear map is already pte-mapped. */ + if (!split_leaf_mapping_possible()) + return true; + + /* Kfence pool is already pte-mapped for the early init case. */ + if (kfence_early_init) + return true; + + mutex_lock(&pgtable_split_lock); + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); + mutex_unlock(&pgtable_split_lock); + + /* + * Since the system supports bbml2_noabort, tlb invalidation is not + * required here; the pgtable mappings have been split to pte but larger + * entries may safely linger in the TLB. + */ + + return !ret; +} #else /* CONFIG_KFENCE */ static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } @@ -1009,16 +1068,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { #endif /* CONFIG_KFENCE */ -static inline bool force_pte_mapping(void) -{ - bool bbml2 = system_capabilities_finalized() ? - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); -} - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ab83089c3d8f..0c9a50a1e73e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1213,6 +1213,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 tmp3 = bpf2a64[TMP_REG_3]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 priv_sp = bpf2a64[PRIVATE_SP]; @@ -1757,8 +1758,8 @@ emit_cond_jmp: case BPF_ST | BPF_PROBE_MEM32 | BPF_W: case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { - emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); - dst = tmp2; + emit(A64_ADD(1, tmp3, dst, arena_vm_base), ctx); + dst = tmp3; } if (dst == fp) { dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP; diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index 876028b1083f..064b0f0f95ca 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -21,7 +21,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, folio = page_folio(pfn_to_page(pfn)); - if (test_and_set_bit(PG_dcache_clean, &folio->flags)) + if (test_and_set_bit(PG_dcache_clean, &folio->flags.f)) return; icache_inv_range(address, address + nr*PAGE_SIZE); diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 6513ac5d2578..da51a0f02391 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -20,8 +20,8 @@ static inline void flush_dcache_folio(struct folio *folio) { - if (test_bit(PG_dcache_clean, &folio->flags)) - clear_bit(PG_dcache_clean, &folio->flags); + if (test_bit(PG_dcache_clean, &folio->flags.f)) + clear_bit(PG_dcache_clean, &folio->flags.f); } #define flush_dcache_folio flush_dcache_folio diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index dc5bd3f1b8d2..96ca1a688984 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -109,7 +109,7 @@ endif ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump else -KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers +KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) # keep compatibility with older compilers endif ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index fc83bb32f9f0..bd5f0457ad21 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -67,6 +67,8 @@ #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) +#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) +#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index dfb982fe8701..d4cd4041bee7 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -101,7 +101,9 @@ enum cpu_type_enum { #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -132,6 +134,8 @@ enum cpu_type_enum { #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) +#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) +#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 13b2462f3d8c..5faa97a87a9e 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; + return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; } /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; + return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; } #endif /* __KERNEL__ */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index eaff72b38dc8..0130185e0349 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,7 +14,7 @@ #include <asm/pgtable-bits.h> #include <asm/string.h> -extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap @@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, pgprot_t prot) { + if (offset > TO_PHYS_MASK) + return NULL; + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 09dfd7eb406e..3de03cb864b2 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -128,6 +128,7 @@ #define CPUCFG6_PMNUM GENMASK(7, 4) #define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_PMBITS_SHIFT 8 #define CPUCFG6_UPM BIT(14) #define LOONGARCH_CPUCFG16 0x10 @@ -1137,6 +1138,7 @@ #define IOCSRF_FLATMODE BIT_ULL(10) #define IOCSRF_VM BIT_ULL(11) #define IOCSRF_AVEC BIT_ULL(15) +#define IOCSRF_REDIRECT BIT_ULL(16) #define LOONGARCH_IOCSR_VENDOR 0x10 diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 1c63a9d9a6d3..08dcc698ec18 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index bd128696e96d..03fb60432fde 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) |= _PAGE_MODIFIED; + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } @@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); - return pmd; + if (pmd_val(pmd) & _PAGE_DIRTY) + pmd_val(pmd) |= _PAGE_MODIFIED; + + return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cbfce2872d71..6f943d1391ff 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_TLB; if (config & CPUCFG1_IOCSR) c->options |= LOONGARCH_CPU_IOCSR; + if (config & CPUCFG1_MSGINT) + c->options |= LOONGARCH_CPU_MSGINT; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -331,6 +333,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int c->options |= LOONGARCH_CPU_EIODECODE; if (config & IOCSRF_AVEC) c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_REDIRECT) + c->options |= LOONGARCH_CPU_REDIRECTINT; if (config & IOCSRF_VM) c->options |= LOONGARCH_CPU_HYPERVISOR; } diff --git a/arch/loongarch/kernel/kexec_efi.c b/arch/loongarch/kernel/kexec_efi.c index 45121b914f8f..5ee78ebb1546 100644 --- a/arch/loongarch/kernel/kexec_efi.c +++ b/arch/loongarch/kernel/kexec_efi.c @@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image, { int ret; unsigned long text_offset, kernel_segment_number; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_segment *kernel_segment; struct loongarch_image_header *h; diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c index 97b2f049801a..1b6b64744c7f 100644 --- a/arch/loongarch/kernel/kexec_elf.c +++ b/arch/loongarch/kernel/kexec_elf.c @@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image, int ret; unsigned long text_offset, kernel_segment_number; struct elfhdr ehdr; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_elf_info elf_info; struct kexec_segment *kernel_segment; diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index e4b2bbc47e62..2d64b7c81e5e 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -39,34 +39,12 @@ static unsigned long systable_ptr; static unsigned long start_addr; static unsigned long first_ind_entry; -static void kexec_image_info(const struct kimage *kimage) -{ - unsigned long i; - - pr_debug("kexec kimage info:\n"); - pr_debug("\ttype: %d\n", kimage->type); - pr_debug("\tstart: %lx\n", kimage->start); - pr_debug("\thead: %lx\n", kimage->head); - pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); - - for (i = 0; i < kimage->nr_segments; i++) { - pr_debug("\t segment[%lu]: %016lx - %016lx", i, - kimage->segment[i].mem, - kimage->segment[i].mem + kimage->segment[i].memsz); - pr_debug("\t\t0x%lx bytes, %lu pages\n", - (unsigned long)kimage->segment[i].memsz, - (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); - } -} - int machine_kexec_prepare(struct kimage *kimage) { int i; char *bootloader = "kexec"; void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; - kexec_image_info(kimage); - kimage->arch.efi_boot = fw_arg0; kimage->arch.systable_ptr = fw_arg2; diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index dda236b51a88..fb57026f5f25 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image, unsigned long initrd_load_addr = 0; unsigned long orig_segments = image->nr_segments; char *modified_cmdline = NULL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; kbuf.image = image; /* Don't allocate anything below the kernel */ diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index aed901c57fb4..8ab1ffedc52c 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -13,7 +13,7 @@ void __init memblock_init(void) { u32 mem_type; - u64 mem_start, mem_end, mem_size; + u64 mem_start, mem_size; efi_memory_desc_t *md; /* Parse memory information */ @@ -21,7 +21,6 @@ void __init memblock_init(void) mem_type = md->type; mem_start = md->phys_addr; mem_size = md->num_pages << EFI_PAGE_SHIFT; - mem_end = mem_start + mem_size; switch (mem_type) { case EFI_LOADER_CODE: @@ -31,8 +30,6 @@ void __init memblock_init(void) case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: memblock_add(mem_start, mem_size); - if (max_low_pfn < (mem_end >> PAGE_SHIFT)) - max_low_pfn = mem_end >> PAGE_SHIFT; break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: @@ -49,6 +46,8 @@ void __init memblock_init(void) } } + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); memblock_set_current_limit(PFN_PHYS(max_low_pfn)); /* Reserve the first 2MB */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index d6e73e8f9c0b..ab9c660526a3 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -272,7 +272,8 @@ int __init init_numa_memory(void) node_mem_init(node); node_set_online(node); } - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); setup_nr_node_ids(); loongson_sysconf.nr_nodes = nr_node_ids; @@ -283,26 +284,6 @@ int __init init_numa_memory(void) #endif -void __init paging_init(void) -{ - unsigned int node; - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } -#ifdef CONFIG_ZONE_DMA32 - zones_size[ZONE_DMA32] = MAX_DMA32_PFN; -#endif - zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init(zones_size); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 8ad098703488..9d257c8519c9 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config static int __init init_hw_perf_events(void) { - int counters; + int bits, counters; if (!cpu_has_pmp) return -ENODEV; pr_info("Performance counters: "); - counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; + bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; loongarch_pmu.num_counters = counters; loongarch_pmu.max_period = (1ULL << 63) - 1; @@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void) on_each_cpu(reset_counters, NULL, 1); pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", - loongarch_pmu.name, counters, 64); + loongarch_pmu.name, counters, bits); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 69c17d162fff..25a87378e48e 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -294,8 +294,6 @@ static void __init fdt_setup(void) early_init_dt_scan(fdt_pointer, __pa(fdt_pointer)); early_init_fdt_reserve_self(); - - max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); #endif } @@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void) static void __init arch_mem_init(char **cmdline_p) { /* Recalculate max_low_pfn for "mem=xxx" */ - max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); if (usermem) pr_info("User-defined physical RAM map overwrite\n"); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 3d9be6ca7ec5..da5926fead4a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -1131,8 +1131,8 @@ static void configure_exception_vector(void) tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; csr_write64(eentry, LOONGARCH_CSR_EENTRY); - csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); - csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY); + csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY); } void per_cpu_trap_init(int cpu) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index c32333695381..a1cc116b4dac 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (val >= EIOINTC_ROUTE_MAX_VCPUS) + if (val > EIOINTC_ROUTE_MAX_VCPUS) ret = -EINVAL; else s->num_cpu = val; diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 7c8143e79c12..a7fa458e3360 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -857,7 +857,7 @@ retry: if (writeable) { prot_bits = kvm_pte_mkwriteable(prot_bits); - if (write) + if (write || !kvm_slot_dirty_track_enabled(memslot)) prot_bits = kvm_pte_mkdirty(prot_bits); } diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 32dc213374be..29c2aaba63c3 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -4,6 +4,7 @@ */ #include <linux/kvm_host.h> +#include <asm/delay.h> #include <asm/kvm_csr.h> #include <asm/kvm_vcpu.h> @@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); + __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 30e3b089a596..1245a6b35896 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when * exiting the guest, so that the next time trap into the guest. * We don't need to deal with PMU CSRs contexts. + * + * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU + * before entering guest VM */ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); @@ -139,16 +142,12 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); if (!(val & KVM_PMU_EVENT_ENABLED)) vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + else + kvm_make_request(KVM_REQ_PMU, vcpu); kvm_restore_host_pmu(vcpu); } -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) - kvm_make_request(KVM_REQ_PMU, vcpu); -} - static void kvm_check_pmu(struct kvm_vcpu *vcpu) { if (kvm_check_request(KVM_REQ_PMU, vcpu)) { @@ -299,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { - kvm_lose_pmu(vcpu); + if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) { + kvm_lose_pmu(vcpu); + kvm_make_request(KVM_REQ_PMU, vcpu); + } /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -1604,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_restore_timer(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Restore hardware PMU CSRs */ - kvm_restore_pmu(vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index c3e4586a7975..6bfd4b8dad1b 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn) return memblock_is_memory(addr) && !memblock_is_reserved(addr); } -#ifndef CONFIG_NUMA void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -72,7 +71,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } -#endif /* !CONFIG_NUMA */ void __ref free_initmem(void) { diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index df949a3d0f34..27c336959fe8 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -6,7 +6,7 @@ #include <asm/io.h> #include <asm-generic/early_ioremap.h> -void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size) { return ((void __iomem *)TO_CACHE(phys_addr)); } diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 3a2836e9d856..816570514c37 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -47,7 +47,7 @@ static struct resource standard_io_resources[] = { .name = "keyboard", .start = 0x60, .end = 0x6f, - .flags = IORESOURCE_IO | IORESOURCE_BUSY + .flags = IORESOURCE_IO }, { .name = "dma page reg", @@ -213,7 +213,7 @@ void __init plat_mem_setup(void) /* Request I/O space for devices used on the Malta board. */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, standard_io_resources+i); + insert_resource(&ioport_resource, standard_io_resources + i); /* * Enable DMA channel 4 (cascade channel) in the PIIX4 south bridge. diff --git a/arch/mips/pci/pci-malta.c b/arch/mips/pci/pci-malta.c index 6aefdf20ca05..2e35aeba45bc 100644 --- a/arch/mips/pci/pci-malta.c +++ b/arch/mips/pci/pci-malta.c @@ -230,8 +230,7 @@ void __init mips_pcibios_init(void) } /* PIIX4 ACPI starts at 0x1000 */ - if (controller->io_resource->start < 0x00001000UL) - controller->io_resource->start = 0x00001000UL; + PCIBIOS_MIN_IO = 0x1000; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f7e0fee5ee55..7ac88ff13d3c 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -35,6 +35,8 @@ #define KERNEL_START (KERNEL_BINARY_TEXT_START) +#define ALIGNMENT_OK(ptr, type) (((ptr) & (sizeof(type) - 1)) == 0) + extern struct unwind_table_entry __start___unwind[]; extern struct unwind_table_entry __stop___unwind[]; @@ -257,12 +259,15 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int if (pc_is_kernel_fn(pc, _switch_to) || pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; - info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + if (ALIGNMENT_OK(info->prev_sp, long)) + info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + else + info->prev_ip = info->prev_sp = 0; return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long)&_call_on_stack) { + if (pc == (unsigned long)&_call_on_stack && ALIGNMENT_OK(info->sp, long)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; @@ -370,8 +375,10 @@ static void unwind_frame_regs(struct unwind_frame_info *info) info->prev_sp = info->sp - frame_size; if (e->Millicode) info->rp = info->r31; - else if (rpoffset) + else if (rpoffset && ALIGNMENT_OK(info->prev_sp, long)) info->rp = *(unsigned long *)(info->prev_sp - rpoffset); + else + info->rp = 0; info->prev_ip = info->rp; info->rp = 0; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e24f4d88885a..9537a61ebae0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_DMA_OPS if PPC64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5782e743fd27..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; + if (!fw_dump.fadump_enabled) + return; + if (!fw_dump.param_area_supported || fw_dump.dump_active) return; diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1302b5ac5672..89a1b8c21ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, * it fires once. */ if (single_escalation) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]); xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; @@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Grab info about irq */ state->pt_number = hw_irq; - state->pt_data = irq_data_get_irq_handler_data(host_data); + state->pt_data = irq_data_get_irq_chip_data(host_data); /* * Configure the IRQ to match the existing configuration of @@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) */ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { - struct irq_data *d = irq_get_irq_data(irq); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(irq); /* * This slightly odd sequence gives the right result @@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) i0, i1); } if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = - irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); seq_printf(m, " ESC %d %c%c EOI @%llx", diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7b527d18aa5e..4c321a8ea896 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index b65256a63e87..9c9650319f3b 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev) return -EINVAL; } - xd = irq_get_handler_data(vinst->virq); + xd = irq_get_chip_data(vinst->virq); if (!xd) { pr_err("Inst%d: Invalid virq %d\n", vinst->vas_id, vinst->virq); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 825f9432e03d..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { - struct msi_desc *desc = arg->desc; - struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 625361a15424..8d0123b0ae84 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); /* * Clear saved_p to indicate that it's no longer pending diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0c6038dc5dfd..fadec20b87a8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,7 +29,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX - select ARCH_HAS_ELF_CORE_EFLAGS + select ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS systems to handle cache management. config AS_HAS_INSN - def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + def_bool $(as-instr,.insn 0x100000f) config AS_HAS_OPTION_ARCH # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4 diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index ecf2fcce2d92..4c6de57f65ef 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -134,21 +134,6 @@ endif CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make -boot := arch/riscv/boot -ifeq ($(CONFIG_XIP_KERNEL),y) -KBUILD_IMAGE := $(boot)/xipImage -else -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) -KBUILD_IMAGE := $(boot)/loader.bin -else -ifeq ($(CONFIG_EFI_ZBOOT),) -KBUILD_IMAGE := $(boot)/Image.gz -else -KBUILD_IMAGE := $(boot)/vmlinuz.efi -endif -endif -endif - boot := arch/riscv/boot boot-image-y := Image boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 @@ -159,7 +144,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst boot-image-$(CONFIG_KERNEL_XZ) := Image.xz ifdef CONFIG_RISCV_M_MODE -boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin +boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin endif boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi boot-image-$(CONFIG_XIP_KERNEL) := xipImage diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 8bd2a11382a3..e9e8ba83e632 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -12,6 +12,12 @@ #define __ASM_STR(x) #x #endif +#ifdef CONFIG_AS_HAS_INSN +#define ASM_INSN_I(__x) ".insn " __x +#else +#define ASM_INSN_I(__x) ".4byte " __x +#endif + #if __riscv_xlen == 64 #define __REG_SEL(a, b) __ASM_STR(a) #elif __riscv_xlen == 32 @@ -84,15 +90,9 @@ .endm #ifdef CONFIG_SMP -#ifdef CONFIG_32BIT -#define PER_CPU_OFFSET_SHIFT 2 -#else -#define PER_CPU_OFFSET_SHIFT 3 -#endif - .macro asm_per_cpu dst sym tmp lw \tmp, TASK_TI_CPU_NUM(tp) - slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + slli \tmp, \tmp, RISCV_LGPTR la \dst, __per_cpu_offset add \dst, \dst, \tmp REG_L \tmp, 0(\dst) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbd0e4306c93..62837fa981e8 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -31,6 +31,8 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +extern const struct seq_operations cpuinfo_op; + /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 948d2b34e94e..58f8dda73259 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -42,4 +42,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair, return pair->value == other_pair->value; } +#ifdef CONFIG_MMU +void riscv_hwprobe_register_async_probe(void); +void riscv_hwprobe_complete_async_probe(void); +#else +static inline void riscv_hwprobe_register_async_probe(void) {} +static inline void riscv_hwprobe_complete_async_probe(void) {} +#endif #endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index c9cfcea52cbb..d29da6ccd3dd 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -256,10 +256,10 @@ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ SIMM12((offset) & 0xfe0), RS1(base)) -#define RISCV_PAUSE ".4byte 0x100000f" -#define ZAWRS_WRS_NTO ".4byte 0x00d00073" -#define ZAWRS_WRS_STO ".4byte 0x01d00073" -#define RISCV_NOP4 ".4byte 0x00000013" +#define RISCV_PAUSE ASM_INSN_I("0x100000f") +#define ZAWRS_WRS_NTO ASM_INSN_I("0x00d00073") +#define ZAWRS_WRS_STO ASM_INSN_I("0x01d00073") +#define RISCV_NOP4 ASM_INSN_I("0x00000013") #define RISCV_INSN_NOP4 _AC(0x00000013, U) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 7559d728c5ff..78b18e2fd771 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -3,14 +3,18 @@ #ifndef __ASM_KGDB_H_ #define __ASM_KGDB_H_ +#include <linux/build_bug.h> + #ifdef __KERNEL__ #define GDB_SIZEOF_REG sizeof(unsigned long) -#define DBG_MAX_REG_NUM (36) -#define NUMREGBYTES ((DBG_MAX_REG_NUM) * GDB_SIZEOF_REG) +#define DBG_MAX_REG_NUM 36 +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) #define CACHE_FLUSH_IS_SAFE 1 #define BUFMAX 2048 +static_assert(BUFMAX > NUMREGBYTES, + "As per KGDB documentation, BUFMAX must be larger than NUMREGBYTES"); #ifdef CONFIG_RISCV_ISA_C #define BREAK_INSTR_SIZE 2 #else @@ -97,6 +101,7 @@ extern unsigned long kgdb_compiled_break; #define DBG_REG_STATUS_OFF 33 #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 +/* NOTE: increase DBG_MAX_REG_NUM if you add more values here. */ extern const char riscv_gdb_stub_feature[64]; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 1018d2216901..6e789fa58514 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -69,6 +69,8 @@ typedef struct { #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) +#define MAX_POSSIBLE_PHYSMEM_BITS 56 + /* * rv64 PTE format: * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 29e994a9afb6..5a08eb5fe99f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -654,6 +654,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +#define pgprot_dmacoherent pgprot_writecombine + /* * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h index da57a3786f7a..88b37af55175 100644 --- a/arch/riscv/include/asm/vdso/arch_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -12,6 +12,12 @@ struct vdso_arch_data { /* Boolean indicating all CPUs have the same static hwprobe values. */ __u8 homogeneous_cpus; + + /* + * A gate to check and see if the hwprobe data is actually ready, as + * probing is deferred to avoid boot slowdowns. + */ + __u8 ready; }; #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/include/asm/vendor_extensions/mips.h b/arch/riscv/include/asm/vendor_extensions/mips.h index ea8ca747d691..ffeb12dc17a3 100644 --- a/arch/riscv/include/asm/vendor_extensions/mips.h +++ b/arch/riscv/include/asm/vendor_extensions/mips.h @@ -30,8 +30,8 @@ extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_mips; * allowing any subsequent instructions to fetch. */ -#define MIPS_PAUSE ".4byte 0x00501013\n\t" -#define MIPS_EHB ".4byte 0x00301013\n\t" -#define MIPS_IHB ".4byte 0x00101013\n\t" +#define MIPS_PAUSE ASM_INSN_I("0x00501013\n\t") +#define MIPS_EHB ASM_INSN_I("0x00301013\n\t") +#define MIPS_IHB ASM_INSN_I("0x00101013\n\t") #endif // _ASM_RISCV_VENDOR_EXTENSIONS_MIPS_H diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a1e38ecfc8be..3f50d3dd76c6 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -54,6 +54,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); + clear_tasks_mm_cpumask(cpu); /* Verify from the firmware if the cpu is really stopped*/ if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f6b13e9f5e6c..3dbc8cc557dd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -62,10 +62,8 @@ int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned lo return -ENODEV; } - if (!of_device_is_available(node)) { - pr_info("CPU with hartid=%lu is not available\n", *hart); + if (!of_device_is_available(node)) return -ENODEV; - } if (of_property_read_string(node, "riscv,isa-base", &isa)) goto old_interface; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 67b59699357d..72ca768f4e91 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -932,9 +932,9 @@ static int has_thead_homogeneous_vlenb(void) { int cpu; u32 prev_vlenb = 0; - u32 vlenb; + u32 vlenb = 0; - /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return 0; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d3d92a4becc7..9b9dec6893b8 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -455,7 +455,7 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - /* instruciton page fault */ + /* instruction page fault */ ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 9f3db3503dab..15fec5d1e6de 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -265,10 +265,10 @@ void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, { if (!strncmp(remcom_in_buffer, gdb_xfer_read_target, sizeof(gdb_xfer_read_target))) - strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc); + strscpy(remcom_out_buffer, riscv_gdb_stub_target_desc, BUFMAX); else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml, sizeof(gdb_xfer_read_cpuxml))) - strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml); + strscpy(remcom_out_buffer, riscv_gdb_stub_cpuxml, BUFMAX); } static inline void kgdb_arch_update_addr(struct pt_regs *regs, diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 75551ac6504c..1675cbad8619 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int num_plts = 0; unsigned int num_gots = 0; Elf_Rela *scratch = NULL; + Elf_Rela *new_scratch; size_t scratch_size = 0; int i; @@ -168,9 +169,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); if (scratch_size_needed > scratch_size) { scratch_size = scratch_size_needed; - scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); - if (!scratch) + new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!new_scratch) { + kvfree(scratch); return -ENOMEM; + } + scratch = new_scratch; } for (size_t j = 0; j < num_relas; j++) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index c0738d6c6498..8723390c7cad 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -49,10 +49,15 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) post_kprobe_handler(p, kcb, regs); } -static bool __kprobes arch_check_kprobe(struct kprobe *p) +static bool __kprobes arch_check_kprobe(unsigned long addr) { - unsigned long tmp = (unsigned long)p->addr - p->offset; - unsigned long addr = (unsigned long)p->addr; + unsigned long tmp, offset; + + /* start iterating at the closest preceding symbol */ + if (!kallsyms_lookup_size_offset(addr, NULL, &offset)) + return false; + + tmp = addr - offset; while (tmp <= addr) { if (tmp == addr) @@ -71,7 +76,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)insn & 0x1) return -EILSEQ; - if (!arch_check_kprobe(p)) + if (!arch_check_kprobe((unsigned long)p->addr)) return -EILSEQ; /* copy instruction */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14235e58c539..b5bc5fc65cea 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -331,11 +331,14 @@ void __init setup_arch(char **cmdline_p) /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); + if (acpi_disabled) { #if IS_ENABLED(CONFIG_BUILTIN_DTB) - unflatten_and_copy_device_tree(); + unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + unflatten_device_tree(); #endif + } + misc_mem_init(); init_resources(); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index e650dec44817..5ed5095320e6 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -40,6 +40,17 @@ enum ipi_message_type { IPI_MAX }; +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", +}; + unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; @@ -199,7 +210,7 @@ void riscv_ipi_set_virq_range(int virq, int nr) /* Request IPIs */ for (i = 0; i < nr_ipi; i++) { err = request_percpu_irq(ipi_virq_base + i, handle_IPI, - "IPI", &ipi_dummy_dev); + ipi_names[i], &ipi_dummy_dev); WARN_ON(err); ipi_desc[i] = irq_to_desc(ipi_virq_base + i); @@ -210,17 +221,6 @@ void riscv_ipi_set_virq_range(int virq, int nr) riscv_ipi_enable(); } -static const char * const ipi_names[] = { - [IPI_RESCHEDULE] = "Rescheduling interrupts", - [IPI_CALL_FUNC] = "Function call interrupts", - [IPI_CPU_STOP] = "CPU stop interrupts", - [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", - [IPI_IRQ_WORK] = "IRQ work interrupts", - [IPI_TIMER] = "Timer broadcast interrupts", - [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", - [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", -}; - void show_ipi_stats(struct seq_file *p, int prec) { unsigned int cpu, i; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 3fe9e6edef8f..b41b6255751c 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,22 @@ #ifdef CONFIG_FRAME_POINTER +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + unsigned long addr = x; \ + if ((task) == current) \ + val = READ_ONCE(addr); \ + else \ + val = READ_ONCE_NOCHECK(addr); \ + val; \ +}) + extern asmlinkage void handle_exception(void); extern unsigned long ret_from_exception_end; @@ -69,8 +85,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->ra; pc = regs->ra; } else { - fp = frame->fp; - pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, + fp = READ_ONCE_TASK_STACK(task, frame->fp); + pc = READ_ONCE_TASK_STACK(task, frame->ra); + pc = ftrace_graph_ret_addr(current, &graph_idx, pc, &frame->ra); if (pc >= (unsigned long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 000f4451a9d8..199d13f86f31 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -5,6 +5,9 @@ * more details. */ #include <linux/syscalls.h> +#include <linux/completion.h> +#include <linux/atomic.h> +#include <linux/once.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> @@ -28,6 +31,11 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, bool first = true; int cpu; + if (pair->key != RISCV_HWPROBE_KEY_MVENDORID && + pair->key != RISCV_HWPROBE_KEY_MIMPID && + pair->key != RISCV_HWPROBE_KEY_MARCHID) + goto out; + for_each_cpu(cpu, cpus) { u64 cpu_id; @@ -58,6 +66,7 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, } } +out: pair->value = id; } @@ -454,28 +463,32 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs, return 0; } -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, - size_t pair_count, size_t cpusetsize, - unsigned long __user *cpus_user, - unsigned int flags) -{ - if (flags & RISCV_HWPROBE_WHICH_CPUS) - return hwprobe_get_cpus(pairs, pair_count, cpusetsize, - cpus_user, flags); +#ifdef CONFIG_MMU - return hwprobe_get_values(pairs, pair_count, cpusetsize, - cpus_user, flags); +static DECLARE_COMPLETION(boot_probes_done); +static atomic_t pending_boot_probes = ATOMIC_INIT(1); + +void riscv_hwprobe_register_async_probe(void) +{ + atomic_inc(&pending_boot_probes); } -#ifdef CONFIG_MMU +void riscv_hwprobe_complete_async_probe(void) +{ + if (atomic_dec_and_test(&pending_boot_probes)) + complete(&boot_probes_done); +} -static int __init init_hwprobe_vdso_data(void) +static int complete_hwprobe_vdso_data(void) { struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; + if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) + wait_for_completion(&boot_probes_done); + /* * Initialize vDSO data with the answers for the "all CPUs" case, to * save a syscall in the common case. @@ -503,13 +516,52 @@ static int __init init_hwprobe_vdso_data(void) * vDSO should defer to the kernel for exotic cpu masks. */ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + + /* + * Make sure all the VDSO values are visible before we look at them. + * This pairs with the implicit "no speculativly visible accesses" + * barrier in the VDSO hwprobe code. + */ + smp_wmb(); + avd->ready = true; + return 0; +} + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_arch_data *avd = vdso_k_arch_data; + + /* + * Prevent the vDSO cached values from being used, as they're not ready + * yet. + */ + avd->ready = false; return 0; } arch_initcall_sync(init_hwprobe_vdso_data); +#else + +static int complete_hwprobe_vdso_data(void) { return 0; } + #endif /* CONFIG_MMU */ +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data); + + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return hwprobe_get_cpus(pairs, pair_count, cpusetsize, + cpus_user, flags); + + return hwprobe_get_values(pairs, pair_count, cpusetsize, + cpus_user, flags); +} + SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, size_t, pair_count, size_t, cpusetsize, unsigned long __user *, cpus, unsigned int, flags) diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5db4df44279e..40f8dafffa0a 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -31,7 +31,7 @@ config RISCV_MODULE_LINKING_KUNIT If unsure, say N. config RISCV_KPROBES_KUNIT - bool "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS + tristate "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS depends on KUNIT depends on KPROBES default KUNIT_ALL_TESTS diff --git a/arch/riscv/kernel/tests/kprobes/Makefile b/arch/riscv/kernel/tests/kprobes/Makefile index 4cb6c66a98e8..df7256f62313 100644 --- a/arch/riscv/kernel/tests/kprobes/Makefile +++ b/arch/riscv/kernel/tests/kprobes/Makefile @@ -1 +1,3 @@ -obj-y += test-kprobes.o test-kprobes-asm.o +obj-$(CONFIG_RISCV_KPROBES_KUNIT) += kprobes_riscv_kunit.o + +kprobes_riscv_kunit-objs := test-kprobes.o test-kprobes-asm.o diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.c b/arch/riscv/kernel/tests/kprobes/test-kprobes.c index 6f6cdfbf5a95..664535ca0a98 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.c +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.c @@ -49,8 +49,11 @@ static struct kunit_case kprobes_testcases[] = { }; static struct kunit_suite kprobes_test_suite = { - .name = "kprobes_test_riscv", + .name = "kprobes_riscv", .test_cases = kprobes_testcases, }; kunit_test_suites(&kprobes_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test for riscv kprobes"); diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.h b/arch/riscv/kernel/tests/kprobes/test-kprobes.h index 3886ab491ecb..537f44aa9d3f 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.h +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.h @@ -11,7 +11,7 @@ #define KPROBE_TEST_MAGIC_LOWER 0x0000babe #define KPROBE_TEST_MAGIC_UPPER 0xcafe0000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* array of addresses to install kprobes */ extern void *test_kprobes_addresses[]; @@ -19,6 +19,6 @@ extern void *test_kprobes_addresses[]; /* array of functions that return KPROBE_TEST_MAGIC */ extern long (*test_kprobes_functions[])(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* TEST_KPROBES_H */ diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index ae2068425fbc..70b5e6927620 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -379,6 +379,7 @@ free: static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); + riscv_hwprobe_complete_async_probe(); return 0; } @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } else if (!check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); + riscv_hwprobe_register_async_probe(); + if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"))) { + pr_warn("Failed to create vec_unalign_check kthread\n"); + riscv_hwprobe_complete_async_probe(); + } } /* diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 2ddeba6c68dd..8f45500d0a6e 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, * homogeneous, then this function can handle requests for arbitrary * masks. */ - if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready)) return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); /* This is something we can handle, fill out the pairs. */ diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index fda0346f0ea1..11422cb95a64 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) */ read_lock_irqsave(&imsic->vsfile_lock, flags); - if (imsic->vsfile_cpu > -1) - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that current CPU + * does not match the imsic->vsfile_cpu hence this function + * must check imsic->vsfile_cpu before accessing HGEIP CSR. + */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } read_unlock_irqrestore(&imsic->vsfile_lock, flags); return ret; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 525fb5a330c0..58f5f3536ffd 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change) { hva_t hva, reg_end, size; - gpa_t base_gpa; bool writable; int ret = 0; @@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = new->userspace_addr; size = new->npages << PAGE_SHIFT; reg_end = hva + size; - base_gpa = new->base_gfn << PAGE_SHIFT; writable = !(new->flags & KVM_MEM_READONLY); mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and - * any holes between them, so iterate over all of them to find - * out if we can map any of them right now. + * any holes between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; + hva_t vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) @@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } /* Take the intersection of this VMA with the memory region */ - vm_start = max(hva, vma->vm_start); vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = base_gpa + (vm_start - hva); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - - ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start, - writable, false); - if (ret) - break; } hva = vm_end; } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - if (ret) - kvm_riscv_mmu_iounmap(kvm, base_gpa, size); - out: mmap_read_unlock(current->mm); return ret; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index bccb919ca615..5ce35aba6069 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 3b51690cc876..34299c2b231f 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -21,7 +21,7 @@ #define pt_dump_seq_puts(m, fmt) \ ({ \ if (m) \ - seq_printf(m, fmt); \ + seq_puts(m, fmt); \ }) /* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c4145672ca34..df22b10d9141 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -158,7 +158,6 @@ config S390 select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b31c1df90257..8433f769f7e1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -101,6 +101,7 @@ CONFIG_SLUB_STATS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -123,12 +124,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -472,6 +473,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -654,9 +656,12 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_XFS_DEBUG=y CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y @@ -666,7 +671,6 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 161dad7ef211..4414dabd04a6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -94,6 +94,7 @@ CONFIG_SLAB_BUCKETS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 @@ -114,12 +115,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -462,6 +463,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -644,16 +646,18 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ed0b137353ad..b5478267d6a7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -33,7 +33,6 @@ CONFIG_NET=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y -# CONFIG_DCSSBLK is not set # CONFIG_DASD is not set CONFIG_ENCLOSURE_SERVICES=y CONFIG_SCSI=y diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c index 7ecfdc4fba2d..89f3e6d8fd89 100644 --- a/arch/s390/crypto/phmac_s390.c +++ b/arch/s390/crypto/phmac_s390.c @@ -169,11 +169,18 @@ struct kmac_sha2_ctx { u64 buflen[2]; }; +enum async_op { + OP_NOP = 0, + OP_UPDATE, + OP_FINAL, + OP_FINUP, +}; + /* phmac request context */ struct phmac_req_ctx { struct hash_walk_helper hwh; struct kmac_sha2_ctx kmac_ctx; - bool final; + enum async_op async_op; }; /* @@ -610,6 +617,7 @@ static int phmac_update(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { + req_ctx->async_op = OP_UPDATE; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -647,8 +655,7 @@ static int phmac_final(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req->nbytes = 0; - req_ctx->final = true; + req_ctx->async_op = OP_FINAL; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -676,13 +683,16 @@ static int phmac_finup(struct ahash_request *req) if (rc) goto out; + req_ctx->async_op = OP_FINUP; + /* Try synchronous operations if no active engine usage */ if (!atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_update(req, false); if (rc == 0) - req->nbytes = 0; + req_ctx->async_op = OP_FINAL; } - if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) { + if (!rc && req_ctx->async_op == OP_FINAL && + !atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_final(req, false); if (rc == 0) goto out; @@ -694,7 +704,7 @@ static int phmac_finup(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req_ctx->final = true; + /* req->async_op has been set to either OP_FINUP or OP_FINAL */ atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -855,15 +865,16 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) /* * Three kinds of requests come in here: - * update when req->nbytes > 0 and req_ctx->final is false - * final when req->nbytes = 0 and req_ctx->final is true - * finup when req->nbytes > 0 and req_ctx->final is true - * For update and finup the hwh walk needs to be prepared and - * up to date but the actual nr of bytes in req->nbytes may be - * any non zero number. For final there is no hwh walk needed. + * 1. req->async_op == OP_UPDATE with req->nbytes > 0 + * 2. req->async_op == OP_FINUP with req->nbytes > 0 + * 3. req->async_op == OP_FINAL + * For update and finup the hwh walk has already been prepared + * by the caller. For final there is no hwh walk needed. */ - if (req->nbytes) { + switch (req_ctx->async_op) { + case OP_UPDATE: + case OP_FINUP: rc = phmac_kmac_update(req, true); if (rc == -EKEYEXPIRED) { /* @@ -880,10 +891,11 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) hwh_advance(hwh, rc); goto out; } - req->nbytes = 0; - } - - if (req_ctx->final) { + if (req_ctx->async_op == OP_UPDATE) + break; + req_ctx->async_op = OP_FINAL; + fallthrough; + case OP_FINAL: rc = phmac_kmac_final(req, true); if (rc == -EKEYEXPIRED) { /* @@ -897,10 +909,14 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) cond_resched(); return -ENOSPC; } + break; + default: + /* unknown/unsupported/unimplemented asynch op */ + return -EOPNOTSUPP; } out: - if (rc || req_ctx->final) + if (rc || req_ctx->async_op == OP_FINAL) memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); pr_debug("request complete with rc=%d\n", rc); local_bh_disable(); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6890925d5587..a32f465ecf73 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -145,7 +145,6 @@ struct zpci_dev { u8 has_resources : 1; u8 is_physfn : 1; u8 util_str_avail : 1; - u8 irqs_registered : 1; u8 tid_avail : 1; u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7100c6a4054..6663f1619abb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local) { unsigned long pto; pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1); - asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]" + asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]" : "+m" (*ptep) - : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt), - [asce] "a" (asce), [m4] "i" (local)); + : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK), + [m4] "i" (local)); } static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, @@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * A local RDP can be used to do the flush. */ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) - __ptep_rdp(address, ptep, 0, 0, 1); + __ptep_rdp(address, ptep, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 9af2aae0a515..528d7c70979f 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -291,16 +291,14 @@ static int ptdump_cmp(const void *a, const void *b) static int add_marker(unsigned long start, unsigned long end, const char *name) { - size_t oldsize, newsize; - - oldsize = markers_cnt * sizeof(*markers); - newsize = oldsize + 2 * sizeof(*markers); - if (!oldsize) - markers = kvmalloc(newsize, GFP_KERNEL); - else - markers = kvrealloc(markers, newsize, GFP_KERNEL); - if (!markers) - goto error; + struct addr_marker *new; + size_t newsize; + + newsize = (markers_cnt + 2) * sizeof(*markers); + new = kvrealloc(markers, newsize, GFP_KERNEL); + if (!new) + return -ENOMEM; + markers = new; markers[markers_cnt].is_start = 1; markers[markers_cnt].start_address = start; markers[markers_cnt].size = end - start; @@ -312,9 +310,6 @@ static int add_marker(unsigned long start, unsigned long end, const char *name) markers[markers_cnt].name = name; markers_cnt++; return 0; -error: - markers_cnt = 0; - return -ENOMEM; } static int pt_dump_init(void) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..05974304d622 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep, preempt_disable(); atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_rdp(addr, ptep, 0, 0, 1); + __ptep_rdp(addr, ptep, 1); else - __ptep_rdp(addr, ptep, 0, 0, 0); + __ptep_rdp(addr, ptep, 0); /* * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That * means it is still valid and active, and must not be changed according diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b95376041501..27db1e72c623 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -188,7 +188,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) * is unbound or probed and that userspace can't access its * configuration space while we perform recovery. */ - pci_dev_lock(pdev); + device_lock(&pdev->dev); if (pdev->error_state == pci_channel_io_perm_failure) { ers_res = PCI_ERS_RESULT_DISCONNECT; goto out_unlock; @@ -257,7 +257,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: - pci_dev_unlock(pdev); + device_unlock(&pdev->dev); zpci_report_status(zdev, "recovery", status_str); return ers_res; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 84482a921332..e73be96ce5fe 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -107,9 +107,6 @@ static int zpci_set_irq(struct zpci_dev *zdev) else rc = zpci_set_airq(zdev); - if (!rc) - zdev->irqs_registered = 1; - return rc; } @@ -123,9 +120,6 @@ static int zpci_clear_irq(struct zpci_dev *zdev) else rc = zpci_clear_airq(zdev); - if (!rc) - zdev->irqs_registered = 0; - return rc; } @@ -427,8 +421,7 @@ bool arch_restore_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); - if (!zdev->irqs_registered) - zpci_set_irq(zdev); + zpci_set_irq(zdev); return true; } diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4db7e4bf69f5..1a27efcf3c20 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -75,7 +75,7 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-sse4a KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 @@ -98,7 +98,7 @@ ifeq ($(CONFIG_X86_KERNEL_IBT),y) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 # KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) -KBUILD_RUSTFLAGS += -Zcf-protection=branch -Zno-jump-tables +KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 745caa6c15a3..fa6c47b50989 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2789,13 +2789,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; - - if (perf_hw_regs(regs)) + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; unwind_start(&state, current, regs, NULL); - else + } else { unwind_start(&state, current, NULL, (void *)regs->sp); + } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 28f5468a6ea3..fe65be0b9d9c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7596,6 +7596,7 @@ __init int intel_pmu_init(void) break; case INTEL_PANTHERLAKE_L: + case INTEL_WILDCATLAKE_L: pr_cont("Pantherlake Hybrid events, "); name = "pantherlake_hybrid"; goto lnl_common; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c0b7ac1c7594..01bc59e9286c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -317,7 +317,8 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); + WARN_ON_ONCE(is_hybrid() && + hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index a762f7f5b161..d6c945cc5d07 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1895,6 +1895,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &ptl_uncore_init), + X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &ptl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h index 23fe617898a8..a672b8765fa8 100644 --- a/arch/x86/include/asm/amd/node.h +++ b/arch/x86/include/asm/amd/node.h @@ -23,7 +23,6 @@ #define AMD_NODE0_PCI_SLOT 0x18 struct pci_dev *amd_node_get_func(u16 node, u8 func); -struct pci_dev *amd_node_get_root(u16 node); static inline u16 amd_num_nodes(void) { diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 93156ac4ffe0..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return &arch_ftrace_regs(fregs)->regs; } +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f32a0eca2ae5..950bfd006905 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -150,12 +150,12 @@ #define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ -#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */ #define INTEL_WILDCATLAKE_L IFM(6, 0xD5) -#define INTEL_NOVALAKE IFM(18, 0x01) -#define INTEL_NOVALAKE_L IFM(18, 0x03) +#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */ +#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */ /* "Small Core" Processors (Atom/E-Core) */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 015d23f3e01f..53f4089333f2 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -43,6 +43,9 @@ extern unsigned long __phys_addr_symbol(unsigned long); void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); +KCFI_REFERENCE(clear_page_orig); +KCFI_REFERENCE(clear_page_rep); +KCFI_REFERENCE(clear_page_erms); static inline void clear_page(void *page) { diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 8d983cfd06ea..e5a13dc8816e 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #ifdef __ASSEMBLY__ .macro RUNTIME_CONST_PTR sym reg diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c8a5ae35c871..641f45c22f9d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -12,12 +12,12 @@ #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/percpu.h> -#include <asm/runtime-const.h> -/* - * Virtual variable: there's no actual backing store for this, - * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' - */ +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include <asm/runtime-const.h> +#endif extern unsigned long USER_PTR_MAX; #ifdef CONFIG_ADDRESS_MASKING diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 9792e329343e..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -93,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 #define EXIT_REASON_MSR_READ_IMM 84 #define EXIT_REASON_MSR_WRITE_IMM 85 diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7047124490f6..d7c8ef1e354d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected) break; } - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { u32 tmp; int ret; diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index a40176b62eb5..3d0a4768d603 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -34,62 +34,6 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func) return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); } -#define DF_BLK_INST_CNT 0x040 -#define DF_CFG_ADDR_CNTL_LEGACY 0x084 -#define DF_CFG_ADDR_CNTL_DF4 0xC04 - -#define DF_MAJOR_REVISION GENMASK(27, 24) - -static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) -{ - u32 reg; - - /* - * Revision fields added for DF4 and later. - * - * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. - */ - if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, ®)) - return 0; - - if (reg & DF_MAJOR_REVISION) - return DF_CFG_ADDR_CNTL_DF4; - - return DF_CFG_ADDR_CNTL_LEGACY; -} - -struct pci_dev *amd_node_get_root(u16 node) -{ - struct pci_dev *root; - u16 cntl_off; - u8 bus; - - if (!cpu_feature_enabled(X86_FEATURE_ZEN)) - return NULL; - - /* - * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) - * Bits [7:0] (SecBusNum) holds the bus number of the root device for - * this Data Fabric instance. The segment, device, and function will be 0. - */ - struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); - if (!df_f0) - return NULL; - - cntl_off = get_cfg_addr_cntl_offset(df_f0); - if (!cntl_off) - return NULL; - - if (pci_read_config_byte(df_f0, cntl_off, &bus)) - return NULL; - - /* Grab the pointer for the actual root device instance. */ - root = pci_get_domain_bus_and_slot(0, bus, 0); - - pci_dbg(root, "is root for AMD node %u\n", node); - return root; -} - static struct pci_dev **amd_roots; /* Protect the PCI config register pairs used for SMN. */ @@ -274,51 +218,21 @@ DEFINE_SHOW_STORE_ATTRIBUTE(smn_node); DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); -static int amd_cache_roots(void) -{ - u16 node, num_nodes = amd_num_nodes(); - - amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); - if (!amd_roots) - return -ENOMEM; - - for (node = 0; node < num_nodes; node++) - amd_roots[node] = amd_node_get_root(node); - - return 0; -} - -static int reserve_root_config_spaces(void) +static struct pci_dev *get_next_root(struct pci_dev *root) { - struct pci_dev *root = NULL; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus))) { - /* Root device is Device 0 Function 0 on each Primary Bus. */ - root = pci_get_slot(bus, 0); - if (!root) + while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) { + /* Root device is Device 0 Function 0. */ + if (root->devfn) continue; if (root->vendor != PCI_VENDOR_ID_AMD && root->vendor != PCI_VENDOR_ID_HYGON) continue; - pci_dbg(root, "Reserving PCI config space\n"); - - /* - * There are a few SMN index/data pairs and other registers - * that shouldn't be accessed by user space. - * So reserve the entire PCI config space for simplicity rather - * than covering specific registers piecemeal. - */ - if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { - pci_err(root, "Failed to reserve config space\n"); - return -EEXIST; - } + break; } - smn_exclusive = true; - return 0; + return root; } static bool enable_dfs; @@ -332,7 +246,8 @@ __setup("amd_smn_debugfs_enable", amd_smn_enable_dfs); static int __init amd_smn_init(void) { - int err; + u16 count, num_roots, roots_per_node, node, num_nodes; + struct pci_dev *root; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) return 0; @@ -342,13 +257,48 @@ static int __init amd_smn_init(void) if (amd_roots) return 0; - err = amd_cache_roots(); - if (err) - return err; + num_roots = 0; + root = NULL; + while ((root = get_next_root(root))) { + pci_dbg(root, "Reserving PCI config space\n"); - err = reserve_root_config_spaces(); - if (err) - return err; + /* + * There are a few SMN index/data pairs and other registers + * that shouldn't be accessed by user space. So reserve the + * entire PCI config space for simplicity rather than covering + * specific registers piecemeal. + */ + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { + pci_err(root, "Failed to reserve config space\n"); + return -EEXIST; + } + + num_roots++; + } + + pr_debug("Found %d AMD root devices\n", num_roots); + + if (!num_roots) + return -ENODEV; + + num_nodes = amd_num_nodes(); + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); + if (!amd_roots) + return -ENOMEM; + + roots_per_node = num_roots / num_nodes; + + count = 0; + node = 0; + root = NULL; + while (node < num_nodes && (root = get_next_root(root))) { + /* Use one root for each node and skip the rest. */ + if (count++ % roots_per_node) + continue; + + pci_dbg(root, "is root for AMD node %u\n", node); + amd_roots[node++] = root; + } if (enable_dfs) { debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); @@ -358,6 +308,8 @@ static int __init amd_smn_init(void) debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); } + smn_exclusive = true; + return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5398db4dedb4..5d46709c58d0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -516,7 +516,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ZEN5); break; case 0x50 ... 0x5f: - case 0x90 ... 0xaf: + case 0x80 ... 0xaf: case 0xc0 ... 0xcf: setup_force_cpu_cap(X86_FEATURE_ZEN6); break; @@ -1035,8 +1035,26 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) } } +static const struct x86_cpu_id zen5_rdseed_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), + ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), + {}, +}; + static void init_amd_zen5(struct cpuinfo_x86 *c) { + if (!x86_match_min_microcode_rev(zen5_rdseed_microcode)) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n"); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -1355,11 +1373,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6a526ae1fe99..d7fa03bf51b4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1463,7 +1463,9 @@ static void __init retbleed_update_mitigation(void) break; default: if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) { - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_NONE) + pr_err(RETBLEED_INTEL_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } @@ -1825,13 +1827,6 @@ void unpriv_ebpf_notify(int new_state) } #endif -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - /* The kernel command line selection for spectre v2 */ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_NONE, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7d3512914ca..02d97834a1d4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -78,6 +78,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Used for modules: built-in code uses runtime constants */ +unsigned long USER_PTR_MAX; +EXPORT_SYMBOL(USER_PTR_MAX); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -2579,7 +2583,7 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX; + USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index cdce885e2fd5..a881bf4c2011 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -194,7 +194,7 @@ static bool need_sha_check(u32 cur_rev) } switch (cur_rev >> 8) { - case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80012: return cur_rev <= 0x8001277; break; case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; @@ -220,10 +220,13 @@ static bool need_sha_check(u32 cur_rev) case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; case 0xb0021: return cur_rev <= 0xb002146; break; + case 0xb0081: return cur_rev <= 0xb008111; break; case 0xb1010: return cur_rev <= 0xb101046; break; case 0xb2040: return cur_rev <= 0xb204031; break; case 0xb4040: return cur_rev <= 0xb404031; break; + case 0xb4041: return cur_rev <= 0xb404101; break; case 0xb6000: return cur_rev <= 0xb600031; break; + case 0xb6080: return cur_rev <= 0xb608031; break; case 0xb7000: return cur_rev <= 0xb700031; break; default: break; } @@ -233,13 +236,31 @@ static bool need_sha_check(u32 cur_rev) return true; } +static bool cpu_has_entrysign(void) +{ + unsigned int fam = x86_family(bsp_cpuid_1_eax); + unsigned int model = x86_model(bsp_cpuid_1_eax); + + if (fam == 0x17 || fam == 0x19) + return true; + + if (fam == 0x1a) { + if (model <= 0x2f || + (0x40 <= model && model <= 0x4f) || + (0x60 <= model && model <= 0x6f)) + return true; + } + + return false; +} + static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) { struct patch_digest *pd = NULL; u8 digest[SHA256_DIGEST_SIZE]; int i; - if (x86_family(bsp_cpuid_1_eax) < 0x17) + if (!cpu_has_entrysign()) return true; if (!need_sha_check(cur_rev)) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..fe1a2aa53c16 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) @@ -452,7 +458,16 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - if (rdt_cpu_has(X86_FEATURE_ABMC)) { + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1f71cc135e9a..e88eacb1b5bb 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -825,6 +825,9 @@ void fpu__clear_user_states(struct fpu *fpu) !fpregs_state_valid(fpu, smp_processor_id())) os_xrstor_supervisor(fpu->fpstate); + /* Ensure XFD state is in sync before reloading XSTATE */ + xfd_update_state(fpu->fpstate); + /* Reset user states in registers. */ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 367da3638167..823dbdd0eb41 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR + /* Restore return_to_handler value that got eaten by previous ret instruction. */ + subq $8, %rsp + UNWIND_HINT_FUNC + /* Save ftrace_regs for function exit context */ subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rdx, RDX(%rsp) movq %rbp, RBP(%rsp) + movq %rsp, RSP(%rsp) movq %rsp, %rdi call ftrace_return_to_handler @@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler) movq RDX(%rsp), %rdx movq RAX(%rsp), %rax - addq $(FRAME_SIZE), %rsp + addq $(FRAME_SIZE) + 8, %rsp + /* * Jump back to the old return address. This cannot be JMP_NOSPEC rdi * since IBT would demand that contain ENDBR, which simply isn't so for diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 40ac4cb44ed2..487ad19a236e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -108,16 +108,18 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; - perf_get_x86_pmu_capability(&kvm_host_pmu); - /* * Hybrid PMUs don't play nice with virtualization without careful * configuration by userspace, and KVM's APIs for reporting supported * vPMU features do not account for hybrid PMUs. Disable vPMU support * for hybrid PMUs until KVM gains a way to let userspace opt-in. */ - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { enable_pmu = false; + memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu)); + } else { + perf_get_x86_pmu_capability(&kvm_host_pmu); + } if (enable_pmu) { /* diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f286b5706d7c..fef00546c885 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -216,7 +216,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) * This function is called from IOMMU driver to notify * SVM to schedule in a particular vCPU of a particular VM. */ -int avic_ga_log_notifier(u32 ga_tag) +static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; struct kvm_svm *kvm_svm; @@ -788,7 +788,7 @@ int avic_init_vcpu(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; INIT_LIST_HEAD(&svm->ir_list); - spin_lock_init(&svm->ir_list_lock); + raw_spin_lock_init(&svm->ir_list_lock); if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) return 0; @@ -816,9 +816,9 @@ static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd) if (!vcpu) return; - spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); list_del(&irqfd->vcpu_list); - spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); } int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, @@ -855,7 +855,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, * list of IRQs being posted to the vCPU, to ensure the IRTE * isn't programmed with stale pCPU/IsRunning information. */ - guard(spinlock_irqsave)(&svm->ir_list_lock); + guard(raw_spinlock_irqsave)(&svm->ir_list_lock); /* * Update the target pCPU for IOMMU doorbells if the vCPU is @@ -972,7 +972,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, * up-to-date entry information, or that this task will wait until * svm_ir_list_add() completes to set the new target pCPU. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -997,7 +997,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action); - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1035,7 +1035,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) * or that this task will wait until svm_ir_list_add() completes to * mark the vCPU as not running. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); avic_update_iommu_vcpu_affinity(vcpu, -1, action); @@ -1059,7 +1059,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) svm->avic_physical_id_entry = entry; - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) @@ -1243,3 +1243,9 @@ bool __init avic_hardware_setup(void) return true; } + +void avic_hardware_unsetup(void) +{ + if (avic) + amd_iommu_register_ga_log_notifier(NULL); +} diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a6443feab252..da6e80b3ac35 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -677,11 +677,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 */ svm_copy_lbrs(vmcb02, vmcb12); vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; - svm_update_lbrv(&svm->vcpu); - - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + } else { svm_copy_lbrs(vmcb02, vmcb01); } + svm_update_lbrv(&svm->vcpu); } static inline bool is_evtinj_soft(u32 evtinj) @@ -833,11 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_next_rip = vmcb12_rip; } - vmcb02->control.virt_ext = vmcb01->control.virt_ext & - LBR_CTL_ENABLE_MASK; - if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) - vmcb02->control.virt_ext |= - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; @@ -1189,13 +1184,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) svm_copy_lbrs(vmcb12, vmcb02); - svm_update_lbrv(vcpu); - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + else svm_copy_lbrs(vmcb01, vmcb02); - svm_update_lbrv(vcpu); - } + + svm_update_lbrv(vcpu); if (vnmi) { if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 153c12dbf3eb..10c21e4c5406 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -806,60 +806,43 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) vmcb_mark_dirty(to_vmcb, VMCB_LBR); } -void svm_enable_lbrv(struct kvm_vcpu *vcpu) +static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - svm_recalc_lbr_msr_intercepts(vcpu); - - /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; } -static void svm_disable_lbrv(struct kvm_vcpu *vcpu) +void svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); - svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; + __svm_enable_lbrv(vcpu); svm_recalc_lbr_msr_intercepts(vcpu); - - /* - * Move the LBR msrs back to the vmcb01 to avoid copying them - * on nested guest entries. - */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); } -static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) +static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) { - /* - * If LBR virtualization is disabled, the LBR MSRs are always kept in - * vmcb01. If LBR virtualization is enabled and L1 is running VMs of - * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. - */ - return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb : - svm->vmcb01.ptr; + KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); + to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) || (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); - if (enable_lbrv == current_enable_lbrv) - return; + if (enable_lbrv && !current_enable_lbrv) + __svm_enable_lbrv(vcpu); + else if (!enable_lbrv && current_enable_lbrv) + __svm_disable_lbrv(vcpu); - if (enable_lbrv) - svm_enable_lbrv(vcpu); - else - svm_disable_lbrv(vcpu); + /* + * During nested transitions, it is possible that the current VMCB has + * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). + * In this case, even though LBR_CTL does not need an update, intercepts + * do, so always recalculate the intercepts here. + */ + svm_recalc_lbr_msr_intercepts(vcpu); } void disable_nmi_singlestep(struct vcpu_svm *svm) @@ -921,6 +904,8 @@ static void svm_hardware_unsetup(void) { int cpu; + avic_hardware_unsetup(); + sev_hardware_unsetup(); for_each_possible_cpu(cpu) @@ -2722,19 +2707,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + msr_info->data = svm->vmcb->save.dbgctl; break; case MSR_IA32_LASTBRANCHFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + msr_info->data = svm->vmcb->save.br_from; break; case MSR_IA32_LASTBRANCHTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + msr_info->data = svm->vmcb->save.br_to; break; case MSR_IA32_LASTINTFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + msr_info->data = svm->vmcb->save.last_excp_from; break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; + msr_info->data = svm->vmcb->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -3002,7 +2987,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - svm_get_lbr_vmcb(svm)->save.dbgctl = data; + if (svm->vmcb->save.dbgctl == data) + break; + + svm->vmcb->save.dbgctl = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: @@ -5386,12 +5375,6 @@ static __init int svm_hardware_setup(void) svm_hv_hardware_setup(); - for_each_possible_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err; - } - enable_apicv = avic_hardware_setup(); if (!enable_apicv) { enable_ipiv = false; @@ -5435,6 +5418,13 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; + + for_each_possible_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } + return 0; err: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e4b04f435b3d..c856d8e0f95e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -329,7 +329,7 @@ struct vcpu_svm { * back into remapped mode). */ struct list_head ir_list; - spinlock_t ir_list_lock; + raw_spinlock_t ir_list_lock; struct vcpu_sev_es_state sev_es; @@ -805,7 +805,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; ) bool __init avic_hardware_setup(void); -int avic_ga_log_notifier(u32 ga_tag); +void avic_hardware_unsetup(void); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index bc5ece76533a..412d0829d7a2 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK) ? PFERR_PRESENT_MASK : 0; - if (error_code & EPT_VIOLATION_GVA_IS_VALID) + if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID) error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 76271962cb70..bcea087b642f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_NOTIFY: /* Notify VM exit is not exposed to L1 */ return false; + case EXIT_REASON_SEAMCALL: + case EXIT_REASON_TDCALL: + /* + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't + * virtualized by KVM for L1 hypervisors, i.e. L1 should + * never want or expect such an exit. + */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f87c216d976d..91b6f2f3edc2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6032,6 +6032,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) return 1; } +static int handle_tdx_instruction(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + #ifndef CONFIG_X86_SGX_KVM static int handle_encls(struct kvm_vcpu *vcpu) { @@ -6157,6 +6163,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, [EXIT_REASON_NOTIFY] = handle_notify, + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction, + [EXIT_REASON_TDCALL] = handle_tdx_instruction, [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42ecd093bb4c..c9c2aa6f4705 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3874,15 +3874,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) /* * Returns true if the MSR in question is managed via XSTATE, i.e. is context - * switched with the rest of guest FPU state. Note! S_CET is _not_ context - * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. - * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, - * the value saved/restored via XSTATE is always the host's value. That detail - * is _extremely_ important, as the guest's S_CET must _never_ be resident in - * hardware while executing in the host. Loading guest values for U_CET and - * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to - * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower - * privilege levels, i.e. are effectively only consumed by userspace as well. + * switched with the rest of guest FPU state. + * + * Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS. */ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) { @@ -3905,6 +3899,11 @@ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) * MSR that is managed via XSTATE. Note, the caller is responsible for doing * the initial FPU load, this helper only ensures that guest state is resident * in hardware (the kernel can load its FPU state in IRQ context). + * + * Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the + * kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only + * consumed when transitioning to lower privilege levels, i.e. are effectively + * only consumed by userspace as well. */ static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info, @@ -11807,6 +11806,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); @@ -11815,6 +11817,9 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); @@ -12137,9 +12142,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int r; vcpu_load(vcpu); - if (kvm_mpx_supported()) - kvm_load_guest_fpu(vcpu); - kvm_vcpu_srcu_read_lock(vcpu); r = kvm_apic_accept_events(vcpu); @@ -12156,9 +12158,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, out: kvm_vcpu_srcu_read_unlock(vcpu); - - if (kvm_mpx_supported()) - kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return r; } @@ -12788,6 +12787,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) { struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; u64 xfeatures_mask; + bool fpu_in_use; int i; /* @@ -12811,13 +12811,23 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); /* - * All paths that lead to INIT are required to load the guest's FPU - * state (because most paths are buried in KVM_RUN). - */ - kvm_put_guest_fpu(vcpu); + * Unload guest FPU state (if necessary) before zeroing XSTATE fields + * as the kernel can only modify the state when its resident in memory, + * i.e. when it's not loaded into hardware. + * + * WARN if the vCPU's desire to run, i.e. whether or not its in KVM_RUN, + * doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the + * only path that can trigger INIT emulation _and_ loads FPU state, and + * KVM_RUN should _always_ load FPU state. + */ + WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); + fpu_in_use = fpstate->in_use; + if (fpu_in_use) + kvm_put_guest_fpu(vcpu); for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) fpstate_clear_xstate_component(fpstate, i); - kvm_load_guest_fpu(vcpu); + if (fpu_in_use) + kvm_load_guest_fpu(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -13941,10 +13951,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GUEST_MEMFD /* - * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory - * (the private vs. shared tracking needs to be moved into guest_memfd). + * KVM doesn't yet support initializing guest_memfd memory as shared for VMs + * with private memory (the private vs. shared tracking needs to be moved into + * guest_memfd). */ -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return !kvm_arch_has_private_mem(kvm); } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d2d54b8c4dbb..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -446,7 +446,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) } start = fix_addr(__cpa_addr(cpa, 0)); - end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + end = start + cpa->numpages * PAGE_SIZE; if (cpa->force_flush_all) end = TLB_FLUSH_ALL; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..5d221709353e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -911,11 +911,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in mative_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both. + */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d4c93d9e73e4..de5083cb1d37 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2701,7 +2701,7 @@ emit_jmp: /* Update cleanup_addr */ ctx->cleanup_addr = proglen; if (bpf_prog_was_classic(bpf_prog) && - !capable(CAP_SYS_ADMIN)) { + !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) { u8 *ip = image + addrs[i - 1]; if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f93de34fe87d..3cffb68ba5d8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -812,8 +812,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) } /* * Similar to blkg_conf_open_bdev, but additionally freezes the queue, - * acquires q->elevator_lock, and ensures the correct locking order - * between q->elevator_lock and q->rq_qos_mutex. + * ensures the correct locking order between freeze queue and q->rq_qos_mutex. * * This function returns negative error on failure. On success it returns * memflags which must be saved and later passed to blkg_conf_exit_frozen @@ -834,13 +833,11 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) * At this point, we haven’t started protecting anything related to QoS, * so we release q->rq_qos_mutex here, which was first acquired in blkg_ * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing - * the queue and acquiring q->elevator_lock to maintain the correct - * locking order. + * the queue to maintain the correct locking order. */ mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue); - mutex_lock(&ctx->bdev->bd_queue->elevator_lock); mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex); return memflags; @@ -995,9 +992,8 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx) EXPORT_SYMBOL_GPL(blkg_conf_exit); /* - * Similar to blkg_conf_exit, but also unfreezes the queue and releases - * q->elevator_lock. Should be used when blkg_conf_open_bdev_frozen - * is used to open the bdev. + * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used + * when blkg_conf_open_bdev_frozen is used to open the bdev. */ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) { @@ -1005,7 +1001,6 @@ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) struct request_queue *q = ctx->bdev->bd_queue; blkg_conf_exit(ctx); - mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); } } diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 4b1ad84d1b5a..3e7bf1974cbd 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -292,7 +292,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) } if (!bio_crypt_check_alignment(bio)) { - bio->bi_status = BLK_STS_IOERR; + bio->bi_status = BLK_STS_INVAL; goto fail; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d06bb137a743..e0bed16485c3 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -557,7 +557,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, if (blk_mq_is_shared_tags(flags)) { /* Shared tags are stored at index 0 in @et->tags. */ q->sched_shared_tags = et->tags[0]; - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, et->nr_requests); } queue_for_each_hw_ctx(q, hctx, i) { diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c7a4d4b9cc87..5b664dbdf655 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -622,10 +622,11 @@ void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags); } -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q) +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr) { sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags, - q->nr_requests - q->tag_set->reserved_tags); + nr - q->tag_set->reserved_tags); } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 09f579414161..d626d32f6e57 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4941,7 +4941,7 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q, * tags can't grow, see blk_mq_alloc_sched_tags(). */ if (q->elevator) - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, nr); else blk_mq_tag_resize_shared_tags(set, nr); } else if (!q->elevator) { diff --git a/block/blk-mq.h b/block/blk-mq.h index af42dc018808..c4fccdeb5441 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -186,7 +186,8 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size); -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr); void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, diff --git a/block/blk-settings.c b/block/blk-settings.c index 54cffaae4df4..d74b13ec8e54 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -184,6 +184,16 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) if (!bi->interval_exp) bi->interval_exp = ilog2(lim->logical_block_size); + /* + * The PI generation / validation helpers do not expect intervals to + * straddle multiple bio_vecs. Enforce alignment so that those are + * never generated, and that each buffer is aligned as expected. + */ + if (bi->csum_type) { + lim->dma_alignment = max(lim->dma_alignment, + (1U << bi->interval_exp) - 1); + } + return 0; } diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index c31081e42cee..820d133236dd 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -97,6 +97,8 @@ struct dma_bridge_chan { * response queue's head and tail pointer of this DBC. */ void __iomem *dbc_base; + /* Synchronizes access to Request queue's head and tail pointer */ + struct mutex req_lock; /* Head of list where each node is a memory handle queued in request queue */ struct list_head xfer_list; /* Synchronizes DBC readers during cleanup */ diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index d8bdab69f800..b86a8e48e731 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -407,7 +407,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev, return -EINVAL; remaining = in_trans->size - resources->xferred_dma_size; if (remaining == 0) - return 0; + return -EINVAL; if (check_add_overflow(xfer_start_addr, remaining, &end)) return -EINVAL; diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 797289e9d780..c4f117edb266 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -1356,13 +1356,17 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr goto release_ch_rcu; } + ret = mutex_lock_interruptible(&dbc->req_lock); + if (ret) + goto release_ch_rcu; + head = readl(dbc->dbc_base + REQHP_OFF); tail = readl(dbc->dbc_base + REQTP_OFF); if (head == U32_MAX || tail == U32_MAX) { /* PCI link error */ ret = -ENODEV; - goto release_ch_rcu; + goto unlock_req_lock; } queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail); @@ -1370,11 +1374,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, head, &tail); if (ret) - goto release_ch_rcu; + goto unlock_req_lock; /* Finalize commit to hardware */ submit_ts = ktime_get_ns(); writel(tail, dbc->dbc_base + REQTP_OFF); + mutex_unlock(&dbc->req_lock); update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, submit_ts, queue_level); @@ -1382,6 +1387,9 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr if (datapath_polling) schedule_work(&dbc->poll_work); +unlock_req_lock: + if (ret) + mutex_unlock(&dbc->req_lock); release_ch_rcu: srcu_read_unlock(&dbc->ch_lock, rcu_id); unlock_dev_srcu: diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index a991b8198dc4..8dc4fe5bb560 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -218,6 +218,9 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d if (ret) goto destroy_workqueue; + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->bootlog_ch = mhi_dev; + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); if (!msg) { @@ -233,8 +236,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d goto mhi_unprepare; } - dev_set_drvdata(&mhi_dev->dev, qdev); - qdev->bootlog_ch = mhi_dev; return 0; mhi_unprepare: diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index e31bcb0ecfc9..e162f4b8a262 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -454,6 +454,9 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, return NULL; init_waitqueue_head(&qdev->dbc[i].dbc_release); INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); + ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock); + if (ret) + return NULL; } return qdev; diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c index 47ea3ccc2142..6d69554c940e 100644 --- a/drivers/acpi/acpi_mrrm.c +++ b/drivers/acpi/acpi_mrrm.c @@ -63,6 +63,9 @@ static __init int acpi_parse_mrrm(struct acpi_table_header *table) if (!mrrm) return -ENODEV; + if (mrrm->header.revision != 1) + return -EINVAL; + if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS) return -EOPNOTSUPP; @@ -149,26 +152,49 @@ ATTRIBUTE_GROUPS(memory_range); static __init int add_boot_memory_ranges(void) { - struct kobject *pkobj, *kobj; + struct kobject *pkobj, *kobj, **kobjs; int ret = -EINVAL; - char *name; + char name[16]; + int i; pkobj = kobject_create_and_add("memory_ranges", acpi_kobj); + if (!pkobj) + return -ENOMEM; - for (int i = 0; i < mrrm_mem_entry_num; i++) { - name = kasprintf(GFP_KERNEL, "range%d", i); - if (!name) { - ret = -ENOMEM; - break; - } + kobjs = kcalloc(mrrm_mem_entry_num, sizeof(*kobjs), GFP_KERNEL); + if (!kobjs) { + kobject_put(pkobj); + return -ENOMEM; + } + for (i = 0; i < mrrm_mem_entry_num; i++) { + scnprintf(name, sizeof(name), "range%d", i); kobj = kobject_create_and_add(name, pkobj); + if (!kobj) { + ret = -ENOMEM; + goto cleanup; + } ret = sysfs_create_groups(kobj, memory_range_groups); - if (ret) - return ret; + if (ret) { + kobject_put(kobj); + goto cleanup; + } + kobjs[i] = kobj; } + kfree(kobjs); + return 0; + +cleanup: + for (int j = 0; j < i; j++) { + if (kobjs[j]) { + sysfs_remove_groups(kobjs[j], memory_range_groups); + kobject_put(kobjs[j]); + } + } + kfree(kobjs); + kobject_put(pkobj); return ret; } diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 103f29661576..be8e7e18abca 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1959,8 +1959,10 @@ static void acpi_video_bus_remove_notify_handler(struct acpi_video_bus *video) struct acpi_video_device *dev; mutex_lock(&video->device_list_lock); - list_for_each_entry(dev, &video->video_device_list, entry) + list_for_each_entry(dev, &video->video_device_list, entry) { acpi_video_dev_remove_notify_handler(dev); + cancel_delayed_work_sync(&dev->switch_brightness_work); + } mutex_unlock(&video->device_list_lock); acpi_video_bus_stop_devices(video); diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 049f6c2f1e32..e5631027f7f1 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -95,6 +95,11 @@ acpi_tb_print_table_header(acpi_physical_address address, { struct acpi_table_header local_header; +#pragma GCC diagnostic push +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_FACS)) { /* FACS only has signature and length fields */ @@ -143,4 +148,5 @@ acpi_tb_print_table_header(acpi_physical_address address, local_header.asl_compiler_id, local_header.asl_compiler_revision)); } +#pragma GCC diagnostic pop } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 0a7026040188..3c6dd9b4ba0a 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -619,8 +619,10 @@ static int acpi_button_add(struct acpi_device *device) input_set_drvdata(input, device); error = input_register_device(input); - if (error) + if (error) { + input_free_device(input); goto err_remove_fs; + } switch (device->device_type) { case ACPI_BUS_TYPE_POWER_BUTTON: diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index ab4651205e8a..3bdeeee3414e 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -460,7 +460,7 @@ bool acpi_cpc_valid(void) if (acpi_disabled) return false; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); if (!cpc_ptr) return false; @@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && @@ -750,7 +750,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } /* - * Disregard _CPC if the number of entries in the return pachage is not + * Disregard _CPC if the number of entries in the return package is not * as expected, but support future revisions being proper supersets of * the v3 and only causing more entries to be returned by _CPC. */ @@ -1435,7 +1435,7 @@ bool cppc_perf_ctrs_in_pcc(void) { int cpu; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { struct cpc_register_resource *ref_perf_reg; struct cpc_desc *cpc_desc; diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 8a28a72a7c6a..bedbab0e8e4e 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -49,6 +49,7 @@ struct acpi_fan_fst { }; struct acpi_fan { + acpi_handle handle; bool acpi4; bool has_fst; struct acpi_fan_fif fif; @@ -59,14 +60,14 @@ struct acpi_fan { struct device_attribute fine_grain_control; }; -int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst); +int acpi_fan_get_fst(acpi_handle handle, struct acpi_fan_fst *fst); int acpi_fan_create_attributes(struct acpi_device *device); void acpi_fan_delete_attributes(struct acpi_device *device); #if IS_REACHABLE(CONFIG_HWMON) -int devm_acpi_fan_create_hwmon(struct acpi_device *device); +int devm_acpi_fan_create_hwmon(struct device *dev); #else -static inline int devm_acpi_fan_create_hwmon(struct acpi_device *device) { return 0; }; +static inline int devm_acpi_fan_create_hwmon(struct device *dev) { return 0; }; #endif #endif diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c index c1afb7b5ed3d..9b7fa52f3c2a 100644 --- a/drivers/acpi/fan_attr.c +++ b/drivers/acpi/fan_attr.c @@ -55,7 +55,7 @@ static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, struct acpi_fan_fst fst; int status; - status = acpi_fan_get_fst(acpi_dev, &fst); + status = acpi_fan_get_fst(acpi_dev->handle, &fst); if (status) return status; diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index 04ff608f2ff0..46e7fe7a506d 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -44,25 +44,30 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long return 0; } -int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst) +int acpi_fan_get_fst(acpi_handle handle, struct acpi_fan_fst *fst) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; acpi_status status; int ret = 0; - status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer); - if (ACPI_FAILURE(status)) { - dev_err(&device->dev, "Get fan state failed\n"); - return -ENODEV; - } + status = acpi_evaluate_object(handle, "_FST", NULL, &buffer); + if (ACPI_FAILURE(status)) + return -EIO; obj = buffer.pointer; - if (!obj || obj->type != ACPI_TYPE_PACKAGE || - obj->package.count != 3 || - obj->package.elements[1].type != ACPI_TYPE_INTEGER) { - dev_err(&device->dev, "Invalid _FST data\n"); - ret = -EINVAL; + if (!obj) + return -ENODATA; + + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 3) { + ret = -EPROTO; + goto err; + } + + if (obj->package.elements[0].type != ACPI_TYPE_INTEGER || + obj->package.elements[1].type != ACPI_TYPE_INTEGER || + obj->package.elements[2].type != ACPI_TYPE_INTEGER) { + ret = -EPROTO; goto err; } @@ -81,7 +86,7 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) struct acpi_fan_fst fst; int status, i; - status = acpi_fan_get_fst(device, &fst); + status = acpi_fan_get_fst(device->handle, &fst); if (status) return status; @@ -311,11 +316,16 @@ static int acpi_fan_probe(struct platform_device *pdev) struct acpi_device *device = ACPI_COMPANION(&pdev->dev); char *name; + if (!device) + return -ENODEV; + fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL); if (!fan) { dev_err(&device->dev, "No memory for fan\n"); return -ENOMEM; } + + fan->handle = device->handle; device->driver_data = fan; platform_set_drvdata(pdev, fan); @@ -337,7 +347,7 @@ static int acpi_fan_probe(struct platform_device *pdev) } if (fan->has_fst) { - result = devm_acpi_fan_create_hwmon(device); + result = devm_acpi_fan_create_hwmon(&pdev->dev); if (result) return result; diff --git a/drivers/acpi/fan_hwmon.c b/drivers/acpi/fan_hwmon.c index e8d90605106e..4b2c2007f2d7 100644 --- a/drivers/acpi/fan_hwmon.c +++ b/drivers/acpi/fan_hwmon.c @@ -93,13 +93,12 @@ static umode_t acpi_fan_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_ static int acpi_fan_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct acpi_device *adev = to_acpi_device(dev->parent); struct acpi_fan *fan = dev_get_drvdata(dev); struct acpi_fan_fps *fps; struct acpi_fan_fst fst; int ret; - ret = acpi_fan_get_fst(adev, &fst); + ret = acpi_fan_get_fst(fan->handle, &fst); if (ret < 0) return ret; @@ -167,12 +166,12 @@ static const struct hwmon_chip_info acpi_fan_hwmon_chip_info = { .info = acpi_fan_hwmon_info, }; -int devm_acpi_fan_create_hwmon(struct acpi_device *device) +int devm_acpi_fan_create_hwmon(struct device *dev) { - struct acpi_fan *fan = acpi_driver_data(device); + struct acpi_fan *fan = dev_get_drvdata(dev); struct device *hdev; - hdev = devm_hwmon_device_register_with_info(&device->dev, "acpi_fan", fan, - &acpi_fan_hwmon_chip_info, NULL); + hdev = devm_hwmon_device_register_with_info(dev, "acpi_fan", fan, &acpi_fan_hwmon_chip_info, + NULL); return PTR_ERR_OR_ZERO(hdev); } diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 5a36d57289b4..11e4483685c9 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -874,11 +874,33 @@ static void hmat_register_target_devices(struct memory_target *target) } } -static void hmat_register_target(struct memory_target *target) +static void hmat_hotplug_target(struct memory_target *target) { int nid = pxm_to_node(target->memory_pxm); /* + * Skip offline nodes. This can happen when memory marked EFI_MEMORY_SP, + * "specific purpose", is applied to all the memory in a proximity + * domain leading to * the node being marked offline / unplugged, or if + * memory-only "hotplug" node is offline. + */ + if (nid == NUMA_NO_NODE || !node_online(nid)) + return; + + guard(mutex)(&target_lock); + if (target->registered) + return; + + hmat_register_target_initiators(target); + hmat_register_target_cache(target); + hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL); + hmat_register_target_perf(target, ACCESS_COORDINATE_CPU); + target->registered = true; +} + +static void hmat_register_target(struct memory_target *target) +{ + /* * Devices may belong to either an offline or online * node, so unconditionally add them. */ @@ -895,25 +917,7 @@ static void hmat_register_target(struct memory_target *target) } mutex_unlock(&target_lock); - /* - * Skip offline nodes. This can happen when memory - * marked EFI_MEMORY_SP, "specific purpose", is applied - * to all the memory in a proximity domain leading to - * the node being marked offline / unplugged, or if - * memory-only "hotplug" node is offline. - */ - if (nid == NUMA_NO_NODE || !node_online(nid)) - return; - - mutex_lock(&target_lock); - if (!target->registered) { - hmat_register_target_initiators(target); - hmat_register_target_cache(target); - hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL); - hmat_register_target_perf(target, ACCESS_COORDINATE_CPU); - target->registered = true; - } - mutex_unlock(&target_lock); + hmat_hotplug_target(target); } static void hmat_register_targets(void) @@ -939,7 +943,7 @@ static int hmat_callback(struct notifier_block *self, if (!target) return NOTIFY_OK; - hmat_register_target(target); + hmat_hotplug_target(target); return NOTIFY_OK; } diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 53816dfab645..aa87ee1583a4 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -237,7 +237,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) struct acpi_srat_generic_affinity *p = (struct acpi_srat_generic_affinity *)header; - if (p->device_handle_type == 0) { + if (p->device_handle_type == 1) { /* * For pci devices this may be the only place they * are assigned a proximity domain diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 1b997a5497e7..43d5e457814e 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1107,7 +1107,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, size_t num_args, struct fwnode_reference_args *args) { - return acpi_fwnode_get_reference_args(fwnode, propname, NULL, index, num_args, args); + return acpi_fwnode_get_reference_args(fwnode, propname, NULL, num_args, index, args); } EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); diff --git a/drivers/acpi/riscv/rimt.c b/drivers/acpi/riscv/rimt.c index 683fcfe35c31..7f423405e5ef 100644 --- a/drivers/acpi/riscv/rimt.c +++ b/drivers/acpi/riscv/rimt.c @@ -61,30 +61,6 @@ static int rimt_set_fwnode(struct acpi_rimt_node *rimt_node, return 0; } -/** - * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node - * - * @node: RIMT table node to be looked-up - * - * Returns: fwnode_handle pointer on success, NULL on failure - */ -static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) -{ - struct fwnode_handle *fwnode = NULL; - struct rimt_fwnode *curr; - - spin_lock(&rimt_fwnode_lock); - list_for_each_entry(curr, &rimt_fwnode_list, list) { - if (curr->rimt_node == node) { - fwnode = curr->fwnode; - break; - } - } - spin_unlock(&rimt_fwnode_lock); - - return fwnode; -} - static acpi_status rimt_match_node_callback(struct acpi_rimt_node *node, void *context) { @@ -202,6 +178,67 @@ static struct acpi_rimt_node *rimt_scan_node(enum acpi_rimt_node_type type, return NULL; } +/* + * RISC-V supports IOMMU as a PCI device or a platform device. + * When it is a platform device, there should be a namespace device as + * well along with RIMT. To create the link between RIMT information and + * the platform device, the IOMMU driver should register itself with the + * RIMT module. This is true for PCI based IOMMU as well. + */ +int rimt_iommu_register(struct device *dev) +{ + struct fwnode_handle *rimt_fwnode; + struct acpi_rimt_node *node; + + node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); + if (!node) { + pr_err("Could not find IOMMU node in RIMT\n"); + return -ENODEV; + } + + if (dev_is_pci(dev)) { + rimt_fwnode = acpi_alloc_fwnode_static(); + if (!rimt_fwnode) + return -ENOMEM; + + rimt_fwnode->dev = dev; + if (!dev->fwnode) + dev->fwnode = rimt_fwnode; + + rimt_set_fwnode(node, rimt_fwnode); + } else { + rimt_set_fwnode(node, dev->fwnode); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API + +/** + * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node + * + * @node: RIMT table node to be looked-up + * + * Returns: fwnode_handle pointer on success, NULL on failure + */ +static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) +{ + struct fwnode_handle *fwnode = NULL; + struct rimt_fwnode *curr; + + spin_lock(&rimt_fwnode_lock); + list_for_each_entry(curr, &rimt_fwnode_list, list) { + if (curr->rimt_node == node) { + fwnode = curr->fwnode; + break; + } + } + spin_unlock(&rimt_fwnode_lock); + + return fwnode; +} + static bool rimt_pcie_rc_supports_ats(struct acpi_rimt_node *node) { struct acpi_rimt_pcie_rc *pci_rc; @@ -290,43 +327,6 @@ static struct acpi_rimt_node *rimt_node_get_id(struct acpi_rimt_node *node, return NULL; } -/* - * RISC-V supports IOMMU as a PCI device or a platform device. - * When it is a platform device, there should be a namespace device as - * well along with RIMT. To create the link between RIMT information and - * the platform device, the IOMMU driver should register itself with the - * RIMT module. This is true for PCI based IOMMU as well. - */ -int rimt_iommu_register(struct device *dev) -{ - struct fwnode_handle *rimt_fwnode; - struct acpi_rimt_node *node; - - node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); - if (!node) { - pr_err("Could not find IOMMU node in RIMT\n"); - return -ENODEV; - } - - if (dev_is_pci(dev)) { - rimt_fwnode = acpi_alloc_fwnode_static(); - if (!rimt_fwnode) - return -ENOMEM; - - rimt_fwnode->dev = dev; - if (!dev->fwnode) - dev->fwnode = rimt_fwnode; - - rimt_set_fwnode(node, rimt_fwnode); - } else { - rimt_set_fwnode(node, dev->fwnode); - } - - return 0; -} - -#ifdef CONFIG_IOMMU_API - static struct acpi_rimt_node *rimt_node_map_id(struct acpi_rimt_node *node, u32 id_in, u32 *id_out, u8 type_mask) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index a3f95a3fffde..d3edc3bcbf01 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -487,7 +487,7 @@ static int acpi_battery_read(struct acpi_battery *battery) if (result) return result; - battery->present = state & (1 << battery->id); + battery->present = !!(state & (1 << battery->id)); if (!battery->present) return 0; diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index d4d52d5e9016..73cb933fdc89 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -155,7 +155,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console) * Baud Rate field. If this field is zero or not present, Configured * Baud Rate is used. */ - if (table->precise_baudrate) + if (table->header.revision >= 4 && table->precise_baudrate) baud_rate = table->precise_baudrate; else switch (table->baud_rate) { case 0: diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8c99ceaa303b..a3a1b5c33ba3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -851,17 +851,8 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, } else { if (!internal) node->local_weak_refs++; - if (!node->has_weak_ref && list_empty(&node->work.entry)) { - if (target_list == NULL) { - pr_err("invalid inc weak node for %d\n", - node->debug_id); - return -EINVAL; - } - /* - * See comment above - */ + if (!node->has_weak_ref && target_list && list_empty(&node->work.entry)) binder_enqueue_work_ilocked(&node->work, target_list); - } } return 0; } @@ -2418,10 +2409,10 @@ err_fd_not_accepted: /** * struct binder_ptr_fixup - data to be fixed-up in target buffer - * @offset offset in target buffer to fixup - * @skip_size bytes to skip in copy (fixup will be written later) - * @fixup_data data to write at fixup offset - * @node list node + * @offset: offset in target buffer to fixup + * @skip_size: bytes to skip in copy (fixup will be written later) + * @fixup_data: data to write at fixup offset + * @node: list node * * This is used for the pointer fixup list (pf) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -2438,10 +2429,10 @@ struct binder_ptr_fixup { /** * struct binder_sg_copy - scatter-gather data to be copied - * @offset offset in target buffer - * @sender_uaddr user address in source buffer - * @length bytes to copy - * @node list node + * @offset: offset in target buffer + * @sender_uaddr: user address in source buffer + * @length: bytes to copy + * @node: list node * * This is used for the sg copy list (sgc) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -4063,14 +4054,15 @@ binder_freeze_notification_done(struct binder_proc *proc, /** * binder_free_buf() - free the specified buffer - * @proc: binder proc that owns buffer - * @buffer: buffer to be freed - * @is_failure: failed to send transaction + * @proc: binder proc that owns buffer + * @thread: binder thread performing the buffer release + * @buffer: buffer to be freed + * @is_failure: failed to send transaction * - * If buffer for an async transaction, enqueue the next async + * If the buffer is for an async transaction, enqueue the next async * transaction from the node. * - * Cleanup buffer and free it. + * Cleanup the buffer and free it. */ static void binder_free_buf(struct binder_proc *proc, diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index e68c3c8bc55a..220de35ae85a 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -106,13 +106,22 @@ impl DeliverToRead for FreezeMessage { return Ok(true); } if freeze.is_clearing { - _removed_listener = freeze_entry.remove_node(); + kernel::warn_on!(freeze.num_cleared_duplicates != 0); + if freeze.num_pending_duplicates > 0 { + // The primary freeze listener was deleted, so convert a pending duplicate back + // into the primary one. + freeze.num_pending_duplicates -= 1; + freeze.is_pending = true; + freeze.is_clearing = true; + } else { + _removed_listener = freeze_entry.remove_node(); + } drop(node_refs); writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?; writer.write_payload(&self.cookie.0)?; Ok(true) } else { - let is_frozen = freeze.node.owner.inner.lock().is_frozen; + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); if freeze.last_is_frozen == Some(is_frozen) { return Ok(true); } @@ -245,8 +254,9 @@ impl Process { ); return Err(EINVAL); } - if freeze.is_clearing { - // Immediately send another FreezeMessage for BR_CLEAR_FREEZE_NOTIFICATION_DONE. + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); + if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { + // Immediately send another FreezeMessage. clear_msg = Some(FreezeMessage::init(alloc, cookie)); } freeze.is_pending = false; diff --git a/drivers/android/binder/node.rs b/drivers/android/binder/node.rs index ade895ef791e..08d362deaf61 100644 --- a/drivers/android/binder/node.rs +++ b/drivers/android/binder/node.rs @@ -687,7 +687,7 @@ impl Node { ); } if inner.freeze_list.is_empty() { - _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); + _unused_capacity = mem::take(&mut inner.freeze_list); } } diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index f13a747e784c..7607353a5e92 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -72,6 +72,33 @@ impl Mapping { const PROC_DEFER_FLUSH: u8 = 1; const PROC_DEFER_RELEASE: u8 = 2; +#[derive(Copy, Clone)] +pub(crate) enum IsFrozen { + Yes, + No, + InProgress, +} + +impl IsFrozen { + /// Whether incoming transactions should be rejected due to freeze. + pub(crate) fn is_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => true, + } + } + + /// Whether freeze notifications consider this process frozen. + pub(crate) fn is_fully_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => false, + } + } +} + /// The fields of `Process` protected by the spinlock. pub(crate) struct ProcessInner { is_manager: bool, @@ -98,7 +125,7 @@ pub(crate) struct ProcessInner { /// are woken up. outstanding_txns: u32, /// Process is frozen and unable to service binder transactions. - pub(crate) is_frozen: bool, + pub(crate) is_frozen: IsFrozen, /// Process received sync transactions since last frozen. pub(crate) sync_recv: bool, /// Process received async transactions since last frozen. @@ -124,7 +151,7 @@ impl ProcessInner { started_thread_count: 0, defer_work: 0, outstanding_txns: 0, - is_frozen: false, + is_frozen: IsFrozen::No, sync_recv: false, async_recv: false, binderfs_file: None, @@ -1260,7 +1287,7 @@ impl Process { let is_manager = { let mut inner = self.inner.lock(); inner.is_dead = true; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; inner.sync_recv = false; inner.async_recv = false; inner.is_manager @@ -1346,10 +1373,6 @@ impl Process { .alloc .take_for_each(|offset, size, debug_id, odata| { let ptr = offset + address; - pr_warn!( - "{}: removing orphan mapping {offset}:{size}\n", - self.pid_in_current_ns() - ); let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false); if let Some(data) = odata { @@ -1371,7 +1394,7 @@ impl Process { return; } inner.outstanding_txns -= 1; - inner.is_frozen && inner.outstanding_txns == 0 + inner.is_frozen.is_frozen() && inner.outstanding_txns == 0 }; if wake { @@ -1385,7 +1408,7 @@ impl Process { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; drop(inner); msgs.send_messages(); return Ok(()); @@ -1394,7 +1417,7 @@ impl Process { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = true; + inner.is_frozen = IsFrozen::InProgress; if info.timeout_ms > 0 { let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms); @@ -1408,7 +1431,7 @@ impl Process { .wait_interruptible_timeout(&mut inner, jiffies) { CondVarTimeoutResult::Signal { .. } => { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; return Err(ERESTARTSYS); } CondVarTimeoutResult::Woken { jiffies: remaining } => { @@ -1422,17 +1445,18 @@ impl Process { } if inner.txns_pending_locked() { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; Err(EAGAIN) } else { drop(inner); match self.prepare_freeze_messages() { Ok(batch) => { + self.inner.lock().is_frozen = IsFrozen::Yes; batch.send_messages(); Ok(()) } Err(kernel::alloc::AllocError) => { - self.inner.lock().is_frozen = false; + self.inner.lock().is_frozen = IsFrozen::No; Err(ENOMEM) } } diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs index 02512175d622..4bd3c0e417eb 100644 --- a/drivers/android/binder/transaction.rs +++ b/drivers/android/binder/transaction.rs @@ -249,7 +249,7 @@ impl Transaction { if oneway { if let Some(target_node) = self.target_node.clone() { - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.async_recv = true; if self.flags & TF_UPDATE_TXN != 0 { if let Some(t_outdated) = @@ -270,7 +270,7 @@ impl Transaction { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { return Err(BinderError::new_frozen_oneway()); } else { return Ok(()); @@ -280,7 +280,7 @@ impl Transaction { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.sync_recv = true; return Err(BinderError::new_frozen()); } diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1037169abb45..e1eff05bea4a 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -292,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) * frequency (by keeping the initial capacity_freq_ref value). */ cpu_clk = of_clk_get(cpu_node, 0); - if (!PTR_ERR_OR_ZERO(cpu_clk)) { + if (!IS_ERR_OR_NULL(cpu_clk)) { per_cpu(capacity_freq_ref, cpu) = clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); diff --git a/drivers/base/core.c b/drivers/base/core.c index 3c533dab8fa5..f69dc9c85954 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1784,7 +1784,7 @@ static int fw_devlink_dev_sync_state(struct device *dev, void *data) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { - dev_warn(sup, "sync_state() pending due to %s\n", + dev_info(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 37faf6156d7c..55bdc7f5e59d 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -23,50 +23,46 @@ struct devcd_entry { void *data; size_t datalen; /* - * Here, mutex is required to serialize the calls to del_wk work between - * user/kernel space which happens when devcd is added with device_add() - * and that sends uevent to user space. User space reads the uevents, - * and calls to devcd_data_write() which try to modify the work which is - * not even initialized/queued from devcoredump. + * There are 2 races for which mutex is required. * + * The first race is between device creation and userspace writing to + * schedule immediately destruction. * + * This race is handled by arming the timer before device creation, but + * when device creation fails the timer still exists. * - * cpu0(X) cpu1(Y) + * To solve this, hold the mutex during device_add(), and set + * init_completed on success before releasing the mutex. * - * dev_coredump() uevent sent to user space - * device_add() ======================> user space process Y reads the - * uevents writes to devcd fd - * which results into writes to + * That way the timer will never fire until device_add() is called, + * it will do nothing if init_completed is not set. The timer is also + * cancelled in that case. * - * devcd_data_write() - * mod_delayed_work() - * try_to_grab_pending() - * timer_delete() - * debug_assert_init() - * INIT_DELAYED_WORK() - * schedule_delayed_work() - * - * - * Also, mutex alone would not be enough to avoid scheduling of - * del_wk work after it get flush from a call to devcd_free() - * mentioned as below. - * - * disabled_store() - * devcd_free() - * mutex_lock() devcd_data_write() - * flush_delayed_work() - * mutex_unlock() - * mutex_lock() - * mod_delayed_work() - * mutex_unlock() - * So, delete_work flag is required. + * The second race involves multiple parallel invocations of devcd_free(), + * add a deleted flag so only 1 can call the destructor. */ struct mutex mutex; - bool delete_work; + bool init_completed, deleted; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); void (*free)(void *data); + /* + * If nothing interferes and device_add() was returns success, + * del_wk will destroy the device after the timer fires. + * + * Multiple userspace processes can interfere in the working of the timer: + * - Writing to the coredump will reschedule the timer to run immediately, + * if still armed. + * + * This is handled by using "if (cancel_delayed_work()) { + * schedule_delayed_work() }", to prevent re-arming after having + * been previously fired. + * - Writing to /sys/class/devcoredump/disabled will destroy the + * coredump synchronously. + * This is handled by using disable_delayed_work_sync(), and then + * checking if deleted flag is set with &devcd->mutex held. + */ struct delayed_work del_wk; struct device *failing_dev; }; @@ -95,14 +91,27 @@ static void devcd_dev_release(struct device *dev) kfree(devcd); } +static void __devcd_del(struct devcd_entry *devcd) +{ + devcd->deleted = true; + device_del(&devcd->devcd_dev); + put_device(&devcd->devcd_dev); +} + static void devcd_del(struct work_struct *wk) { struct devcd_entry *devcd; + bool init_completed; devcd = container_of(wk, struct devcd_entry, del_wk.work); - device_del(&devcd->devcd_dev); - put_device(&devcd->devcd_dev); + /* devcd->mutex serializes against dev_coredumpm_timeout */ + mutex_lock(&devcd->mutex); + init_completed = devcd->init_completed; + mutex_unlock(&devcd->mutex); + + if (init_completed) + __devcd_del(devcd); } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, @@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mutex_lock(&devcd->mutex); - if (!devcd->delete_work) { - devcd->delete_work = true; - mod_delayed_work(system_wq, &devcd->del_wk, 0); - } - mutex_unlock(&devcd->mutex); + /* + * Although it's tempting to use mod_delayed work here, + * that will cause a reschedule if the timer already fired. + */ + if (cancel_delayed_work(&devcd->del_wk)) + schedule_delayed_work(&devcd->del_wk, 0); return count; } @@ -151,11 +160,21 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); + /* + * To prevent a race with devcd_data_write(), disable work and + * complete manually instead. + * + * We cannot rely on the return value of + * disable_delayed_work_sync() here, because it might be in the + * middle of a cancel_delayed_work + schedule_delayed_work pair. + * + * devcd->mutex here guards against multiple parallel invocations + * of devcd_free(). + */ + disable_delayed_work_sync(&devcd->del_wk); mutex_lock(&devcd->mutex); - if (!devcd->delete_work) - devcd->delete_work = true; - - flush_delayed_work(&devcd->del_wk); + if (!devcd->deleted) + __devcd_del(devcd); mutex_unlock(&devcd->mutex); return 0; } @@ -179,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri * put_device() <- last reference * error = fn(dev, data) devcd_dev_release() * devcd_free(dev, data) kfree(devcd) - * mutex_lock(&devcd->mutex); * * * In the above diagram, it looks like disabled_store() would be racing with parallelly - * running devcd_del() and result in memory abort while acquiring devcd->mutex which - * is called after kfree of devcd memory after dropping its last reference with + * running devcd_del() and result in memory abort after dropping its last reference with * put_device(). However, this will not happens as fn(dev, data) runs * with its own reference to device via klist_node so it is not its last reference. * so, above situation would not occur. @@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); - devcd->delete_work = false; + devcd->deleted = false; mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); @@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; - mutex_lock(&devcd->mutex); dev_set_uevent_suppress(&devcd->devcd_dev, true); + + /* devcd->mutex prevents devcd_del() completing until init finishes */ + mutex_lock(&devcd->mutex); + devcd->init_completed = false; + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, timeout); + if (device_add(&devcd->devcd_dev)) goto put_device; @@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, timeout); + + /* + * Safe to run devcd_del() now that we are done with devcd_dev. + * Alternatively we could have taken a ref on devcd_dev before + * dropping the lock. + */ + devcd->init_completed = true; mutex_unlock(&devcd->mutex); return; put_device: - put_device(&devcd->devcd_dev); mutex_unlock(&devcd->mutex); + cancel_delayed_work_sync(&devcd->del_wk); + put_device(&devcd->devcd_dev); + put_module: module_put(owner); free: diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c index 54eb7d227cf4..e523fae73004 100644 --- a/drivers/base/regmap/regmap-slimbus.c +++ b/drivers/base/regmap/regmap-slimbus.c @@ -48,8 +48,7 @@ struct regmap *__regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __regmap_init(&slimbus->dev, bus, &slimbus->dev, config, - lock_key, lock_name); + return __regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_slimbus); @@ -63,8 +62,7 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __devm_regmap_init(&slimbus->dev, bus, &slimbus, config, - lock_key, lock_name); + return __devm_regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_slimbus); diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 6ecfc821cf83..72f045e6ed51 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -294,6 +294,8 @@ static int bcma_register_devices(struct bcma_bus *bus) int err; list_for_each_entry(core, &bus->cores, list) { + struct device_node *np; + /* We support that core ourselves */ switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: @@ -311,6 +313,10 @@ static int bcma_register_devices(struct bcma_bus *bus) if (bcma_is_core_needed_early(core->id.id)) continue; + np = core->dev.of_node; + if (np && !of_device_is_available(np)) + continue; + /* Only first GMAC core on BCM4706 is connected and working */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && core->core_unit > 0) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1188f32a5e5e..a853c65ac65d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -52,6 +52,7 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static struct workqueue_struct *nbd_del_wq; +static struct cred *nbd_cred; static int nbd_total_devices = 0; struct nbd_sock { @@ -554,6 +555,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, int result; struct msghdr msg = {} ; unsigned int noreclaim_flag; + const struct cred *old_cred; if (unlikely(!sock)) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -562,6 +564,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, return -EINVAL; } + old_cred = override_creds(nbd_cred); + msg.msg_iter = *iter; noreclaim_flag = memalloc_noreclaim_save(); @@ -586,6 +590,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, memalloc_noreclaim_restore(noreclaim_flag); + revert_creds(old_cred); + return result; } @@ -2677,7 +2683,15 @@ static int __init nbd_init(void) return -ENOMEM; } + nbd_cred = prepare_kernel_cred(&init_task); + if (!nbd_cred) { + destroy_workqueue(nbd_del_wq); + unregister_blkdev(NBD_MAJOR, "nbd"); + return -ENOMEM; + } + if (genl_register_family(&nbd_genl_family)) { + put_cred(nbd_cred); destroy_workqueue(nbd_del_wq); unregister_blkdev(NBD_MAJOR, "nbd"); return -EINVAL; @@ -2732,6 +2746,7 @@ static void __exit nbd_cleanup(void) /* Also wait for nbd_dev_remove_work() completes */ destroy_workqueue(nbd_del_wq); + put_cred(nbd_cred); idr_destroy(&nbd_index_idr); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index f982027e8c85..0ee55f889cfd 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1949,6 +1949,7 @@ static int null_add_dev(struct nullb_device *dev) .logical_block_size = dev->blocksize, .physical_block_size = dev->blocksize, .max_hw_sectors = dev->max_sectors, + .dma_alignment = dev->blocksize - 1, }; struct nullb *nullb; diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index b7ba667a3d09..e305d04aac9d 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -41,6 +41,7 @@ struct bpa10x_data { struct usb_anchor rx_anchor; struct sk_buff *rx_skb[2]; + struct hci_uart hu; }; static void bpa10x_tx_complete(struct urb *urb) @@ -96,7 +97,7 @@ static void bpa10x_rx_complete(struct urb *urb) if (urb->status == 0) { bool idx = usb_pipebulk(urb->pipe); - data->rx_skb[idx] = h4_recv_buf(hdev, data->rx_skb[idx], + data->rx_skb[idx] = h4_recv_buf(&data->hu, data->rx_skb[idx], urb->transfer_buffer, urb->actual_length, bpa10x_recv_pkts, @@ -388,6 +389,7 @@ static int bpa10x_probe(struct usb_interface *intf, hci_set_drvdata(hdev, data); data->hdev = hdev; + data->hu.hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 6d3963bd56a9..a075d8ec4677 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1467,11 +1467,6 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP1) btintel_pcie_msix_gp1_handler(data); - /* This interrupt is triggered by the firmware after updating - * boot_stage register and image_response register - */ - if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) - btintel_pcie_msix_gp0_handler(data); /* For TX */ if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { @@ -1487,6 +1482,12 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_tx_handle(data); } + /* This interrupt is triggered by the firmware after updating + * boot_stage register and image_response register + */ + if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) + btintel_pcie_msix_gp0_handler(data); + /* * Before sending the interrupt the HW disables it to prevent a nested * interrupt. This is done by writing 1 to the corresponding bit in diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 50abefba6d04..62db31bd6592 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1270,6 +1270,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) sdio_claim_host(bdev->func); + /* set drv_pmctrl if BT is closed before doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + sdio_enable_func(bdev->func); + btmtksdio_drv_pmctrl(bdev); + } + sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, NULL); skb_queue_purge(&bdev->txq); cancel_work_sync(&bdev->txrx_work); @@ -1285,6 +1291,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) goto err; } + /* set fw_pmctrl back if BT is closed after doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + btmtksdio_fw_pmctrl(bdev); + sdio_disable_func(bdev->func); + } + clear_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); err: sdio_release_host(bdev->func); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index d9b90ea2ad38..27aa48ff3ac2 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -79,6 +79,7 @@ struct btmtkuart_dev { u16 stp_dlen; const struct btmtkuart_data *data; + struct hci_uart hu; }; #define btmtkuart_is_standalone(bdev) \ @@ -368,7 +369,7 @@ static void btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) sz_left -= adv; p_left += adv; - bdev->rx_skb = h4_recv_buf(bdev->hdev, bdev->rx_skb, p_h4, + bdev->rx_skb = h4_recv_buf(&bdev->hu, bdev->rx_skb, p_h4, sz_h4, mtk_recv_pkts, ARRAY_SIZE(mtk_recv_pkts)); if (IS_ERR(bdev->rx_skb)) { @@ -858,6 +859,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) } bdev->hdev = hdev; + bdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, bdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index d5153fed0518..3b1e9224e965 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -212,6 +212,7 @@ struct btnxpuart_dev { struct ps_data psdata; struct btnxpuart_data *nxp_data; struct reset_control *pdn; + struct hci_uart hu; }; #define NXP_V1_FW_REQ_PKT 0xa5 @@ -1756,7 +1757,7 @@ static size_t btnxpuart_receive_buf(struct serdev_device *serdev, ps_start_timer(nxpdev); - nxpdev->rx_skb = h4_recv_buf(nxpdev->hdev, nxpdev->rx_skb, data, count, + nxpdev->rx_skb = h4_recv_buf(&nxpdev->hu, nxpdev->rx_skb, data, count, nxp_recv_pkts, ARRAY_SIZE(nxp_recv_pkts)); if (IS_ERR(nxpdev->rx_skb)) { int err = PTR_ERR(nxpdev->rx_skb); @@ -1875,6 +1876,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) reset_control_deassert(nxpdev->pdn); nxpdev->hdev = hdev; + nxpdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, nxpdev); diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 6abd962502e3..52794db2739b 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -50,7 +50,7 @@ #define RTL_CHIP_SUBVER (&(struct rtl_vendor_cmd) {{0x10, 0x38, 0x04, 0x28, 0x80}}) #define RTL_CHIP_REV (&(struct rtl_vendor_cmd) {{0x10, 0x3A, 0x04, 0x28, 0x80}}) -#define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0x0D, 0x00, 0xb0}}) +#define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0xAD, 0x00, 0xb0}}) #define RTL_PATCH_SNIPPETS 0x01 #define RTL_PATCH_DUMMY_HEADER 0x02 @@ -534,7 +534,6 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, { struct rtl_epatch_header_v2 *hdr; int rc; - u8 reg_val[2]; u8 key_id; u32 num_sections; struct rtl_section *section; @@ -549,14 +548,7 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, .len = btrtl_dev->fw_len - 7, /* Cut the tail */ }; - rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); - if (rc < 0) - return -EIO; - key_id = reg_val[0]; - - rtl_dev_dbg(hdev, "%s: key id %u", __func__, key_id); - - btrtl_dev->key_id = key_id; + key_id = btrtl_dev->key_id; hdr = rtl_iov_pull_data(&iov, sizeof(*hdr)); if (!hdr) @@ -625,8 +617,10 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, len += entry->len; } - if (!len) + if (!len) { + kvfree(ptr); return -EPERM; + } *_buf = ptr; return len; @@ -1068,6 +1062,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, u16 hci_rev, lmp_subver; u8 hci_ver, lmp_ver, chip_type = 0; int ret; + int rc; + u8 key_id; u8 reg_val[2]; btrtl_dev = kzalloc(sizeof(*btrtl_dev), GFP_KERNEL); @@ -1178,6 +1174,14 @@ next: goto err_free; } + rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); + if (rc < 0) + goto err_free; + + key_id = reg_val[0]; + btrtl_dev->key_id = key_id; + rtl_dev_info(hdev, "%s: key id %u", __func__, key_id); + btrtl_dev->fw_len = -EIO; if (lmp_subver == RTL_ROM_LMP_8852A && hci_rev == 0x000c) { snprintf(fw_name, sizeof(fw_name), "%s_v2.bin", @@ -1200,7 +1204,7 @@ next: goto err_free; } - if (btrtl_dev->ic_info->cfg_name) { + if (btrtl_dev->ic_info->cfg_name && !btrtl_dev->key_id) { if (postfix) { snprintf(cfg_name, sizeof(cfg_name), "%s-%s.bin", btrtl_dev->ic_info->cfg_name, postfix); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5e9ebf0c5312..a722446ec73d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4361,6 +4361,11 @@ static void btusb_disconnect(struct usb_interface *intf) hci_unregister_dev(hdev); + if (data->oob_wake_irq) + device_init_wakeup(&data->udev->dev, false); + if (data->reset_gpio) + gpiod_put(data->reset_gpio); + if (intf == data->intf) { if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); @@ -4371,17 +4376,11 @@ static void btusb_disconnect(struct usb_interface *intf) usb_driver_release_interface(&btusb_driver, data->diag); usb_driver_release_interface(&btusb_driver, data->intf); } else if (intf == data->diag) { - usb_driver_release_interface(&btusb_driver, data->intf); if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); + usb_driver_release_interface(&btusb_driver, data->intf); } - if (data->oob_wake_irq) - device_init_wakeup(&data->udev->dev, false); - - if (data->reset_gpio) - gpiod_put(data->reset_gpio); - hci_free_dev(hdev); } diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 2d40302409ff..94588676510f 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -105,7 +105,7 @@ static int ag6xx_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ag6xx->rx_skb = h4_recv_buf(hu->hdev, ag6xx->rx_skb, data, count, + ag6xx->rx_skb = h4_recv_buf(hu, ag6xx->rx_skb, data, count, ag6xx_recv_pkts, ARRAY_SIZE(ag6xx_recv_pkts)); if (IS_ERR(ag6xx->rx_skb)) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 707e90f80130..b1f32c5a8a3f 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -650,7 +650,7 @@ static int aml_recv(struct hci_uart *hu, const void *data, int count) struct aml_data *aml_data = hu->priv; int err; - aml_data->rx_skb = h4_recv_buf(hu->hdev, aml_data->rx_skb, data, count, + aml_data->rx_skb = h4_recv_buf(hu, aml_data->rx_skb, data, count, aml_recv_pkts, ARRAY_SIZE(aml_recv_pkts)); if (IS_ERR(aml_data->rx_skb)) { diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index dbfe34664633..8d2b5e7f0d6a 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,7 +191,7 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; - ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count, + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { int err = PTR_ERR(ath->rx_skb); diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index f96617b85d87..fff845ed44e3 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -698,7 +698,7 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - bcm->rx_skb = h4_recv_buf(hu->hdev, bcm->rx_skb, data, count, + bcm->rx_skb = h4_recv_buf(hu, bcm->rx_skb, data, count, bcm_recv_pkts, ARRAY_SIZE(bcm_recv_pkts)); if (IS_ERR(bcm->rx_skb)) { int err = PTR_ERR(bcm->rx_skb); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 9070e31a68bf..ec017df8572c 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -112,7 +112,7 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu->hdev, h4->rx_skb, data, count, + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { int err = PTR_ERR(h4->rx_skb); @@ -151,12 +151,12 @@ int __exit h4_deinit(void) return hci_uart_unregister_proto(&h4p); } -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count) { - struct hci_uart *hu = hci_get_drvdata(hdev); u8 alignment = hu->alignment ? hu->alignment : 1; + struct hci_dev *hdev = hu->hdev; /* Check for error from previous call */ if (IS_ERR(skb)) diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 9b353c3d6442..1d6e09508f1f 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -972,7 +972,7 @@ static int intel_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - intel->rx_skb = h4_recv_buf(hu->hdev, intel->rx_skb, data, count, + intel->rx_skb = h4_recv_buf(hu, intel->rx_skb, data, count, intel_recv_pkts, ARRAY_SIZE(intel_recv_pkts)); if (IS_ERR(intel->rx_skb)) { diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 7044c86325ce..6f4e25917b86 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -429,7 +429,7 @@ static int ll_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count, + ll->rx_skb = h4_recv_buf(hu, ll->rx_skb, data, count, ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts)); if (IS_ERR(ll->rx_skb)) { int err = PTR_ERR(ll->rx_skb); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index e08222395772..8767522ec4c6 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -264,9 +264,9 @@ static int mrvl_recv(struct hci_uart *hu, const void *data, int count) !test_bit(STATE_FW_LOADED, &mrvl->flags)) return count; - mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, - mrvl_recv_pkts, - ARRAY_SIZE(mrvl_recv_pkts)); + mrvl->rx_skb = h4_recv_buf(hu, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); if (IS_ERR(mrvl->rx_skb)) { int err = PTR_ERR(mrvl->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index cd7575c20f65..1e65b541f8ad 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -624,8 +624,8 @@ static int nokia_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - btdev->rx_skb = h4_recv_buf(hu->hdev, btdev->rx_skb, data, count, - nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); + btdev->rx_skb = h4_recv_buf(hu, btdev->rx_skb, data, count, + nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); if (IS_ERR(btdev->rx_skb)) { err = PTR_ERR(btdev->rx_skb); dev_err(dev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4cff4d9be313..888176b0faa9 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1277,7 +1277,7 @@ static int qca_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - qca->rx_skb = h4_recv_buf(hu->hdev, qca->rx_skb, data, count, + qca->rx_skb = h4_recv_buf(hu, qca->rx_skb, data, count, qca_recv_pkts, ARRAY_SIZE(qca_recv_pkts)); if (IS_ERR(qca->rx_skb)) { int err = PTR_ERR(qca->rx_skb); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index cbbe79b241ce..48ac7ca9334e 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -162,7 +162,7 @@ struct h4_recv_pkt { int h4_init(void); int h4_deinit(void); -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count); #endif diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index ed97344f2324..c75a531cfb98 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -133,8 +133,7 @@ static inline bool tpm_crb_has_idle(u32 start_method) { return !(start_method == ACPI_TPM2_START_METHOD || start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD || - start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC || - start_method == ACPI_TPM2_CRB_WITH_ARM_FFA); + start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC); } static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, @@ -191,7 +190,7 @@ static int crb_try_pluton_doorbell(struct crb_priv *priv, bool wait_for_complete * * Return: 0 always */ -static int __crb_go_idle(struct device *dev, struct crb_priv *priv) +static int __crb_go_idle(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -200,6 +199,12 @@ static int __crb_go_idle(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -220,7 +225,7 @@ static int crb_go_idle(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_go_idle(dev, priv); + return __crb_go_idle(dev, priv, chip->locality); } /** @@ -238,7 +243,7 @@ static int crb_go_idle(struct tpm_chip *chip) * * Return: 0 on success -ETIME on timeout; */ -static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) +static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -247,6 +252,12 @@ static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -267,7 +278,7 @@ static int crb_cmd_ready(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_cmd_ready(dev, priv); + return __crb_cmd_ready(dev, priv, chip->locality); } static int __crb_request_locality(struct device *dev, @@ -444,7 +455,7 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) /* Seems to be necessary for every command */ if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) - __crb_cmd_ready(&chip->dev, priv); + __crb_cmd_ready(&chip->dev, priv, chip->locality); memcpy_toio(priv->cmd, buf, len); @@ -672,7 +683,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, * PTT HW bug w/a: wake up the device to access * possibly not retained registers. */ - ret = __crb_cmd_ready(dev, priv); + ret = __crb_cmd_ready(dev, priv, 0); if (ret) goto out_relinquish_locality; @@ -744,7 +755,7 @@ out: if (!ret) priv->cmd_size = cmd_size; - __crb_go_idle(dev, priv); + __crb_go_idle(dev, priv, 0); out_relinquish_locality: diff --git a/drivers/comedi/comedi_buf.c b/drivers/comedi/comedi_buf.c index 002c0e76baff..c7c262a2d8ca 100644 --- a/drivers/comedi/comedi_buf.c +++ b/drivers/comedi/comedi_buf.c @@ -317,7 +317,7 @@ static unsigned int comedi_buf_munge(struct comedi_subdevice *s, unsigned int count = 0; const unsigned int num_sample_bytes = comedi_bytes_per_sample(s); - if (!s->munge || (async->cmd.flags & CMDF_RAWDATA)) { + if (!s->munge || (async->cmd.flags & CMDF_RAWDATA) || async->cmd.chanlist_len == 0) { async->munge_count += num_bytes; return num_bytes; } diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 298e92d8cc03..b44f0f7a5ba1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1614,7 +1614,11 @@ static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the * limits, epp and desired perf will get reset to the cached values in cpudata struct */ - return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + return amd_pstate_update_perf(policy, perf.bios_min_perf, + FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), + false); } static int amd_pstate_suspend(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 38897bb14a2c..492a10f1bdbf 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -603,9 +603,6 @@ static bool turbo_is_disabled(void) { u64 misc_en; - if (!cpu_feature_enabled(X86_FEATURE_IDA)) - return true; - rdmsrq(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); @@ -2106,7 +2103,8 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) && + cpu_feature_enabled(X86_FEATURE_IDA)) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -2271,7 +2269,8 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) && + cpu_feature_enabled(X86_FEATURE_IDA)) val |= (u64)1 << 32; return val; diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index a360bc4d20b7..19be6475d356 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/platform_device.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> @@ -303,8 +304,8 @@ static int sbi_cpuidle_init_cpu(struct device *dev, int cpu) drv->states[0].exit_latency = 1; drv->states[0].target_residency = 1; drv->states[0].power_usage = UINT_MAX; - strcpy(drv->states[0].name, "WFI"); - strcpy(drv->states[0].desc, "RISC-V WFI"); + strscpy(drv->states[0].name, "WFI"); + strscpy(drv->states[0].desc, "RISC-V WFI"); /* * If no DT idle states are detected (ret == 0) let the driver diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 4d9aa5ce31f0..23239b0c04f9 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -188,20 +188,17 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. + * + * However, if the number of remaining samples is too small to exclude + * any more outliers, allow the deepest available idle state to be + * selected because there are systems where the time spent by CPUs in + * deep idle states is correlated to the maximum frequency the CPUs + * can get to. On those systems, shallow idle states should be avoided + * unless there is a clear indication that the given CPU is most likley + * going to be woken up shortly. */ - if (divisor * 4 <= INTERVALS * 3) { - /* - * If there are sufficiently many data points still under - * consideration after the outliers have been eliminated, - * returning without a prediction would be a mistake because it - * is likely that the next interval will not exceed the current - * maximum, so return the latter in that case. - */ - if (divisor >= INTERVALS / 2) - return max; - + if (divisor * 4 <= INTERVALS * 3) return UINT_MAX; - } /* Update the thresholds for the next round. */ if (avg - min > max - avg) @@ -321,10 +318,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * Use a physical idle state, not busy polling, unless a timer - * is going to trigger soon enough. + * is going to trigger soon enough or the exit latency of the + * idle state in question is greater than the predicted idle + * duration. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->target_residency_ns <= data->next_timer_ns) { + s->target_residency_ns <= data->next_timer_ns && + s->exit_latency_ns <= predicted_ns) { predicted_ns = s->target_residency_ns; idx = i; break; diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c index 8d1c79aaca07..5993bcba9716 100644 --- a/drivers/crypto/aspeed/aspeed-acry.c +++ b/drivers/crypto/aspeed/aspeed-acry.c @@ -787,7 +787,6 @@ static int aspeed_acry_probe(struct platform_device *pdev) err_engine_rsa_start: crypto_engine_exit(acry_dev->crypt_engine_rsa); clk_exit: - clk_disable_unprepare(acry_dev->clk); return rc; } @@ -799,7 +798,6 @@ static void aspeed_acry_remove(struct platform_device *pdev) aspeed_acry_unregister(acry_dev); crypto_engine_exit(acry_dev->crypt_engine_rsa); tasklet_kill(&acry_dev->done_task); - clk_disable_unprepare(acry_dev->clk); } MODULE_DEVICE_TABLE(of, aspeed_acry_of_matches); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index a5b96adf2d1e..3b391a146635 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3871,10 +3871,12 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, pdev = container_of(dev, struct pci_dev, dev); if (pci_physfn(pdev) != qm->pdev) { pci_err(qm->pdev, "the pdev input does not match the pf!\n"); + put_device(dev); return -EINVAL; } *fun_index = pdev->devfn; + put_device(dev); return 0; } diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d7a5539d07d4..bd2e282ca93a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -348,7 +348,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; - res = DEFINE_RES(start, size, 0); + res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 7c750599ea69..4bc484b46f43 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -371,6 +371,9 @@ cxl_feature_info(struct cxl_features_state *cxlfs, { struct cxl_feat_entry *feat; + if (!cxlfs || !cxlfs->entries) + return ERR_PTR(-EOPNOTSUPP); + for (int i = 0; i < cxlfs->entries->num_features; i++) { feat = &cxlfs->entries->ent[i]; if (uuid_equal(uuid, &feat->uuid)) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index d5f71eb1ade8..8128fd2b5b31 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1182,6 +1182,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. + */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1200,18 +1214,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); - /* - * Setup port register if this is the first dport showed up. Having - * a dport also means that there is at least 1 active link. - */ - if (port->nr_dports == 1 && - port->component_reg_phys != CXL_RESOURCE_NONE) { - rc = cxl_port_setup_regs(port, port->component_reg_phys); - if (rc) - return ERR_PTR(rc); - port->component_reg_phys = CXL_RESOURCE_NONE; - } - return dport; } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e14c1d305b22..41b64d871c5a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -839,7 +839,7 @@ static int match_free_decoder(struct device *dev, const void *data) } static bool region_res_match_cxl_range(const struct cxl_region_params *p, - struct range *range) + const struct range *range) { if (!p->res) return false; @@ -3398,10 +3398,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - if (p->res && p->res->start == r->start && p->res->end == r->end) - return 1; - - return 0; + return region_res_match_cxl_range(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, @@ -3666,14 +3663,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %pr\n", + "Offset %#llx is within extended linear cache %pa\n", offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", offset, ®ion_size); return -EINVAL; } @@ -3705,6 +3702,7 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset) if (validate_region_offset(cxlr, offset)) return -EINVAL; + offset -= cxlr->params.cache_size; rc = region_offset_to_dpa_result(cxlr, offset, &result); if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { dev_dbg(&cxlr->dev, @@ -3737,6 +3735,7 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset) if (validate_region_offset(cxlr, offset)) return -EINVAL; + offset -= cxlr->params.cache_size; rc = region_offset_to_dpa_result(cxlr, offset, &result); if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { dev_dbg(&cxlr->dev, diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a53ec4798b12..a972e4ef1936 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(cxl_poison, __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) - __entry->hpa_alias0 = __entry->hpa + + __entry->hpa_alias0 = __entry->hpa - cxlr->params.cache_size; else __entry->hpa_alias0 = ULLONG_MAX; diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3f78c56b58dc..39e6f93dc310 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -1141,7 +1141,7 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) "RCU protection is required for safe access to returned string"); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return fence->ops->get_driver_name(fence); + return fence->ops->get_timeline_name(fence); else return "signaled-timeline"; } diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 74c1f0ca95f2..a4153bcb6dcf 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1559,16 +1559,18 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -EMSGSIZE; } pin = dpll_pin_find_from_nlattr(info); - if (!IS_ERR(pin)) { - if (!dpll_pin_available(pin)) { - nlmsg_free(msg); - return -ENODEV; - } - ret = dpll_msg_add_pin_handle(msg, pin); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(pin)) { + nlmsg_free(msg); + return PTR_ERR(pin); + } + if (!dpll_pin_available(pin)) { + nlmsg_free(msg); + return -ENODEV; + } + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); @@ -1735,12 +1737,14 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) } dpll = dpll_device_find_from_nlattr(info); - if (!IS_ERR(dpll)) { - ret = dpll_msg_add_dev_handle(msg, dpll); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(dpll)) { + nlmsg_free(msg); + return PTR_ERR(dpll); + } + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 93dc93eec79e..f93f9a458324 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1904,7 +1904,7 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, } is_diff = zl3073x_out_is_diff(zldev, out); - is_enabled = zl3073x_out_is_enabled(zldev, out); + is_enabled = zl3073x_output_pin_is_enabled(zldev, index); } /* Skip N-pin if the corresponding input/output is differential */ diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 103b2c2eba2a..0c5b94e64ea1 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1184,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device) if (ret) return ret; - /* Verify OCRAM has been initialized */ + /* + * Verify that OCRAM has been initialized. + * During a warm reset, OCRAM contents are retained, but the control + * and status registers are reset to their default values. Therefore, + * ECC must be explicitly re-enabled in the control register. + * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set. + */ if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, - (base + ALTR_A10_ECC_INITSTAT_OFST))) - return -ENODEV; + (base + ALTR_A10_ECC_INITSTAT_OFST))) { + if (!ecc_test_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST))) + ecc_set_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST)); + else + return -ENODEV; + } /* Enable IRQ on Single Bit Error */ writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST)); @@ -1357,7 +1369,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ @@ -1447,7 +1459,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_USB */ diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 7c5db8bf0595..1a1092793092 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -433,7 +433,7 @@ static void handle_error(struct mc_priv *priv, struct ecc_status *stat, phys_addr_t pfn; int err; - if (WARN_ON_ONCE(ctl_num > NUM_CONTROLLERS)) + if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS)) return; mci = priv->mci[ctl_num]; @@ -605,21 +605,23 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, length = result[MSG_ERR_LENGTH]; offset = result[MSG_ERR_OFFSET]; + /* + * The data can come in two stretches. Construct the regs from two + * messages. The offset indicates the offset from which the data is to + * be taken. + */ + for (i = 0 ; i < length; i++) { + k = offset + i; + j = ERROR_DATA + i; + mc_priv->regs[k] = result[j]; + } + if (result[TOTAL_ERR_LENGTH] > length) { if (!mc_priv->part_len) mc_priv->part_len = length; else mc_priv->part_len += length; - /* - * The data can come in 2 stretches. Construct the regs from 2 - * messages the offset indicates the offset from which the data is to - * be taken - */ - for (i = 0 ; i < length; i++) { - k = offset + i; - j = ERROR_DATA + i; - mc_priv->regs[k] = result[j]; - } + if (mc_priv->part_len < result[TOTAL_ERR_LENGTH]) return 0; mc_priv->part_len = 0; @@ -705,7 +707,7 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, /* Convert to bytes */ length = result[TOTAL_ERR_LENGTH] * 4; log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message, - sec_sev, (void *)&result[ERROR_DATA], length); + sec_sev, (void *)&mc_priv->regs, length); return 0; } diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index e5e0174a0335..66e1106db5e7 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -577,6 +577,8 @@ void fw_card_initialize(struct fw_card *card, INIT_LIST_HEAD(&card->transactions.list); spin_lock_init(&card->transactions.lock); + spin_lock_init(&card->topology_map.lock); + card->split_timeout.hi = DEFAULT_SPLIT_TIMEOUT / 8000; card->split_timeout.lo = (DEFAULT_SPLIT_TIMEOUT % 8000) << 19; card->split_timeout.cycles = DEFAULT_SPLIT_TIMEOUT; diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 2f73bcd5696f..ed3ae8cdb0cd 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -441,12 +441,13 @@ static void update_topology_map(__be32 *buffer, size_t buffer_size, int root_nod const u32 *self_ids, int self_id_count) { __be32 *map = buffer; + u32 next_generation = be32_to_cpu(buffer[1]) + 1; int node_count = (root_node_id & 0x3f) + 1; memset(map, 0, buffer_size); *map++ = cpu_to_be32((self_id_count + 2) << 16); - *map++ = cpu_to_be32(be32_to_cpu(buffer[1]) + 1); + *map++ = cpu_to_be32(next_generation); *map++ = cpu_to_be32((node_count << 16) | self_id_count); while (self_id_count--) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index dd3656a0c1ff..c65f491c54d0 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -269,7 +269,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel, } static int allocate_tlabel(struct fw_card *card) -__must_hold(&card->transactions_lock) +__must_hold(&card->transactions.lock) { int tlabel; diff --git a/drivers/firewire/init_ohci1394_dma.c b/drivers/firewire/init_ohci1394_dma.c index 48b879e9e831..121f0c2f6401 100644 --- a/drivers/firewire/init_ohci1394_dma.c +++ b/drivers/firewire/init_ohci1394_dma.c @@ -167,6 +167,7 @@ static inline void __init init_ohci1394_initialize(struct ohci *ohci) /** * init_ohci1394_wait_for_busresets - wait until bus resets are completed + * @ohci: Pointer to the OHCI-1394 controller structure * * OHCI1394 initialization itself and any device going on- or offline * and any cable issue cause a IEEE1394 bus reset. The OHCI1394 spec @@ -189,6 +190,8 @@ static inline void __init init_ohci1394_wait_for_busresets(struct ohci *ohci) /** * init_ohci1394_enable_physical_dma - Enable physical DMA for remote debugging + * @ohci: Pointer to the OHCI-1394 controller structure + * * This enables remote DMA access over IEEE1394 from every host for the low * 4GB of address space. DMA accesses above 4GB are not available currently. */ @@ -201,6 +204,8 @@ static inline void __init init_ohci1394_enable_physical_dma(struct ohci *ohci) /** * init_ohci1394_reset_and_init_dma - init controller and enable DMA + * @ohci: Pointer to the OHCI-1394 controller structure + * * This initializes the given controller and enables physical DMA engine in it. */ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) @@ -230,6 +235,10 @@ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) /** * init_ohci1394_controller - Map the registers of the controller and init DMA + * @num: PCI bus number + * @slot: PCI device number + * @func: PCI function number + * * This maps the registers of the specified controller and initializes it */ static inline void __init init_ohci1394_controller(int num, int slot, int func) @@ -284,6 +293,7 @@ void __init init_ohci1394_dma_on_all_controllers(void) /** * setup_ohci1394_dma - enables early OHCI1394 DMA initialization + * @opt: Kernel command line parameter string */ static int __init setup_ohci1394_dma(char *opt) { diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 65bf1685350a..c72ee4756585 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -649,6 +649,26 @@ static u16 ffa_memory_attributes_get(u32 func_id) return FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | FFA_MEM_INNER_SHAREABLE; } +static void ffa_emad_impdef_value_init(u32 version, void *dst, void *src) +{ + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(version)) + memcpy(dst, src, sizeof(ep_mem_access->impdef_val)); +} + +static void +ffa_mem_region_additional_setup(u32 version, struct ffa_mem_region *mem_region) +{ + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version)) { + mem_region->ep_mem_size = 0; + } else { + mem_region->ep_mem_size = ffa_emad_size_get(version); + mem_region->ep_mem_offset = sizeof(*mem_region); + memset(mem_region->reserved, 0, 12); + } +} + static int ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, struct ffa_mem_ops_args *args) @@ -667,27 +687,24 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, mem_region->flags = args->flags; mem_region->sender_id = drv_info->vm_id; mem_region->attributes = ffa_memory_attributes_get(func_id); - ep_mem_access = buffer + - ffa_mem_desc_offset(buffer, 0, drv_info->version); composite_offset = ffa_mem_desc_offset(buffer, args->nattrs, drv_info->version); - for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { + for (idx = 0; idx < args->nattrs; idx++) { + ep_mem_access = buffer + + ffa_mem_desc_offset(buffer, idx, drv_info->version); ep_mem_access->receiver = args->attrs[idx].receiver; ep_mem_access->attrs = args->attrs[idx].attrs; ep_mem_access->composite_off = composite_offset; ep_mem_access->flag = 0; ep_mem_access->reserved = 0; + ffa_emad_impdef_value_init(drv_info->version, + ep_mem_access->impdef_val, + args->attrs[idx].impdef_val); } mem_region->handle = 0; mem_region->ep_count = args->nattrs; - if (drv_info->version <= FFA_VERSION_1_0) { - mem_region->ep_mem_size = 0; - } else { - mem_region->ep_mem_size = sizeof(*ep_mem_access); - mem_region->ep_mem_offset = sizeof(*mem_region); - memset(mem_region->reserved, 0, 12); - } + ffa_mem_region_additional_setup(drv_info->version, mem_region); composite = buffer + composite_offset; composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 07b9e629276d..7c35c95fddba 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -309,16 +309,36 @@ enum debug_counters { SCMI_DEBUG_COUNTERS_LAST }; -static inline void scmi_inc_count(atomic_t *arr, int stat) +/** + * struct scmi_debug_info - Debug common info + * @top_dentry: A reference to the top debugfs dentry + * @name: Name of this SCMI instance + * @type: Type of this SCMI instance + * @is_atomic: Flag to state if the transport of this instance is atomic + * @counters: An array of atomic_c's used for tracking statistics (if enabled) + */ +struct scmi_debug_info { + struct dentry *top_dentry; + const char *name; + const char *type; + bool is_atomic; + atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; +}; + +static inline void scmi_inc_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_inc(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_inc(&dbg->counters[stat]); + } } -static inline void scmi_dec_count(atomic_t *arr, int stat) +static inline void scmi_dec_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_dec(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_dec(&dbg->counters[stat]); + } } enum scmi_bad_msg { diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index bd56a877fdfc..5caa9191a8d1 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -116,22 +116,6 @@ struct scmi_protocol_instance { #define ph_to_pi(h) container_of(h, struct scmi_protocol_instance, ph) /** - * struct scmi_debug_info - Debug common info - * @top_dentry: A reference to the top debugfs dentry - * @name: Name of this SCMI instance - * @type: Type of this SCMI instance - * @is_atomic: Flag to state if the transport of this instance is atomic - * @counters: An array of atomic_c's used for tracking statistics (if enabled) - */ -struct scmi_debug_info { - struct dentry *top_dentry; - const char *name; - const char *type; - bool is_atomic; - atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; -}; - -/** * struct scmi_info - Structure representing a SCMI instance * * @id: A sequence number starting from zero identifying this instance @@ -610,7 +594,7 @@ scmi_xfer_inflight_register_unlocked(struct scmi_xfer *xfer, /* Set in-flight */ set_bit(xfer->hdr.seq, minfo->xfer_alloc_table); hash_add(minfo->pending_xfers, &xfer->node, xfer->hdr.seq); - scmi_inc_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_inc_count(info->dbg, XFERS_INFLIGHT); xfer->pending = true; } @@ -819,8 +803,9 @@ __scmi_xfer_put(struct scmi_xfers_info *minfo, struct scmi_xfer *xfer) hash_del(&xfer->node); xfer->pending = false; - scmi_dec_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_dec_count(info->dbg, XFERS_INFLIGHT); } + xfer->flags = 0; hlist_add_head(&xfer->node, &minfo->free_xfers); } spin_unlock_irqrestore(&minfo->xfer_lock, flags); @@ -839,8 +824,6 @@ void scmi_xfer_raw_put(const struct scmi_handle *handle, struct scmi_xfer *xfer) { struct scmi_info *info = handle_to_scmi_info(handle); - xfer->flags &= ~SCMI_XFER_FLAG_IS_RAW; - xfer->flags &= ~SCMI_XFER_FLAG_CHAN_SET; return __scmi_xfer_put(&info->tx_minfo, xfer); } @@ -1034,7 +1017,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) spin_unlock_irqrestore(&minfo->xfer_lock, flags); scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED); - scmi_inc_count(info->dbg->counters, ERR_MSG_UNEXPECTED); + scmi_inc_count(info->dbg, ERR_MSG_UNEXPECTED); return xfer; } @@ -1062,7 +1045,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) msg_type, xfer_id, msg_hdr, xfer->state); scmi_bad_message_trace(cinfo, msg_hdr, MSG_INVALID); - scmi_inc_count(info->dbg->counters, ERR_MSG_INVALID); + scmi_inc_count(info->dbg, ERR_MSG_INVALID); /* On error the refcount incremented above has to be dropped */ __scmi_xfer_put(minfo, xfer); @@ -1107,7 +1090,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, PTR_ERR(xfer)); scmi_bad_message_trace(cinfo, msg_hdr, MSG_NOMEM); - scmi_inc_count(info->dbg->counters, ERR_MSG_NOMEM); + scmi_inc_count(info->dbg, ERR_MSG_NOMEM); scmi_clear_channel(info, cinfo); return; @@ -1123,7 +1106,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "NOTI", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, NOTIFICATION_OK); + scmi_inc_count(info->dbg, NOTIFICATION_OK); scmi_notify(cinfo->handle, xfer->hdr.protocol_id, xfer->hdr.id, xfer->rx.buf, xfer->rx.len, ts); @@ -1183,10 +1166,10 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) { scmi_clear_channel(info, cinfo); complete(xfer->async_done); - scmi_inc_count(info->dbg->counters, DELAYED_RESPONSE_OK); + scmi_inc_count(info->dbg, DELAYED_RESPONSE_OK); } else { complete(&xfer->done); - scmi_inc_count(info->dbg->counters, RESPONSE_OK); + scmi_inc_count(info->dbg, RESPONSE_OK); } if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { @@ -1296,7 +1279,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_POLLED_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_POLLED_TIMEOUT); } } @@ -1321,7 +1304,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "RESP" : "resp", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, RESPONSE_POLLED_OK); + scmi_inc_count(info->dbg, RESPONSE_POLLED_OK); if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { scmi_raw_message_report(info->raw, xfer, @@ -1336,7 +1319,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, dev_err(dev, "timed out in resp(caller: %pS)\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_TIMEOUT); } } @@ -1420,13 +1403,13 @@ static int do_xfer(const struct scmi_protocol_handle *ph, !is_transport_polling_capable(info->desc)) { dev_warn_once(dev, "Polling mode is not supported by transport.\n"); - scmi_inc_count(info->dbg->counters, SENT_FAIL_POLLING_UNSUPPORTED); + scmi_inc_count(info->dbg, SENT_FAIL_POLLING_UNSUPPORTED); return -EINVAL; } cinfo = idr_find(&info->tx_idr, pi->proto->id); if (unlikely(!cinfo)) { - scmi_inc_count(info->dbg->counters, SENT_FAIL_CHANNEL_NOT_FOUND); + scmi_inc_count(info->dbg, SENT_FAIL_CHANNEL_NOT_FOUND); return -EINVAL; } /* True ONLY if also supported by transport. */ @@ -1461,19 +1444,19 @@ static int do_xfer(const struct scmi_protocol_handle *ph, ret = info->desc->ops->send_message(cinfo, xfer); if (ret < 0) { dev_dbg(dev, "Failed to send message %d\n", ret); - scmi_inc_count(info->dbg->counters, SENT_FAIL); + scmi_inc_count(info->dbg, SENT_FAIL); return ret; } trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "CMND", xfer->hdr.seq, xfer->hdr.status, xfer->tx.buf, xfer->tx.len); - scmi_inc_count(info->dbg->counters, SENT_OK); + scmi_inc_count(info->dbg, SENT_OK); ret = scmi_wait_for_message_response(cinfo, xfer); if (!ret && xfer->hdr.status) { ret = scmi_to_linux_errno(xfer->hdr.status); - scmi_inc_count(info->dbg->counters, ERR_PROTOCOL); + scmi_inc_count(info->dbg, ERR_PROTOCOL); } if (info->desc->ops->mark_txdone) @@ -3044,9 +3027,6 @@ static int scmi_debugfs_raw_mode_setup(struct scmi_info *info) u8 channels[SCMI_MAX_CHANNELS] = {}; DECLARE_BITMAP(protos, SCMI_MAX_CHANNELS) = {}; - if (!info->dbg) - return -EINVAL; - /* Enumerate all channels to collect their ids */ idr_for_each_entry(&info->tx_idr, cinfo, id) { /* @@ -3218,7 +3198,7 @@ static int scmi_probe(struct platform_device *pdev) if (!info->dbg) dev_warn(dev, "Failed to setup SCMI debugfs.\n"); - if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { + if (info->dbg && IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { ret = scmi_debugfs_raw_mode_setup(info); if (!coex) { if (ret) @@ -3423,6 +3403,9 @@ int scmi_inflight_count(const struct scmi_handle *handle) if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { struct scmi_info *info = handle_to_scmi_info(handle); + if (!info->dbg) + return 0; + return atomic_read(&info->dbg->counters[XFERS_INFLIGHT]); } else { return 0; diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c index ffe7e1cb6b23..fe5c10cd5c32 100644 --- a/drivers/gpio/gpio-104-idio-16.c +++ b/drivers/gpio/gpio-104-idio-16.c @@ -59,6 +59,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x5, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 37600faf4a4b..416f265d09d0 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -723,6 +723,7 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, chip->get_multiple = gpio_fwd_get_multiple_locked; chip->set = gpio_fwd_set; chip->set_multiple = gpio_fwd_set_multiple_locked; + chip->set_config = gpio_fwd_set_config; chip->to_irq = gpio_fwd_to_irq; chip->base = -1; chip->ngpio = ngpios; diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c index 0103be977c66..4fbae6f6a497 100644 --- a/drivers/gpio/gpio-idio-16.c +++ b/drivers/gpio/gpio-idio-16.c @@ -6,6 +6,7 @@ #define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" +#include <linux/bitmap.h> #include <linux/bits.h> #include <linux/device.h> #include <linux/err.h> @@ -107,6 +108,7 @@ int devm_idio_16_regmap_register(struct device *const dev, struct idio_16_data *data; struct regmap_irq_chip *chip; struct regmap_irq_chip_data *chip_data; + DECLARE_BITMAP(fixed_direction_output, IDIO_16_NGPIO); if (!config->parent) return -EINVAL; @@ -164,6 +166,9 @@ int devm_idio_16_regmap_register(struct device *const dev, gpio_config.irq_domain = regmap_irq_get_domain(chip_data); gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate; + bitmap_from_u64(fixed_direction_output, GENMASK_U64(15, 0)); + gpio_config.fixed_direction_output = fixed_direction_output; + return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config)); } EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register); diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 3b4f8830c741..f32d1d237795 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -286,22 +286,14 @@ static void ljca_gpio_event_cb(void *context, u8 cmd, const void *evt_data, { const struct ljca_gpio_packet *packet = evt_data; struct ljca_gpio_dev *ljca_gpio = context; - int i, irq; + int i; if (cmd != LJCA_GPIO_INT_EVENT) return; for (i = 0; i < packet->num; i++) { - irq = irq_find_mapping(ljca_gpio->gc.irq.domain, - packet->item[i].index); - if (!irq) { - dev_err(ljca_gpio->gc.parent, - "gpio_id %u does not mapped to IRQ yet\n", - packet->item[i].index); - return; - } - - generic_handle_domain_irq(ljca_gpio->gc.irq.domain, irq); + generic_handle_domain_irq(ljca_gpio->gc.irq.domain, + packet->item[i].index); set_bit(packet->item[i].index, ljca_gpio->reenable_irqs); } diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 476cea1b5ed7..9d28ca8e1d6f 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -41,6 +41,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x7, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c index ab9e4077fa60..f4267af00027 100644 --- a/drivers/gpio/gpio-regmap.c +++ b/drivers/gpio/gpio-regmap.c @@ -31,6 +31,7 @@ struct gpio_regmap { unsigned int reg_clr_base; unsigned int reg_dir_in_base; unsigned int reg_dir_out_base; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ int regmap_irq_line; @@ -134,6 +135,13 @@ static int gpio_regmap_get_direction(struct gpio_chip *chip, unsigned int base, val, reg, mask; int invert, ret; + if (gpio->fixed_direction_output) { + if (test_bit(offset, gpio->fixed_direction_output)) + return GPIO_LINE_DIRECTION_OUT; + else + return GPIO_LINE_DIRECTION_IN; + } + if (gpio->reg_dat_base && !gpio->reg_set_base) return GPIO_LINE_DIRECTION_IN; if (gpio->reg_set_base && !gpio->reg_dat_base) @@ -284,6 +292,17 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config goto err_free_gpio; } + if (config->fixed_direction_output) { + gpio->fixed_direction_output = bitmap_alloc(chip->ngpio, + GFP_KERNEL); + if (!gpio->fixed_direction_output) { + ret = -ENOMEM; + goto err_free_gpio; + } + bitmap_copy(gpio->fixed_direction_output, + config->fixed_direction_output, chip->ngpio); + } + /* if not set, assume there is only one register */ gpio->ngpio_per_reg = config->ngpio_per_reg; if (!gpio->ngpio_per_reg) @@ -300,7 +319,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config ret = gpiochip_add_data(chip, gpio); if (ret < 0) - goto err_free_gpio; + goto err_free_bitmap; #ifdef CONFIG_REGMAP_IRQ if (config->regmap_irq_chip) { @@ -309,7 +328,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config config->regmap_irq_line, config->regmap_irq_flags, 0, config->regmap_irq_chip, &gpio->irq_chip_data); if (ret) - goto err_free_gpio; + goto err_free_bitmap; irq_domain = regmap_irq_get_domain(gpio->irq_chip_data); } else @@ -326,6 +345,8 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config err_remove_gpiochip: gpiochip_remove(chip); +err_free_bitmap: + bitmap_free(gpio->fixed_direction_output); err_free_gpio: kfree(gpio); return ERR_PTR(ret); @@ -344,6 +365,7 @@ void gpio_regmap_unregister(struct gpio_regmap *gpio) #endif gpiochip_remove(&gpio->gpio_chip); + bitmap_free(gpio->fixed_direction_output); kfree(gpio); } EXPORT_SYMBOL_GPL(gpio_regmap_unregister); diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c index 09a448ce3eec..3c8fd322a713 100644 --- a/drivers/gpio/gpio-tb10x.c +++ b/drivers/gpio/gpio-tb10x.c @@ -50,25 +50,6 @@ static inline u32 tb10x_reg_read(struct tb10x_gpio *gpio, unsigned int offs) return ioread32(gpio->base + offs); } -static inline void tb10x_reg_write(struct tb10x_gpio *gpio, unsigned int offs, - u32 val) -{ - iowrite32(val, gpio->base + offs); -} - -static inline void tb10x_set_bits(struct tb10x_gpio *gpio, unsigned int offs, - u32 mask, u32 val) -{ - u32 r; - - guard(gpio_generic_lock_irqsave)(&gpio->chip); - - r = tb10x_reg_read(gpio, offs); - r = (r & ~mask) | (val & mask); - - tb10x_reg_write(gpio, offs, r); -} - static int tb10x_gpio_to_irq(struct gpio_chip *chip, unsigned offset) { struct tb10x_gpio *tb10x_gpio = gpiochip_get_data(chip); diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 284e762d92c4..d441c1236d8c 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -291,6 +291,19 @@ acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity) return GPIOD_ASIS; } +static void acpi_gpio_set_debounce_timeout(struct gpio_desc *desc, + unsigned int acpi_debounce) +{ + int ret; + + /* ACPI uses hundredths of milliseconds units */ + acpi_debounce *= 10; + ret = gpio_set_debounce_timeout(desc, acpi_debounce); + if (ret) + gpiod_warn(desc, "Failed to set debounce-timeout %u: %d\n", + acpi_debounce, ret); +} + static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, struct acpi_resource_gpio *agpio, unsigned int index, @@ -300,18 +313,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity); unsigned int pin = agpio->pin_table[index]; struct gpio_desc *desc; - int ret; desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags); if (IS_ERR(desc)) return desc; - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10); - if (ret) - dev_warn(chip->parent, - "Failed to set debounce-timeout for pin 0x%04X, err %d\n", - pin, ret); + acpi_gpio_set_debounce_timeout(desc, agpio->debounce_timeout); return desc; } @@ -375,8 +382,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->parent, - "Failed to request GPIO for pin 0x%04X, err %ld\n", - pin, PTR_ERR(desc)); + "Failed to request GPIO for pin 0x%04X, err %pe\n", + pin, desc); return AE_OK; } @@ -944,7 +951,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, bool can_fallback = acpi_can_fallback_to_crs(adev, con_id); struct acpi_gpio_info info = {}; struct gpio_desc *desc; - int ret; desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info); if (IS_ERR(desc)) @@ -959,10 +965,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, acpi_gpio_update_gpiod_flags(dflags, &info); acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info); - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, info.debounce * 10); - if (ret) - return ERR_PTR(ret); + acpi_gpio_set_debounce_timeout(desc, info.debounce); return desc; } diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c index f21dbc28cf2c..e3806db1c0e0 100644 --- a/drivers/gpio/gpiolib-swnode.c +++ b/drivers/gpio/gpiolib-swnode.c @@ -41,7 +41,7 @@ static struct gpio_device *swnode_get_gpio_device(struct fwnode_handle *fwnode) !strcmp(gdev_node->name, GPIOLIB_SWNODE_UNDEFINED_NAME)) return ERR_PTR(-ENOENT); - gdev = gpio_device_find_by_label(gdev_node->name); + gdev = gpio_device_find_by_fwnode(fwnode); return gdev ?: ERR_PTR(-EPROBE_DEFER); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9952e412da50..cd8800ba5825 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -5296,6 +5296,8 @@ static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) struct gpio_device *gdev; loff_t index = *pos; + s->private = NULL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return NULL; @@ -5329,7 +5331,11 @@ static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) static void gpiolib_seq_stop(struct seq_file *s, void *v) { - struct gpiolib_seq_priv *priv = s->private; + struct gpiolib_seq_priv *priv; + + priv = s->private; + if (!priv) + return; srcu_read_unlock(&gpio_devices_srcu, priv->idx); kfree(priv); diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4b2f7d794275..da2565e6de71 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -245,7 +245,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a0df4cabb99..6f5b4a0e0a34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1290,6 +1290,7 @@ struct amdgpu_device { bool debug_disable_gpu_ring_reset; bool debug_vm_userptr; bool debug_disable_ce_logs; + bool debug_enable_ce_cs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 83020963dfde..923f0fa7350c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1267,6 +1267,10 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + /* VM entity stopped if process killed, don't clear freed pt bo */ + if (!amdgpu_vm_ready(vm)) + return 0; + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); @@ -2329,10 +2333,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *mem) { - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); /* make sure read happened */ - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index ef996493115f..425a3e564360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2025 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h index bcb97d245673..353421807387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9cd7741d2254..2f6a96af7fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; + if (!p->adev->debug_enable_ce_cs && + chunk_ib->flags & AMDGPU_IB_FLAG_CE) { + dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); + return -EINVAL; + } + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) @@ -702,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) { + if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) { *max_bytes = 0; *max_vis_bytes = 0; return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f5d5c45ddc0d..afedea02188d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -236,7 +236,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, &num_scheds, &scheds); if (r) - goto cleanup_entity; + goto error_free_entity; } /* disable load balance if the hw engine retains context among dependent jobs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7a899fb4de29..2819aceaab74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1882,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) { + /* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4. + * It's unclear if this is a platform-specific or GPU-specific issue. + * Disable ASPM on SI for the time being. + */ + if (adev->family == AMDGPU_FAMILY_SI) + return true; + #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); @@ -5236,10 +5243,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); - r = amdgpu_dpm_notify_rlc_state(adev, false); - if (r) - return r; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73401f0aeb34..dd7b2b796427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1033,7 +1033,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, /* Until a uniform way is figured, get mask based on hwid */ switch (hw_id) { case VCN_HWID: - harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; + /* VCN vs UVD+VCE */ + if (!amdgpu_ip_version(adev, VCE_HWIP, 0)) + harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; break; case DMU_HWID: if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) @@ -2565,7 +2567,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0); @@ -2592,7 +2596,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1); @@ -2619,8 +2625,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 1; + adev->sdma.sdma_mask = 1; adev->vcn.num_vcn_inst = 1; adev->gmc.num_umc = 2; + adev->gfx.xcc_mask = 1; if (adev->apu_flags & AMD_APU_IS_RAVEN2) { adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0); @@ -2665,7 +2673,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega20_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0); @@ -2693,8 +2703,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); arct_reg_base_init(adev); adev->sdma.num_instances = 8; + adev->sdma.sdma_mask = 0xff; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1); @@ -2726,8 +2738,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; + adev->sdma.sdma_mask = 0x1f; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0); @@ -2762,6 +2776,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) } else { cyan_skillfish_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8561ad7f6180..ed3bef1edfe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -82,6 +82,18 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* + * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+. + * Such buffers cannot be safely accessed over P2P due to device-local + * compression metadata. Fallback to system-memory path instead. + * Device supports GFX12 (GC 12.x or newer) + * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag + * + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + attach->peer2peer = false; + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bff25ef3e2d0..7333e19291cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), AMDGPU_DEBUG_VM_USERPTR = BIT(8), - AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9) + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9), + AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: disable kernel logs of correctable errors\n"); adev->debug_disable_ce_logs = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) { + pr_info("debug: allowing command submission to CE engine\n"); + adev->debug_enable_ce_cs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2626,9 +2632,14 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - if (amdgpu_acpi_should_gpu_reset(adev)) - return amdgpu_asic_reset(adev); + if (amdgpu_acpi_should_gpu_reset(adev)) { + amdgpu_device_lock_reset_domain(adev->reset_domain); + r = amdgpu_asic_reset(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + return r; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fd8cca241da6..18a7829122d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) * @fence: fence of the ring to signal * */ -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) { - dma_fence_set_error(&fence->base, -ETIME); - amdgpu_fence_write(fence->ring, fence->seq); - amdgpu_fence_process(fence->ring); + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); } void amdgpu_fence_save_wptr(struct dma_fence *fence) @@ -790,14 +821,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr, i, seqno; + u64 wptr; + u32 seq, last_seq; - seqno = amdgpu_fence_read(ring); + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; - for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { - ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; rcu_read_lock(); unprocessed = rcu_dereference(*ptr); @@ -813,7 +849,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, wptr = fence->wptr; } rcu_read_unlock(); - } + } while (last_seq != seq); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 9cddbf50442a..37270c4dab8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -280,6 +280,8 @@ int isp_kernel_buffer_alloc(struct device *dev, u64 size, if (ret) return ret; + /* Ensure *bo is NULL so a new BO will be created */ + *bo = NULL; ret = amdgpu_bo_create_kernel(adev, size, ISP_MC_ADDR_ALIGN, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 6b7d66b6d4cc..63ee6ba6a931 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -371,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - if (val & (BIT_ULL(1) << ((i * adev->jpeg.num_jpeg_rings) + j))) + if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j))) ring->sched.ready = true; else ring->sched.ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a9327472c651..b3e6b3fcdf2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -758,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); + ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0; return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); @@ -804,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; - mem.vram.heap_usage = - ttm_resource_manager_usage(vram_man); + mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(vram_man) : 0; mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 5bf9be073cdd..4883adcfbb4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -409,7 +409,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, return -EINVAL; /* Clear the doorbell array before detection */ - memset(adev->mes.hung_queue_db_array_cpu_addr, 0, + memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET, adev->mes.hung_queue_db_array_size * sizeof(u32)); input.queue_type = queue_type; input.detect_only = detect_only; @@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, dev_err(adev->dev, "failed to detect and reset\n"); } else { *hung_db_num = 0; - for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) { + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { hung_db_array[i] = db_array[i]; *hung_db_num += 1; } } + + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ } return r; @@ -686,14 +691,11 @@ out: bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) { uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; - bool is_supported = false; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && - mes_rev >= 0x63) - is_supported = true; - return is_supported; + return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) || + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); } /* Fix me -- node_id is used to identify the correct MES instances in the future */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 6b506fc72f58..97c137c90f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -149,6 +149,7 @@ struct amdgpu_mes { void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; int hung_queue_db_array_size; + int hung_queue_hqd_info_offset; struct amdgpu_bo *hung_queue_db_array_gpu_obj; uint64_t hung_queue_db_array_gpu_addr; void *hung_queue_db_array_cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8c0e5d03de50..aa7987d0806c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2355,8 +2355,11 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (!ret && !psp->securedisplay_context.context.resp_status) { psp->securedisplay_context.context.initialized = true; mutex_init(&psp->securedisplay_context.mutex); - } else + } else { + /* don't try again */ + psp->securedisplay_context.context.bin_desc.size_bytes = 0; return ret; + } mutex_lock(&psp->securedisplay_context.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8f6ce948c684..5ec5c3ff22bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -811,7 +811,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* signal the fence of the bad job */ + /* signal the guilty fence and set an error on all fences from the context */ if (guilty_fence) amdgpu_fence_driver_guilty_force_completion(guilty_fence); /* Re-emit the non-guilty commands */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b6b649179776..4b46e3c26ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -155,7 +155,7 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 761bad98da3e..4d0096d0baa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -151,15 +151,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d { struct amdgpu_userq_fence *userq_fence, *tmp; struct dma_fence *fence; + unsigned long flags; u64 rptr; int i; if (!fence_drv) return; + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); - spin_lock(&fence_drv->fence_list_lock); list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { fence = &userq_fence->base; @@ -174,7 +175,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d list_del(&userq_fence->link); dma_fence_put(fence); } - spin_unlock(&fence_drv->fence_list_lock); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); } void amdgpu_userq_fence_driver_destroy(struct kref *ref) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 3328ab63376b..f96beb96c75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -598,8 +598,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = - ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20; + vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0; vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 474bfe36c0c2..aa78c2ee9e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 1): + return adev->pm.fw_version < 0x0a640500; + default: + return false; + } +} + +static int vpe_get_dpm_level(struct amdgpu_device *adev) +{ + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + return 0; + + return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv)); +} + static void vpe_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -329,11 +349,17 @@ static void vpe_idle_work_handler(struct work_struct *work) unsigned int fences = 0; fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + if (fences) + goto reschedule; - if (fences == 0) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); - else - schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); + if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0) + goto reschedule; + + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return; + +reschedule: + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } static int vpe_common_init(struct amdgpu_vpe *vpe) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a5adb2ed9b3c..9d934c07fa6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -234,6 +234,9 @@ static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, !adev->gmc.vram_vendor) return 0; + if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) + return 0; + return attr->mode; } diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 811124ff88a8..f9e2edf5260b 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -407,7 +407,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, return -EINVAL; } - if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) + if (adev->kfd.init_complete && !amdgpu_in_reset(adev) && + !adev->in_suspend) flags |= AMDGPU_XCP_OPS_KFD; if (flags & AMDGPU_XCP_OPS_KFD) { diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c index 96616a865aac..ed1e25661706 100644 --- a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 66c47c466532..d61eb9f187c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5862,8 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 710ec9c34e43..93fde0f9af87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4419,8 +4419,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7693b7953426..80565392313f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5976ed55d9db..2b7aba22ecc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v7_0_gpu_early_init(adev); + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0856ff65288c..8a81713d97aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 77f9d5b9a556..c90cbe053ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2292,7 +2292,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); } else { - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + if (adev->in_suspend) + amdgpu_xcp_restore_partition_mode(adev->xcp_mgr); + else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) r = amdgpu_xcp_switch_partition_mode( diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 93d7ccb7d013..0e5e54d0a9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1068,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1290,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1306,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index c5e2a2c41e06..e1509480dfc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1183,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1478,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1494,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index baf097d2e1ac..ab0bf880d3d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -878,6 +878,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2db9b2c63693..1cd9eaeef38f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -208,10 +208,10 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uqm, *tmp; unsigned int hung_db_num = 0; int queue_id, r, i; - u32 db_array[4]; + u32 db_array[8]; - if (db_array_size > 4) { - dev_err(adev->dev, "DB array size (%d vs 4) too small\n", + if (db_array_size > 8) { + dev_err(adev->dev, "DB array size (%d vs 8) too small\n", db_array_size); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e82188431f79..da575bb1377f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -66,7 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define GFX_MES_DRAM_SIZE 0x80000 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) -#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4 +#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */ +#define MES11_HUNG_HQD_INFO_OFFSET 4 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -1720,8 +1721,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; - adev->mes.hung_queue_db_array_size = - MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index aff06f06aeee..7f3512d9de07 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -47,7 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 -#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4 +#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */ +#define MES12_HUNG_HQD_INFO_OFFSET 4 static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -228,7 +229,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); - if (r < 1 || !*status_ptr) { + + /* + * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success). + * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information. + */ + if (r < 1 || !(lower_32_bits(*status_ptr))) { if (misc_op_str) dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", @@ -1899,8 +1905,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; - adev->mes.hung_queue_db_array_size = - MES12_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { r = amdgpu_mes_init_microcode(adev, pipe); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 64b240b51f1a..a9be7a505026 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -142,13 +142,37 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) return err; } -static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v11_wait_for_tos_unload(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg1, sol_reg2; + int retry_loop; + /* Wait for the TOS to be unloaded */ + for (retry_loop = 0; retry_loop < 20; retry_loop++) { + sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + usleep_range(1000, 2000); + sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg1 == sol_reg2) + return 0; + } + dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x", + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81)); + + return -ETIME; +} + +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; int ret; int retry_loop; + /* For a reset done at the end of S3, only wait for TOS to be unloaded */ + if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev)) + return psp_v11_wait_for_tos_unload(psp); + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6c5c7c1bf5ed..6e7bc983fc0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, pr_debug_ratelimited("Evicting process pid %d queues\n", pdd->process->lead_thread->pid); + if (dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); + retval = suspend_all_queues_mes(dqm); + if (retval) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + } + /* Mark all queues as evicted. Deactivate all active queues on * the qpd. */ @@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - int err; - - err = remove_queue_mes(dqm, q, qpd); - if (err) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - retval = err; + goto out; } } } - pdd->last_evict_timestamp = get_jiffies_64(); - if (!dqm->dev->kfd->shared_resources.enable_mes) + + if (!dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + } else { + retval = resume_all_queues_mes(dqm); + if (retval) + dev_err(dev, "Resuming all queues failed"); + } out: dqm_unlock(dqm); @@ -3098,61 +3111,17 @@ out: return ret; } -static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct device *dev = dqm->dev->adev->dev; - int ret = 0; - - /* Check if process is already evicted */ - dqm_lock(dqm); - if (qpd->evicted) { - /* Increment the evicted count to make sure the - * process stays evicted before its terminated. - */ - qpd->evicted++; - dqm_unlock(dqm); - goto out; - } - dqm_unlock(dqm); - - ret = suspend_all_queues_mes(dqm); - if (ret) { - dev_err(dev, "Suspending all queues failed"); - goto out; - } - - ret = dqm->ops.evict_process_queues(dqm, qpd); - if (ret) { - dev_err(dev, "Evicting process queues failed"); - goto out; - } - - ret = resume_all_queues_mes(dqm); - if (ret) - dev_err(dev, "Resuming all queues failed"); - -out: - return ret; -} - int kfd_evict_process_device(struct kfd_process_device *pdd) { struct device_queue_manager *dqm; struct kfd_process *p; - int ret = 0; p = pdd->process; dqm = pdd->dev->dqm; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - if (dqm->dev->kfd->shared_resources.enable_mes) - ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); - else - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); - - return ret; + return dqm->ops.evict_process_queues(dqm, &pdd->qpd); } int reserve_debug_trap_vmid(struct device_queue_manager *dqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a65c67cf56ff..f1e7583650c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -297,16 +297,16 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } - if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { - pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); err = -EINVAL; goto out_err_unreserve; } - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, @@ -352,8 +352,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) return -EINVAL; - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9d72411c3379..74a1d3e1d52b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3687,6 +3687,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping); /* TODO: unmap ranges from GPU that lost access */ } + update_mapping |= !p->xnack_enabled && !list_empty(&remap_list); + list_for_each_entry_safe(prange, next, &remove_list, update_list) { pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d03e324d5b9..91c0188a29b2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -551,13 +551,13 @@ static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust) { - struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT); if (!offload_work) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); return; } - struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_KERNEL); + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT); if (!adjust_copy) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); kfree(offload_work); @@ -2085,8 +2085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); - adev->dm.restore_backlight = true; - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); @@ -3442,7 +3440,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); - adev->dm.restore_backlight = true; amdgpu_dm_irq_resume_early(adev); @@ -3566,6 +3563,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + bool init = false; if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -3575,7 +3573,14 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) aconnector->mst_root) continue; - drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); + scoped_guard(mutex, &aconnector->mst_mgr.lock) { + init = !aconnector->mst_mgr.mst_primary; + } + if (init) + dm_helpers_dp_mst_start_top_mgr(aconnector->dc_link->ctx, + aconnector->dc_link, false); + else + drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); } drm_connector_list_iter_end(&iter); @@ -8033,7 +8038,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, "mode %dx%d@%dHz is not native, enabling scaling\n", adjusted_mode->hdisplay, adjusted_mode->vdisplay, drm_mode_vrefresh(adjusted_mode)); - dm_new_connector_state->scaling = RMX_FULL; + dm_new_connector_state->scaling = RMX_ASPECT; } return 0; } @@ -9969,6 +9974,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; + bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -10088,6 +10094,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; + set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -10144,16 +10151,13 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, * to fix a flicker issue. * It will cause the dm->actual_brightness is not the current panel brightness * level. (the dm->brightness is the correct panel level) - * So we set the backlight level with dm->brightness value after initial - * set mode. Use restore_backlight flag to avoid setting backlight level - * for every subsequent mode set. + * So we set the backlight level with dm->brightness value after set mode */ - if (dm->restore_backlight) { + if (set_backlight_level) { for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i]) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } - dm->restore_backlight = false; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 009f206226f0..db75e991ac7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -631,13 +631,6 @@ struct amdgpu_display_manager { u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; /** - * @restore_backlight: - * - * Flag to indicate whether to restore backlight after modeset. - */ - bool restore_backlight; - - /** * @aux_hpd_discon_quirk: * * quirk for hpd discon while aux is on-going. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1ec9d03ad747..38f9ea313dcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -248,6 +248,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -277,7 +279,16 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) if (dm->active_vblank_irq_count == 0) { dc_post_update_surfaces_to_stream(dm->dc); + + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); + dc_allow_idle_optimizations(dm->dc, true); + + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); } mutex_unlock(&dm->dc_lock); @@ -297,8 +308,12 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) int irq_type; int rc = 0; - if (acrtc->otg_inst == -1) - goto skip; + if (enable && !acrtc->base.enabled) { + drm_dbg_vbl(crtc->dev, + "Reject vblank enable on unconfigured CRTC %d (enabled=%d)\n", + acrtc->crtc_id, acrtc->base.enabled); + return -EINVAL; + } irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); @@ -383,7 +398,7 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) return rc; } #endif -skip: + if (amdgpu_in_reset(adev)) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index f263e1a4537e..00dac862b665 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1302,7 +1302,8 @@ static int odm_combine_segments_show(struct seq_file *m, void *unused) if (connector->status != connector_status_connected) return -ENODEV; - if (pipe_ctx != NULL && pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) + if (pipe_ctx && pipe_ctx->stream_res.tg && + pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments(pipe_ctx->stream_res.tg, &segments); seq_printf(m, "%d\n", segments); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index fe100e4c9801..cc21337a182f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -83,6 +83,7 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct edid_caps->panel_patch.remove_sink_ext_caps = true; break; case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4171): drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_colorimetry = true; break; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 09be2a90cc79..4f569cd8a5d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -578,9 +578,6 @@ static void dpp3_power_on_blnd_lut( dpp_base->ctx->dc->optimized_required = true; dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; } - } else { - REG_SET(CM_MEM_PWR_CTRL, 0, - BLNDGAM_MEM_PWR_FORCE, power_on == true ? 0 : 1); } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 7c276c319086..ce3d0b45fb4c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -200,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; + if (link->ep_type != DISPLAY_ENDPOINT_PHY) + continue; + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 41c76ba9ba56..62a39204fe0b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -44,7 +44,13 @@ */ #define MAX_PIPES 6 #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) -#define MAX_LINKS (MAX_PIPES * 2 +2) + +#define MAX_DPIA 6 +#define MAX_CONNECTOR 6 +#define MAX_VIRTUAL_LINKS 4 + +#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) + #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 9e33bf937a69..2676ae9f6fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -78,6 +78,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, struct audio_output audio_output[MAX_PIPES]; struct dc_stream_state *streams_on_link[MAX_PIPES]; int num_streams_on_link = 0; + struct dc *dc = (struct dc *)link->dc; needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); @@ -150,7 +151,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { stream_update.stream = streams_on_link[i]; stream_update.dpms_off = &dpms_off; - dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update); + dc_update_planes_and_stream(dc, NULL, 0, streams_on_link[i], &stream_update); } } } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 85303167a553..1173c53359b0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1141,6 +1141,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, !sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; else if (dc_is_dvi_signal(sink->sink_signal) && + dc_is_dvi_signal(link->connector_signal) && aud_support->hdmi_audio_native && sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ce421bcddcb0..1aae46d703ba 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1260,6 +1260,17 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, update_v_total_for_static_ramp( core_freesync, stream, in_out_vrr); } + + /* + * If VRR is inactive, set vtotal min and max to nominal vtotal + */ + if (in_out_vrr->state == VRR_STATE_INACTIVE) { + in_out_vrr->adjust.v_total_min = + mod_freesync_calc_v_total_from_refresh(stream, + in_out_vrr->max_refresh_in_uhz); + in_out_vrr->adjust.v_total_max = in_out_vrr->adjust.v_total_min; + return; + } } unsigned long long mod_freesync_calc_nominal_field_rate( diff --git a/drivers/gpu/drm/amd/include/amd_cper.h b/drivers/gpu/drm/amd/include/amd_cper.h index 086869264425..a252ee4c7874 100644 --- a/drivers/gpu/drm/amd/include/amd_cper.h +++ b/drivers/gpu/drm/amd/include/amd_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h index 64b553e7de1a..e7fdcee22a71 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 518d07afc7df..bc29a923fa6e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -195,24 +195,6 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, return ret; } -int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) -{ - int ret = 0; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (pp_funcs && pp_funcs->notify_rlc_state) { - mutex_lock(&adev->pm.mutex); - - ret = pp_funcs->notify_rlc_state( - adev->powerplay.pp_handle, - en); - - mutex_unlock(&adev->pm.mutex); - } - - return ret; -} - int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index b5fbb0fd1dc0..a7e6d7854b7b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4724,14 +4724,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) ret = devm_device_add_group(adev->dev, &amdgpu_pm_policy_attr_group); if (ret) - goto err_out0; + goto err_out1; } if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { ret = devm_device_add_group(adev->dev, &amdgpu_board_attr_group); if (ret) - goto err_out0; + goto err_out1; if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, (void *)&tmp) != -EOPNOTSUPP) { sysfs_add_file_to_group(&adev->dev->kobj, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65c1d98af26c..af48aead12f7 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -424,8 +424,6 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); -int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); - int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index cf9932e68055..3a9522c17fee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3500,6 +3500,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, * for these GPUs to calculate bandwidth requirements. */ if (high_pixelclock_count) { + /* Work around flickering lines at the bottom edge + * of the screen when using a single 4K 60Hz monitor. + */ + disable_mclk_switching = true; + /* On Oland, we observe some flickering when two 4K 60Hz * displays are connected, possibly because voltage is too low. * Raise the voltage by requiring a higher SCLK. diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 8da882c51856..9b28c0728269 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5444,8 +5444,7 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = table_info->cac_dtp_table->usSoftwareShutdownTemp * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; else if (hwmgr->pp_table_version == PP_TABLE_V0) - thermal_data->max = data->thermal_temp_setting.temperature_shutdown * - PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->max = data->thermal_temp_setting.temperature_shutdown; thermal_data->sw_ctf_threshold = thermal_data->max; diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index d2dbd90bb427..0a876c840c79 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2024,7 +2024,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; table->VRConfig = 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 1f50f1e74c48..aa3ae9b115c4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2028,7 +2028,7 @@ static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; result = iceland_populate_smc_svi2_config(hwmgr, table); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb8086859857..244b8c364d45 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2040,6 +2040,12 @@ static int smu_disable_dpms(struct smu_context *smu) smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) return 0; + /* vangogh s0ix */ + if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 2)) && + adev->in_s0ix) + return 0; + /* * For gpu reset, runpm and hibernation through BACO, * BACO feature has to be kept enabled. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2c9869feba61..0708d0f0938b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2217,6 +2217,9 @@ static int vangogh_post_smu_init(struct smu_context *smu) uint32_t total_cu = adev->gfx.config.max_cu_per_sh * adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; + if (adev->in_s0ix) + return 0; + /* allow message will be sent after enable message on Vangogh*/ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index f532f7c69259..a8961a8f5c42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -969,7 +969,7 @@ int smu_cmn_update_table(struct smu_context *smu, table_index); uint32_t table_size; int ret = 0; - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) + if (!table_data || table_index >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; table_size = smu_table->tables[table_index].size; diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index c15aef014f69..d41bd876167c 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -282,13 +282,13 @@ static inline void __ast_write8_i(void __iomem *addr, u32 reg, u8 index, u8 val) __ast_write8(addr, reg + 1, val); } -static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask, +static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 preserve_mask, u8 val) { - u8 tmp = __ast_read8_i_masked(addr, reg, index, read_mask); + u8 tmp = __ast_read8_i_masked(addr, reg, index, preserve_mask); - tmp |= val; - __ast_write8_i(addr, reg, index, tmp); + val &= ~preserve_mask; + __ast_write8_i(addr, reg, index, tmp | val); } static inline u32 ast_read32(struct ast_device *ast, u32 reg) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index b4e8edc7c767..30b011ed0a05 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -836,22 +836,24 @@ ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, static void ast_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct ast_device *ast = to_ast_device(crtc->dev); + u8 vgacr17 = 0x00; + u8 vgacrb6 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, 0x00); - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, 0x00); + vgacr17 |= AST_IO_VGACR17_SYNC_ENABLE; + vgacrb6 &= ~(AST_IO_VGACRB6_VSYNC_OFF | AST_IO_VGACRB6_HSYNC_OFF); + + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); } static void ast_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); struct ast_device *ast = to_ast_device(crtc->dev); - u8 vgacrb6; + u8 vgacr17 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, AST_IO_VGASR1_SD); - - vgacrb6 = AST_IO_VGACRB6_VSYNC_OFF | - AST_IO_VGACRB6_HSYNC_OFF; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); + vgacr17 &= ~AST_IO_VGACR17_SYNC_ENABLE; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); /* * HW cursors require the underlying primary plane and CRTC to diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index e15adaf3a80e..30578e3b07e4 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -29,6 +29,7 @@ #define AST_IO_VGAGRI (0x4E) #define AST_IO_VGACRI (0x54) +#define AST_IO_VGACR17_SYNC_ENABLE BIT(7) /* called "Hardware reset" in docs */ #define AST_IO_VGACR80_PASSWORD (0xa8) #define AST_IO_VGACR99_VGAMEM_RSRV_MASK GENMASK(1, 0) #define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 399fa7eebd49..03fc8fd10f20 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -121,8 +121,7 @@ static int lt9211_read_chipid(struct lt9211 *ctx) } /* Test for known Chip ID. */ - if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE || - chipid[2] != REG_CHIPID2_VALUE) { + if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE) { dev_err(ctx->dev, "Unknown Chip ID: 0x%02x 0x%02x 0x%02x\n", chipid[0], chipid[1], chipid[2]); return -EINVAL; diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index d502d146b177..56638814bb28 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -280,7 +280,7 @@ sanity: GIT_STRATEGY: none script: # ci-fairy check-commits --junit-xml=check-commits.xml - - ci-fairy check-merge-request --require-allow-collaboration --junit-xml=check-merge-request.xml + # - ci-fairy check-merge-request --require-allow-collaboration --junit-xml=check-merge-request.xml - | set -eu image_tags=( diff --git a/drivers/gpu/drm/clients/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c index 72480db1f00d..515aceac22b1 100644 --- a/drivers/gpu/drm/clients/drm_client_setup.c +++ b/drivers/gpu/drm/clients/drm_client_setup.c @@ -13,8 +13,8 @@ static char drm_client_default[16] = CONFIG_DRM_CLIENT_DEFAULT; module_param_string(active, drm_client_default, sizeof(drm_client_default), 0444); MODULE_PARM_DESC(active, - "Choose which drm client to start, default is" - CONFIG_DRM_CLIENT_DEFAULT "]"); + "Choose which drm client to start, default is " + CONFIG_DRM_CLIENT_DEFAULT); /** * drm_client_setup() - Setup in-kernel DRM clients diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c index 9dc0408fbbea..5b956229c82f 100644 --- a/drivers/gpu/drm/drm_draw.c +++ b/drivers/gpu/drm/drm_draw.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(drm_draw_fill16); void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color) + u32 color) { unsigned int y, x; diff --git a/drivers/gpu/drm/drm_draw_internal.h b/drivers/gpu/drm/drm_draw_internal.h index f121ee7339dc..20cb404e23ea 100644 --- a/drivers/gpu/drm/drm_draw_internal.h +++ b/drivers/gpu/drm/drm_draw_internal.h @@ -47,7 +47,7 @@ void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color); + u32 color); void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index ebf305fb24f0..6fb55601252f 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -310,8 +310,12 @@ EXPORT_SYMBOL(drm_gem_destroy_shadow_plane_state); void __drm_gem_reset_shadow_plane(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state) { - __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); - drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); + if (shadow_plane_state) { + __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); + drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); + } else { + __drm_atomic_helper_plane_reset(plane, NULL); + } } EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 1d6312fa1429..d4b6ea42db0f 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -174,6 +174,33 @@ static void drm_panic_write_pixel24(void *vaddr, unsigned int offset, u32 color) *p = color & 0xff; } +/* + * Special case if the pixel crosses page boundaries + */ +static void drm_panic_write_pixel24_xpage(void *vaddr, struct page *next_page, + unsigned int offset, u32 color) +{ + u8 *vaddr2; + u8 *p = vaddr + offset; + + vaddr2 = kmap_local_page_try_from_panic(next_page); + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 1) + p = vaddr2; + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 2) + p = vaddr2; + + *p = color & 0xff; + kunmap_local(vaddr2); +} + static void drm_panic_write_pixel32(void *vaddr, unsigned int offset, u32 color) { u32 *p = vaddr + offset; @@ -231,7 +258,14 @@ static void drm_panic_blit_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - if (vaddr) + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, fg32); + else drm_panic_write_pixel(vaddr, offset, fg32, cpp); } } @@ -321,7 +355,15 @@ static void drm_panic_fill_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - drm_panic_write_pixel(vaddr, offset, color, cpp); + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, color); + else + drm_panic_write_pixel(vaddr, offset, color, cpp); } } if (vaddr) @@ -429,6 +471,9 @@ static void drm_panic_logo_rect(struct drm_rect *rect, const struct font_desc *f static void drm_panic_logo_draw(struct drm_scanout_buffer *sb, struct drm_rect *rect, const struct font_desc *font, u32 fg_color) { + if (rect->x2 > sb->width || rect->y2 > sb->height) + return; + if (logo_mono) drm_panic_blit(sb, rect, logo_mono->data, DIV_ROUND_UP(drm_rect_width(rect), 8), 1, fg_color); @@ -477,7 +522,7 @@ static int draw_line_with_wrap(struct drm_scanout_buffer *sb, const struct font_ struct drm_panic_line *line, int yoffset, u32 fg_color) { int chars_per_row = sb->width / font->width; - struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, sb->height); + struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, font->height); struct drm_panic_line line_wrap; if (line->len > chars_per_row) { @@ -520,7 +565,7 @@ static void draw_panic_static_kmsg(struct drm_scanout_buffer *sb) struct drm_panic_line line; int yoffset; - if (!font) + if (!font || font->width > sb->width) return; yoffset = sb->height - font->height - (sb->height % font->height) / 2; @@ -733,7 +778,10 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) pr_debug("QR width %d and scale %d\n", qr_width, scale); r_qr_canvas = DRM_RECT_INIT(0, 0, qr_canvas_width * scale, qr_canvas_width * scale); - v_margin = (sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg)) / 5; + v_margin = sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg); + if (v_margin < 0) + return -ENOSPC; + v_margin /= 5; drm_rect_translate(&r_qr_canvas, (sb->width - r_qr_canvas.x2) / 2, 2 * v_margin); r_qr = DRM_RECT_INIT(r_qr_canvas.x1 + QR_MARGIN * scale, r_qr_canvas.y1 + QR_MARGIN * scale, @@ -746,7 +794,7 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) /* Fill with the background color, and draw text on top */ drm_panic_fill(sb, &r_screen, bg_color); - if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr)) + if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr_canvas)) drm_panic_logo_draw(sb, &r_logo, font, fg_color); draw_txt_rectangle(sb, font, panic_msg, panic_msg_lines, true, &r_msg, fg_color); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index b13a17276d07..88385dc3b30d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -347,7 +347,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, u32 link_target, link_dwords; bool switch_context = gpu->exec_state != exec_state; bool switch_mmu_context = gpu->mmu_context != mmu_context; - unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq); + unsigned int new_flush_seq = READ_ONCE(mmu_context->flush_seq); bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq; bool has_blt = !!(gpu->identity.minor_features5 & chipMinorFeatures5_BLT_ENGINE); diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e58c0c158b3a..b91575380708 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -413,7 +413,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += kvmgt.o # # Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build ifdef CONFIG_DRM_I915_WERROR - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none -Werror $< + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none -Werror $< endif # header test diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 77a0199f9ea5..4a4cace1f879 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -546,6 +546,36 @@ static bool is_event_handler(struct intel_display *display, REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == event_id; } +static bool fixup_dmc_evt(struct intel_display *display, + enum intel_dmc_id dmc_id, + i915_reg_t reg_ctl, u32 *data_ctl, + i915_reg_t reg_htp, u32 *data_htp) +{ + if (!is_dmc_evt_ctl_reg(display, dmc_id, reg_ctl)) + return false; + + if (!is_dmc_evt_htp_reg(display, dmc_id, reg_htp)) + return false; + + /* make sure reg_ctl and reg_htp are for the same event */ + if (i915_mmio_reg_offset(reg_ctl) - i915_mmio_reg_offset(DMC_EVT_CTL(display, dmc_id, 0)) != + i915_mmio_reg_offset(reg_htp) - i915_mmio_reg_offset(DMC_EVT_HTP(display, dmc_id, 0))) + return false; + + /* + * On ADL-S the HRR event handler is not restored after DC6. + * Clear it to zero from the beginning to avoid mismatches later. + */ + if (display->platform.alderlake_s && dmc_id == DMC_FW_MAIN && + is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg_ctl, *data_ctl)) { + *data_ctl = 0; + *data_htp = 0; + return true; + } + + return false; +} + static bool disable_dmc_evt(struct intel_display *display, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) @@ -1064,9 +1094,32 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; + } + + for (i = 0; i < mmio_count - 1; i++) { + u32 orig_mmiodata[2] = { + dmc_info->mmiodata[i], + dmc_info->mmiodata[i+1], + }; + + if (!fixup_dmc_evt(display, dmc_id, + dmc_info->mmioaddr[i], &dmc_info->mmiodata[i], + dmc_info->mmioaddr[i+1], &dmc_info->mmiodata[i+1])) + continue; + + drm_dbg_kms(display->drm, + " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_CTL)\n", + i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]), + orig_mmiodata[0], dmc_info->mmiodata[i]); + drm_dbg_kms(display->drm, + " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_HTP)\n", + i+1, i915_mmio_reg_offset(dmc_info->mmioaddr[i+1]), + orig_mmiodata[1], dmc_info->mmiodata[i+1]); + } + for (i = 0; i < mmio_count; i++) { drm_dbg_kms(display->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n", - i, mmioaddr[i], mmiodata[i], + i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]), dmc_info->mmiodata[i], is_dmc_evt_ctl_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" : is_dmc_evt_htp_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "", disable_dmc_evt(display, dmc_id, dmc_info->mmioaddr[i], diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 22a4a1575d22..c48384e58ea1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2113,10 +2113,11 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); - intel_frontbuffer_put(intel_fb->frontbuffer); - intel_fb_bo_framebuffer_fini(intel_fb_bo(fb)); + intel_frontbuffer_put(intel_fb->frontbuffer); + + kfree(intel_fb->panic); kfree(intel_fb); } @@ -2215,19 +2216,27 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct intel_display *display = to_intel_display(obj->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - int ret = -EINVAL; + int ret; int i; - ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); - if (ret) - return ret; + intel_fb->panic = intel_panic_alloc(); + if (!intel_fb->panic) + return -ENOMEM; + /* + * intel_frontbuffer_get() must be done before + * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. + */ intel_fb->frontbuffer = intel_frontbuffer_get(obj); if (!intel_fb->frontbuffer) { ret = -ENOMEM; - goto err; + goto err_free_panic; } + ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); + if (ret) + goto err_frontbuffer_put; + ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, mode_cmd->pixel_format, @@ -2235,7 +2244,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, @@ -2246,7 +2255,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ @@ -2254,7 +2263,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); @@ -2264,7 +2273,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2272,7 +2281,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2282,7 +2291,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } } @@ -2291,7 +2300,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(display, intel_fb); if (ret) - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2317,10 +2326,13 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); +err_bo_framebuffer_fini: + intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); -err: - intel_fb_bo_framebuffer_fini(obj); +err_free_panic: + kfree(intel_fb->panic); + return ret; } @@ -2347,20 +2359,11 @@ intel_user_framebuffer_create(struct drm_device *dev, struct intel_framebuffer *intel_framebuffer_alloc(void) { struct intel_framebuffer *intel_fb; - struct intel_panic *panic; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) return NULL; - panic = intel_panic_alloc(); - if (!panic) { - kfree(intel_fb); - return NULL; - } - - intel_fb->panic = panic; - return intel_fb; } diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 43be5377ddc1..73ed28ac9573 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -270,6 +270,8 @@ static void frontbuffer_release(struct kref *ref) spin_unlock(&display->fb_tracking.lock); i915_active_fini(&front->write); + + drm_gem_object_put(obj); kfree_rcu(front, rcu); } @@ -287,6 +289,8 @@ intel_frontbuffer_get(struct drm_gem_object *obj) if (!front) return NULL; + drm_gem_object_get(obj); + front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); @@ -299,8 +303,12 @@ intel_frontbuffer_get(struct drm_gem_object *obj) spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); spin_unlock(&display->fb_tracking.lock); - if (cur != front) + + if (cur != front) { + drm_gem_object_put(obj); kfree(front); + } + return cur; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 01bf304c705f..4619237f1346 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -585,6 +585,10 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); int ret; + /* TODO: Enable Panel Replay on MST once it's properly implemented. */ + if (intel_dp->mst_detect == DRM_DP_MST) + return; + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); if (ret < 0) @@ -888,7 +892,8 @@ static bool is_dc5_dc6_blocked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); u32 current_dc_state = intel_display_power_get_current_dc_state(display); - struct drm_vblank_crtc *vblank = &display->drm->vblank[intel_dp->psr.pipe]; + struct intel_crtc *crtc = intel_crtc_for_pipe(display, intel_dp->psr.pipe); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); return (current_dc_state != DC_STATE_EN_UPTO_DC5 && current_dc_state != DC_STATE_EN_UPTO_DC6) || @@ -3402,6 +3407,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { + /* Selective fetch prior LNL */ if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? */ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3420,12 +3426,19 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) intel_psr_configure_full_frame_update(intel_dp); intel_psr_force_update(intel_dp); + } else if (!intel_dp->psr.psr2_sel_fetch_enabled) { + /* + * PSR1 on all platforms + * PSR2 HW tracking + * Panel Replay Full frame update + */ + intel_psr_force_update(intel_dp); } else { + /* Selective update LNL onwards */ intel_psr_exit(intel_dp); } - if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && - !intel_dp->psr.busy_frontbuffer_bits) + if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b6dc3d1b9bb1..b682969e3a29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,12 +89,10 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); - drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); } else { - drm_gem_object_get(intel_bo_to_drm_bo(obj)); rcu_assign_pointer(obj->frontbuffer, front); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 88b147fa5cb1..c90b35881a26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -205,7 +205,7 @@ static u64 div_u64_roundup(u64 nom, u32 den) u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) { - return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency); + return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency); } u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) @@ -215,7 +215,7 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) { - return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC); + return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC); } u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 3e7e5badcc2b..2c651ec024ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1325,9 +1325,16 @@ static int ct_receive(struct intel_guc_ct *ct) static void ct_try_receive_message(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); int ret; - if (GEM_WARN_ON(!ct->enabled)) + if (!ct->enabled) { + GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress); + return; + } + + /* When interrupt disabled, message handling is not expected */ + if (!guc->interrupts.enabled) return; ret = ct_receive(ct); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 25e97031d76e..30d5889fc809 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1595,8 +1595,20 @@ err_unlock: err_vma_res: i915_vma_resource_free(vma_res); err_fence: - if (work) - dma_fence_work_commit_imm(&work->base); + if (work) { + /* + * When pinning VMA to GGTT on CHV or BXT with VTD enabled, + * commit VMA binding asynchronously to avoid risk of lock + * inversion among reservation_ww locks held here and + * cpu_hotplug_lock acquired from stop_machine(), which we + * wrap around GGTT updates when running in those environments. + */ + if (i915_vma_is_ggtt(vma) && + intel_vm_no_concurrent_access_wa(vma->vm->i915)) + dma_fence_work_commit(&work->base); + else + dma_fence_work_commit_imm(&work->base); + } err_rpm: intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig index 682dd2633d0c..0482bfcefdde 100644 --- a/drivers/gpu/drm/imagination/Kconfig +++ b/drivers/gpu/drm/imagination/Kconfig @@ -7,6 +7,7 @@ config DRM_POWERVR depends on DRM depends on MMU depends on PM + depends on POWER_SEQUENCING || !POWER_SEQUENCING select DRM_EXEC select DRM_GEM_SHMEM_HELPER select DRM_SCHED diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index 6d8325c76697..7fc6af703307 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c @@ -25,19 +25,18 @@ struct imx_parallel_display_encoder { struct drm_encoder encoder; - struct drm_bridge bridge; - struct imx_parallel_display *pd; }; struct imx_parallel_display { struct device *dev; u32 bus_format; struct drm_bridge *next_bridge; + struct drm_bridge bridge; }; static inline struct imx_parallel_display *bridge_to_imxpd(struct drm_bridge *b) { - return container_of(b, struct imx_parallel_display_encoder, bridge)->pd; + return container_of(b, struct imx_parallel_display, bridge); } static const u32 imx_pd_bus_fmts[] = { @@ -195,15 +194,13 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) if (IS_ERR(imxpd_encoder)) return PTR_ERR(imxpd_encoder); - imxpd_encoder->pd = imxpd; encoder = &imxpd_encoder->encoder; - bridge = &imxpd_encoder->bridge; + bridge = &imxpd->bridge; ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node); if (ret) return ret; - bridge->funcs = &imx_pd_bridge_funcs; drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); connector = drm_bridge_connector_init(drm, encoder); @@ -228,9 +225,10 @@ static int imx_pd_probe(struct platform_device *pdev) u32 bus_format = 0; const char *fmt; - imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL); - if (!imxpd) - return -ENOMEM; + imxpd = devm_drm_bridge_alloc(dev, struct imx_parallel_display, bridge, + &imx_pd_bridge_funcs); + if (IS_ERR(imxpd)) + return PTR_ERR(imxpd); /* port@1 is the output port */ imxpd->next_bridge = devm_drm_of_get_bridge(dev, np, 1, 0); @@ -258,6 +256,8 @@ static int imx_pd_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxpd); + devm_drm_bridge_add(dev, &imxpd->bridge); + return component_add(dev, &imx_pd_ops); } diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index bc7527542fdc..c4c6d0249df5 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -283,6 +283,10 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) unsigned int i; unsigned long flags; + /* release GCE HW usage and start autosuspend */ + pm_runtime_mark_last_busy(cmdq_cl->chan->mbox->dev); + pm_runtime_put_autosuspend(cmdq_cl->chan->mbox->dev); + if (data->sta < 0) return; @@ -618,6 +622,9 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + if (pm_runtime_resume_and_get(mtk_crtc->cmdq_client.chan->mbox->dev) < 0) + goto update_config_out; + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); goto update_config_out; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index eb5537f0ac90..31ff2922758a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -686,10 +686,6 @@ err_free: for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; err_put_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); return ret; } @@ -697,18 +693,12 @@ err_put_dev: static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); - int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); - - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); } private->mtk_drm_bound = false; diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index 02349bd44001..788b52c1d10c 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -21,9 +21,6 @@ static const u64 modifiers[] = { DRM_FORMAT_MOD_LINEAR, - DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPLIT | - AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_INVALID, }; @@ -71,26 +68,7 @@ static bool mtk_plane_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) { - if (modifier == DRM_FORMAT_MOD_LINEAR) - return true; - - if (modifier != DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPLIT | - AFBC_FORMAT_MOD_SPARSE)) - return false; - - if (format != DRM_FORMAT_XRGB8888 && - format != DRM_FORMAT_ARGB8888 && - format != DRM_FORMAT_BGRX8888 && - format != DRM_FORMAT_BGRA8888 && - format != DRM_FORMAT_ABGR8888 && - format != DRM_FORMAT_XBGR8888 && - format != DRM_FORMAT_RGB888 && - format != DRM_FORMAT_BGR888) - return false; - - return true; + return modifier == DRM_FORMAT_MOD_LINEAR; } static void mtk_plane_destroy_state(struct drm_plane *plane, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index fc62fef2fed8..4e6dc16e4a4c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -780,6 +780,9 @@ static bool fw_block_mem(struct a6xx_gmu_bo *bo, const struct block_header *blk) return true; } +#define NEXT_BLK(blk) \ + ((const struct block_header *)((const char *)(blk) + sizeof(*(blk)) + (blk)->size)) + static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); @@ -811,7 +814,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) for (blk = (const struct block_header *) fw_image->data; (const u8*) blk < fw_image->data + fw_image->size; - blk = (const struct block_header *) &blk->data[blk->size >> 2]) { + blk = NEXT_BLK(blk)) { if (blk->size == 0) continue; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index afaa3cfefd35..4b5a4edd0702 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -348,13 +348,6 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, return 0; } -static bool -adreno_smmu_has_prr(struct msm_gpu *gpu) -{ - struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); - return adreno_smmu && adreno_smmu->set_prr_addr; -} - int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, uint32_t param, uint64_t *value, uint32_t *len) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 4b970a59deaf..2f8156051d9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1545,6 +1545,9 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock, dpu_kms->perf.perf_cfg); + if (dpu_kms->catalog->caps->has_3d_merge) + adjusted_mode_clk /= 2; + /* * The given mode, adjusted for the perf clock factor, should not exceed * the max core clock rate diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 6641455c4ec6..9f8d1bba9139 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -267,8 +267,8 @@ static const u32 wb2_formats_rgb_yuv[] = { .base = 0x200, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ .base = 0x320, .len = 0x100,}, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .rotation_cfg = NULL, \ } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index f54cf0faa1c7..905524ceeb1f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -500,13 +500,15 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg, int i; for (i = 0; i < DPU_MAX_PLANES; i++) { + uint32_t w = src_w, h = src_h; + if (i == DPU_SSPP_COMP_1_2 || i == DPU_SSPP_COMP_2) { - src_w /= chroma_subsmpl_h; - src_h /= chroma_subsmpl_v; + w /= chroma_subsmpl_h; + h /= chroma_subsmpl_v; } - pixel_ext->num_ext_pxls_top[i] = src_h; - pixel_ext->num_ext_pxls_left[i] = src_w; + pixel_ext->num_ext_pxls_top[i] = h; + pixel_ext->num_ext_pxls_left[i] = w; } } @@ -740,7 +742,7 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, * We already have verified scaling against platform limitations. * Now check if the SSPP supports scaling at all. */ - if (!sblk->scaler_blk.len && + if (!(sblk->scaler_blk.len && pipe->sspp->ops.setup_scaler) && ((drm_rect_width(&new_plane_state->src) >> 16 != drm_rect_width(&new_plane_state->dst)) || (drm_rect_height(&new_plane_state->src) >> 16 != @@ -1278,7 +1280,7 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, state, plane_state, prev_adjacent_plane_state); if (ret) - break; + return ret; prev_adjacent_plane_state = plane_state; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 2c77c74fac0f..d9c3b0a1d091 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -842,7 +842,7 @@ struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, if (!reqs->scale && !reqs->yuv) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_DMA); - if (!hw_sspp && reqs->scale) + if (!hw_sspp && !reqs->yuv) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_RGB); if (!hw_sspp) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_VIG); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index cd73468e369a..7545c0293efb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -72,6 +72,9 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, DPU_ERROR("invalid fb w=%d, maxlinewidth=%u\n", fb->width, dpu_wb_conn->maxlinewidth); return -EINVAL; + } else if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { + DPU_ERROR("unsupported fb modifier:%#llx\n", fb->modifier); + return -EINVAL; } return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index e391505fdaf0..3cbf08231492 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -109,7 +109,6 @@ struct msm_dsi_phy { struct msm_dsi_dphy_timing timing; const struct msm_dsi_phy_cfg *cfg; void *tuning_cfg; - void *pll_data; enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 32f06edd21a9..c5e1d2016bcc 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -426,11 +426,8 @@ static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) u32 data; spin_lock_irqsave(&pll->pll_enable_lock, flags); - if (pll->pll_enable_cnt++) { - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); - WARN_ON(pll->pll_enable_cnt == INT_MAX); - return; - } + pll->pll_enable_cnt++; + WARN_ON(pll->pll_enable_cnt == INT_MAX); data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; @@ -876,7 +873,6 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) spin_lock_init(&pll_7nm->pll_enable_lock); pll_7nm->phy = phy; - phy->pll_data = pll_7nm; ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws); if (ret) { @@ -965,10 +961,8 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, u32 const delay_us = 5; u32 const timeout_us = 1000; struct msm_dsi_dphy_timing *timing = &phy->timing; - struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; bool less_than_1500_mhz; - unsigned long flags; u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; u32 glbl_pemph_ctrl_0; u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; @@ -1090,13 +1084,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } - spin_lock_irqsave(&pll->pll_enable_lock, flags); - pll->pll_enable_cnt = 1; /* de-assert digital and pll power down */ data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B | DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* Assert PLL core reset */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL); @@ -1209,9 +1200,7 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { - struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; - unsigned long flags; u32 data; DBG(""); @@ -1238,11 +1227,8 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0); - spin_lock_irqsave(&pll->pll_enable_lock, flags); - pll->pll_enable_cnt = 0; /* Turn off all PHY blocks */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0); - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* make sure phy is turned off */ wmb(); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 07d8cdd6bb2e..9f7fbe577abb 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1120,12 +1120,16 @@ static void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); } - if (obj->resv != &obj->_resv) { + /* + * In error paths, we could end up here before msm_gem_new_handle() + * has changed obj->resv to point to the shared resv. In this case, + * we don't want to drop a ref to the shared r_obj that we haven't + * taken yet. + */ + if ((msm_obj->flags & MSM_BO_NO_SHARE) && (obj->resv != &obj->_resv)) { struct drm_gem_object *r_obj = container_of(obj->resv, struct drm_gem_object, _resv); - WARN_ON(!(msm_obj->flags & MSM_BO_NO_SHARE)); - /* Drop reference we hold to shared resv obj: */ drm_gem_object_put(r_obj); } diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3ab3b27134f9..75d9f3574370 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -414,6 +414,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) submit->user_fence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); + + last_fence = vm->last_fence; + vm->last_fence = dma_fence_unwrap_merge(submit->user_fence, last_fence); + dma_fence_put(last_fence); + return; } @@ -427,10 +432,6 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) dma_resv_add_fence(obj->resv, submit->user_fence, DMA_RESV_USAGE_READ); } - - last_fence = vm->last_fence; - vm->last_fence = dma_fence_unwrap_merge(submit->user_fence, last_fence); - dma_fence_put(last_fence); } static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 8316af1723c2..89a95977f41e 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -971,6 +971,7 @@ static int lookup_op(struct msm_vm_bind_job *job, const struct drm_msm_vm_bind_op *op) { struct drm_device *dev = job->vm->drm; + struct msm_drm_private *priv = dev->dev_private; int i = job->nr_ops++; int ret = 0; @@ -1017,6 +1018,11 @@ lookup_op(struct msm_vm_bind_job *job, const struct drm_msm_vm_bind_op *op) break; } + if ((op->op == MSM_VM_BIND_OP_MAP_NULL) && + !adreno_smmu_has_prr(priv->gpu)) { + ret = UERR(EINVAL, dev, "PRR not supported\n"); + } + return ret; } @@ -1421,7 +1427,7 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) * Maybe we could allow just UNMAP ops? OTOH userspace should just * immediately close the device file and all will be torn down. */ - if (to_msm_vm(ctx->vm)->unusable) + if (to_msm_vm(msm_context_vm(dev, ctx))->unusable) return UERR(EPIPE, dev, "context is unusable"); /* diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index a597f2bee30b..2894fc118485 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -299,6 +299,17 @@ static inline struct msm_gpu *dev_to_gpu(struct device *dev) return container_of(adreno_smmu, struct msm_gpu, adreno_smmu); } +static inline bool +adreno_smmu_has_prr(struct msm_gpu *gpu) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); + + if (!adreno_smmu) + return false; + + return adreno_smmu && adreno_smmu->set_prr_addr; +} + /* It turns out that all targets use the same ringbuffer size */ #define MSM_GPU_RINGBUFFER_SZ SZ_32K #define MSM_GPU_RINGBUFFER_BLKSIZE 32 diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 0e18619f96cb..a188617653e8 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -338,6 +338,8 @@ msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_preall ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages); if (ret != p->count) { + kfree(p->pages); + p->pages = NULL; p->count = ret; return -ENOMEM; } @@ -351,6 +353,9 @@ msm_iommu_pagetable_prealloc_cleanup(struct msm_mmu *mmu, struct msm_mmu_preallo struct kmem_cache *pt_cache = get_pt_cache(mmu); uint32_t remaining_pt_count = p->count - p->ptr; + if (!p->pages) + return; + if (p->count > 0) trace_msm_mmu_prealloc_cleanup(p->count, remaining_pt_count); diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index e97e39abf3a2..12b1dba8e05d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2867,7 +2867,9 @@ nv50_display_create(struct drm_device *dev) } /* Assign the correct format modifiers */ - if (disp->disp->object.oclass >= TU102_DISP) + if (disp->disp->object.oclass >= GB202_DISP) + nouveau_display(dev)->format_modifiers = wndwca7e_modifiers; + else if (disp->disp->object.oclass >= TU102_DISP) nouveau_display(dev)->format_modifiers = wndwc57e_modifiers; else if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 15f9242b72ac..5d998f0319dc 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -104,4 +104,5 @@ struct nouveau_encoder *nv50_real_outp(struct drm_encoder *encoder); extern const u64 disp50xx_modifiers[]; extern const u64 disp90xx_modifiers[]; extern const u64 wndwc57e_modifiers[]; +extern const u64 wndwca7e_modifiers[]; #endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e2c55f4b9c5a..ef9e410babbf 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -786,13 +786,14 @@ nv50_wndw_destroy(struct drm_plane *plane) } /* This function assumes the format has already been validated against the plane - * and the modifier was validated against the device-wides modifier list at FB + * and the modifier was validated against the device-wide modifier list at FB * creation time. */ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, u32 format, u64 modifier) { struct nouveau_drm *drm = nouveau_drm(plane->dev); + const struct drm_format_info *info = drm_format_info(format); uint8_t i; /* All chipsets can display all formats in linear layout */ @@ -800,13 +801,32 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, return true; if (drm->client.device.info.chipset < 0xc0) { - const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; if (!format) return false; for (i = 0; i < info->num_planes; i++) if ((info->cpp[i] != 4) && kind != 0x70) return false; + } else if (drm->client.device.info.chipset >= 0x1b2) { + const uint8_t slayout = ((modifier >> 22) & 0x1) | + ((modifier >> 25) & 0x6); + + if (!format) + return false; + + /* + * Note in practice this implies only formats where cpp is equal + * for each plane, or >= 4 for all planes, are supported. + */ + for (i = 0; i < info->num_planes; i++) { + if (((info->cpp[i] == 2) && slayout != 3) || + ((info->cpp[i] == 1) && slayout != 2) || + ((info->cpp[i] >= 4) && slayout != 1)) + return false; + + /* 24-bit not supported. It has yet another layout */ + WARN_ON(info->cpp[i] == 3); + } } return true; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c index 0d8e9a9d1a57..2cec8cfbd546 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c @@ -179,6 +179,39 @@ wndwca7e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } +/**************************************************************** + * Log2(block height) ----------------------------+ * + * Page Kind ----------------------------------+ | * + * Gob Height/Page Kind Generation ------+ | | * + * Sector layout -------+ | | | * + * Compression ------+ | | | | */ +const u64 wndwca7e_modifiers[] = { /* | | | | | */ + /* 4cpp+ modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 5), + /* 1cpp/8bpp modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 5), + /* 2cpp/16bpp modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 5), + /* All formats support linear */ + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + static const struct nv50_wndw_func wndwca7e = { .acquire = wndwc37e_acquire, diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index e60f7892f5ce..a7bf539e5d86 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -482,6 +482,17 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, return 0; } +static bool +nouveau_sched_job_list_empty(struct nouveau_sched *sched) +{ + bool empty; + + spin_lock(&sched->job.list.lock); + empty = list_empty(&sched->job.list.head); + spin_unlock(&sched->job.list.lock); + + return empty; +} static void nouveau_sched_fini(struct nouveau_sched *sched) @@ -489,8 +500,7 @@ nouveau_sched_fini(struct nouveau_sched *sched) struct drm_gpu_scheduler *drm_sched = &sched->base; struct drm_sched_entity *entity = &sched->entity; - rmb(); /* for list_empty to work without lock */ - wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + wait_event(sched->job.wq, nouveau_sched_job_list_empty(sched)); drm_sched_entity_fini(entity); drm_sched_fini(drm_sched); diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index 2fc7b0779b37..893af9b16756 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -359,7 +359,7 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; kingdisplay = devm_drm_panel_alloc(&dsi->dev, __typeof(*kingdisplay), base, &kingdisplay_panel_funcs, diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c index 04d91929eedd..d5f821d6b23c 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c @@ -249,6 +249,11 @@ static const struct drm_display_mode default_mode = { .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, }; +/* + * The mode data for this panel has been reverse engineered without access + * to the panel datasheet / manual. Using DRM_MODE_FLAG_PHSYNC like all + * other panels results in garbage data on the display. + */ static const struct drm_display_mode t28cp45tn89_mode = { .clock = 6008, .hdisplay = 240, @@ -261,7 +266,7 @@ static const struct drm_display_mode t28cp45tn89_mode = { .vtotal = 320 + 8 + 4 + 4, .width_mm = 43, .height_mm = 57, - .flags = DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC, }; static const struct drm_display_mode et028013dma_mode = { diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 9bf06e55eaee..df767e82148a 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1099,6 +1099,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) } panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_stop(ptdev); } /** diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 156c7a0b62a2..3f43686f0195 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -288,6 +288,23 @@ panthor_gem_create_with_handle(struct drm_file *file, panthor_gem_debugfs_set_usage_flags(bo, 0); + /* If this is a write-combine mapping, we query the sgt to force a CPU + * cache flush (dma_map_sgtable() is called when the sgt is created). + * This ensures the zero-ing is visible to any uncached mapping created + * by vmap/mmap. + * FIXME: Ideally this should be done when pages are allocated, not at + * BO creation time. + */ + if (shmem->map_wc) { + struct sg_table *sgt; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto out_put_gem; + } + } + /* * Allocate an id of idr table where the obj is registered * and handle has the id what user can see. @@ -296,6 +313,7 @@ panthor_gem_create_with_handle(struct drm_file *file, if (!ret) *size = bo->base.base.size; +out_put_gem: /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 6dec4354e378..7870e7dbaa5d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1175,10 +1175,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) break; case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: - /* Partial unmaps might trigger a remap with either a prev or a next VA, - * but not both. + /* Two VMAs can be needed for an unmap, as an unmap can happen + * in the middle of a drm_gpuva, requiring a remap with both + * prev & next VA. Or an unmap can span more than one drm_gpuva + * where the first and last ones are covered partially, requring + * a remap for the first with a prev VA and remap for the last + * with a next VA. */ - vma_count = 1; + vma_count = 2; break; default: diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88e821d67af7..9c8907bc61d9 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -314,17 +314,17 @@ static int radeon_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) - goto err_free; + return ret; pci_set_drvdata(pdev, ddev); ret = radeon_driver_load_kms(ddev, flags); if (ret) - goto err_agp; + goto err; ret = drm_dev_register(ddev, flags); if (ret) - goto err_agp; + goto err; if (rdev->mc.real_vram_size <= (8 * 1024 * 1024)) format = drm_format_info(DRM_FORMAT_C8); @@ -337,30 +337,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, return 0; -err_agp: +err: pci_disable_device(pdev); -err_free: - drm_dev_put(ddev); return ret; } static void -radeon_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - drm_put_dev(dev); -} - -static void radeon_pci_shutdown(struct pci_dev *pdev) { - /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown - */ - if (radeon_device_is_virtual()) - radeon_pci_remove(pdev); - #if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a @@ -613,7 +597,6 @@ static struct pci_driver radeon_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, .probe = radeon_pci_probe, - .remove = radeon_pci_remove, .shutdown = radeon_pci_shutdown, .driver.pm = &radeon_pm_ops, }; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 645e33bf7947..ba1446acd703 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -84,7 +84,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) rdev->agp = NULL; done_free: - kfree(rdev); dev->dev_private = NULL; } diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 7b613997bb50..727cdf768161 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -361,7 +361,7 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2, FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) | - FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1)); + FIELD_PREP_WM16(RK3228_HDMI_SCLIN_MSK, 1)); } static enum drm_connector_status diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index b50927a824b4..7ec7bea5e38e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1031,7 +1031,7 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, return format; if (drm_rect_width(src) >> 16 < 4 || drm_rect_height(src) >> 16 < 4 || - drm_rect_width(dest) < 4 || drm_rect_width(dest) < 4) { + drm_rect_width(dest) < 4 || drm_rect_height(dest) < 4) { drm_err(vop2->drm, "Invalid size: %dx%d->%dx%d, min size is 4x4\n", drm_rect_width(src) >> 16, drm_rect_height(src) >> 16, drm_rect_width(dest), drm_rect_height(dest)); diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 5a4697f636f2..fe174a4857be 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -70,6 +70,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + entity->last_user = current->group_leader; /* * It's perfectly valid to initialize an entity without having a valid * scheduler attached. It's just not valid to use the scheduler before it @@ -172,26 +173,15 @@ int drm_sched_entity_error(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_error); +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb); + static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); - - drm_sched_fence_scheduled(job->s_fence, NULL); - drm_sched_fence_finished(job->s_fence, -ESRCH); - WARN_ON(job->s_fence->parent); - job->sched->ops->free_job(job); -} - -/* Signal the scheduler finished fence when the entity in question is killed. */ -static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_job *job = container_of(cb, struct drm_sched_job, - finish_cb); + struct dma_fence *f; unsigned long index; - dma_fence_put(f); - /* Wait for all dependencies to avoid data corruptions */ xa_for_each(&job->dependencies, index, f) { struct drm_sched_fence *s_fence = to_drm_sched_fence(f); @@ -219,6 +209,21 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, dma_fence_put(f); } + drm_sched_fence_scheduled(job->s_fence, NULL); + drm_sched_fence_finished(job->s_fence, -ESRCH); + WARN_ON(job->s_fence->parent); + job->sched->ops->free_job(job); +} + +/* Signal the scheduler finished fence when the entity in question is killed. */ +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + + dma_fence_put(f); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); schedule_work(&job->work); } @@ -302,7 +307,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); - if ((!last_user || last_user == current->group_leader) && + if (last_user == current->group_leader && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) drm_sched_entity_kill(entity); @@ -552,10 +557,11 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; } - spin_unlock(&entity->lock); if (entity->num_sched_list == 1) entity->sched_list = NULL; + + spin_unlock(&entity->lock); } /** diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 46119aacb809..c39f0245e3a9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -965,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, dma_resv_assert_held(resv); dma_resv_for_each_fence(&cursor, resv, usage, fence) { - /* Make sure to grab an additional ref on the added fence */ - dma_fence_get(fence); - ret = drm_sched_job_add_dependency(job, fence); - if (ret) { - dma_fence_put(fence); + /* + * As drm_sched_job_add_dependency always consumes the fence + * reference (even when it fails), and dma_resv_for_each_fence + * is not obtaining one, we need to grab one before calling. + */ + ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); + if (ret) return ret; - } } return 0; } diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 7d9e85e932d7..f0e72d4b6a47 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -85,6 +85,7 @@ config DRM_PANEL_MIPI_DBI config DRM_PIXPAPER tristate "DRM support for PIXPAPER display panels" depends on DRM && SPI + depends on MMU select DRM_CLIENT_SELECTION select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c index 718832b08d96..c46f17ba7236 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c @@ -100,8 +100,10 @@ vmw_cursor_update_type(struct vmw_private *vmw, struct vmw_plane_state *vps) if (vmw->has_mob) { if ((vmw->capabilities2 & SVGA_CAP2_CURSOR_MOB) != 0) return VMW_CURSOR_UPDATE_MOB; + else + return VMW_CURSOR_UPDATE_GB_ONLY; } - + drm_warn_once(&vmw->drm, "Unknown Cursor Type!\n"); return VMW_CURSOR_UPDATE_NONE; } @@ -139,6 +141,7 @@ static u32 vmw_cursor_mob_size(enum vmw_cursor_update_type update_type, { switch (update_type) { case VMW_CURSOR_UPDATE_LEGACY: + case VMW_CURSOR_UPDATE_GB_ONLY: case VMW_CURSOR_UPDATE_NONE: return 0; case VMW_CURSOR_UPDATE_MOB: @@ -623,6 +626,7 @@ int vmw_cursor_plane_prepare_fb(struct drm_plane *plane, if (!surface || vps->cursor.legacy.id == surface->snooper.id) vps->cursor.update_type = VMW_CURSOR_UPDATE_NONE; break; + case VMW_CURSOR_UPDATE_GB_ONLY: case VMW_CURSOR_UPDATE_MOB: { bo = vmw_user_object_buffer(&vps->uo); if (bo) { @@ -737,6 +741,7 @@ void vmw_cursor_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { + struct vmw_bo *bo; struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct drm_plane_state *old_state = @@ -762,6 +767,15 @@ vmw_cursor_plane_atomic_update(struct drm_plane *plane, case VMW_CURSOR_UPDATE_MOB: vmw_cursor_update_mob(dev_priv, vps); break; + case VMW_CURSOR_UPDATE_GB_ONLY: + bo = vmw_user_object_buffer(&vps->uo); + if (bo) + vmw_send_define_cursor_cmd(dev_priv, bo->map.virtual, + vps->base.crtc_w, + vps->base.crtc_h, + vps->base.hotspot_x, + vps->base.hotspot_y); + break; case VMW_CURSOR_UPDATE_NONE: /* do nothing */ break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h index 40694925a70e..0c2cc0699b0d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h @@ -33,6 +33,7 @@ static const u32 __maybe_unused vmw_cursor_plane_formats[] = { enum vmw_cursor_update_type { VMW_CURSOR_UPDATE_NONE = 0, VMW_CURSOR_UPDATE_LEGACY, + VMW_CURSOR_UPDATE_GB_ONLY, VMW_CURSOR_UPDATE_MOB, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index d539f25b5fbe..3057f8baa7d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3668,6 +3668,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv, cmd_id = header->id; + if (header->size > SVGA_CMD_MAX_DATASIZE) { + VMW_DEBUG_USER("SVGA3D command: %d is too big.\n", + cmd_id + SVGA_3D_CMD_BASE); + return -E2BIG; + } *size = header->size + sizeof(SVGA3dCmdHeader); cmd_id -= SVGA_3D_CMD_BASE; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 7de20e56082c..fd4e76486f2d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -32,22 +32,22 @@ enum vmw_bo_dirty_method { /** * struct vmw_bo_dirty - Dirty information for buffer objects + * @ref_count: Reference count for this structure. Must be first member! * @start: First currently dirty bit * @end: Last currently dirty bit + 1 * @method: The currently used dirty method * @change_count: Number of consecutive method change triggers - * @ref_count: Reference count for this structure * @bitmap_size: The size of the bitmap in bits. Typically equal to the * nuber of pages in the bo. * @bitmap: A bitmap where each bit represents a page. A set bit means a * dirty page. */ struct vmw_bo_dirty { + struct kref ref_count; unsigned long start; unsigned long end; enum vmw_bo_dirty_method method; unsigned int change_count; - unsigned int ref_count; unsigned long bitmap_size; unsigned long bitmap[]; }; @@ -221,7 +221,7 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo) int ret; if (dirty) { - dirty->ref_count++; + kref_get(&dirty->ref_count); return 0; } @@ -235,7 +235,7 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo) dirty->bitmap_size = num_pages; dirty->start = dirty->bitmap_size; dirty->end = 0; - dirty->ref_count = 1; + kref_init(&dirty->ref_count); if (num_pages < PAGE_SIZE / sizeof(pte_t)) { dirty->method = VMW_BO_DIRTY_PAGETABLE; } else { @@ -274,10 +274,8 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; - if (dirty && --dirty->ref_count == 0) { - kvfree(dirty); + if (dirty && kref_put(&dirty->ref_count, (void *)kvfree)) vbo->dirty = NULL; - } } /** diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 06cb6b02ec64..f680c8b8f258 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -168,6 +168,7 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define FAST_CLEAR_VALIGN_FIX REG_BIT(13) #define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) @@ -342,6 +343,7 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 69e2840c7ef0..663a79ec960d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); /** * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); /** * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index d5dbc51e8612..bc5b4c5fab81 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2883b39c9b37..456899238377 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -988,16 +988,16 @@ void xe_device_shutdown(struct xe_device *xe) drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - xe_display_pm_shutdown(xe); + xe_display_pm_shutdown(xe); - xe_irq_suspend(xe); + xe_irq_suspend(xe); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - xe_display_pm_shutdown_late(xe); - } else { + xe_display_pm_shutdown_late(xe); + + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } @@ -1070,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe) spin_lock(>->global_invl_lock); xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(>->global_invl_lock); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7715e74bb945..a8ab363a8046 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -165,7 +165,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 37b2b93b73d6..cb5f204c08ed 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,6 +10,7 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include "xe_dep_scheduler.h" @@ -324,6 +325,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -333,6 +344,9 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 27b76cf9da89..df1c69dc81f1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -155,6 +156,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7fdd0a97a628..5edc0cad47e2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -292,6 +292,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3e0ad7e5b5df..6d3db5e55d98 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -813,12 +813,16 @@ static int gt_reset(struct xe_gt *gt) unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) { + err = -ECANCELED; + goto err_pm_put; + } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) { + err = -ENODEV; + goto err_pm_put; + } xe_gt_info(gt, "reset started\n"); @@ -826,8 +830,6 @@ static int gt_reset(struct xe_gt *gt) if (!err) xe_gt_warn(gt, "reset block failed to get lifted"); - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; @@ -874,6 +876,7 @@ err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); return err; @@ -895,7 +898,9 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, >->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index f8950a52d0a4..bdc9d9877ec4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, str_up_down(pg_status & media_slices[n].status_bit)); } + + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 18f6327bf552..283d846c3512 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -200,6 +200,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 53024eb5670b..94ed8159496f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -44,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. + */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else __guc_exec_queue_destroy(guc, q); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1d667fa36cf3..a36ce7dce8cc 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m) err = xe_migrate_lock_prepare_vm(tile, m, vm); if (err) - return err; + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, @@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (current_bytes & ~PAGE_MASK) { int pitch = 4; - current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); } __fence = xe_migrate_vram(m, current_bytes, diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a4894eb0d7f3..125698a9ecf1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> @@ -1389,7 +1390,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1409,7 +1412,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], - ¶m->syncs_user[num_syncs], 0); + ¶m->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1539,7 +1544,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, ¶m); + err = xe_oa_parse_syncs(stream->oa, stream, ¶m); if (err) goto err_config_put; @@ -1635,6 +1640,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1826,6 +1832,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; @@ -1836,17 +1843,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1870,6 +1891,8 @@ err_destroy: xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -2083,22 +2106,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, ¶m); - if (ret) - goto err_exec_q; - mutex_lock(¶m.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); mutex_unlock(¶m.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 2628f78c4e8d..daf701b5d48b 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -248,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be91343829dd..9a6df79fc5b6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a1c88f9a6c76..07f96bda638a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2022,7 +2022,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2252,7 +2252,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 7e2db71ff34e..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -302,6 +302,11 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 if (!vma) return -EINVAL; + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + if (xe_vma_has_default_mem_attrs(vma)) return 0; @@ -1034,6 +1039,9 @@ retry: if (err) return err; + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) @@ -1054,7 +1062,6 @@ retry: range_debug(range, "PAGE FAULT"); - dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { ktime_t migrate_start = xe_svm_stats_ktime_get(); @@ -1073,7 +1080,17 @@ retry: drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. + */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -1083,6 +1100,7 @@ retry: } } +get_pages: get_pages_start = xe_svm_stats_ktime_get(); range_debug(range, "GET PAGES"); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 82872a51f098..d48ab7b32ca5 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index fec331d791e7..b2d09c596714 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val) */ DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, if (_T) xe_validation_ctx_fini(_T);, - ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); - _ret ? NULL : _ctx; }), + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, - struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) {return *_T; } #define class_xe_validation_is_conditional true @@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * exhaustive eviction. */ #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ - scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ drm_exec_until_all_locked(_exec) #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 027e6ce648c5..ccb09ef4ec9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -616,6 +616,13 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -628,8 +635,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -932,11 +938,6 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, @@ -947,11 +948,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -972,10 +970,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -986,10 +980,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -2272,12 +2263,16 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -2590,14 +2585,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, .pat_index = op->map.pat_index, }; - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; vma = new_vma(vm, &op->base.map, &default_attr, flags); @@ -2606,7 +2594,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); @@ -2637,18 +2625,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, &old->attr, flags); @@ -2832,7 +2809,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2843,7 +2820,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm), exec); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2913,14 +2891,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) } static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow evicting a BO within the VM if it is not part of an + * array of binds, as an array of binds can evict another BO within the + * bind. + */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2931,11 +2918,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2944,7 +2933,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { @@ -2959,7 +2948,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region], @@ -3005,7 +2994,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -3267,7 +3256,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3382,7 +3372,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } @@ -3614,8 +3606,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ? @@ -3638,6 +3634,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -4198,7 +4196,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, struct xe_vma_ops vops; struct drm_gpuva_ops *ops = NULL; struct drm_gpuva_op *__op; - bool is_cpu_addr_mirror = false; + unsigned int vma_flags = 0; bool remap_op = false; struct xe_vma_mem_attr tmp_attr; u16 default_pat; @@ -4228,15 +4226,17 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, vma = gpuva_to_vma(op->base.unmap.va); XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_REMAP) { vma = gpuva_to_vma(op->base.remap.unmap->va); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.is_cpu_addr_mirror = true; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; op->map.pat_index = default_pat; } } else { @@ -4245,11 +4245,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, xe_assert(vm->xe, !remap_op); xe_assert(vm->xe, xe_vma_has_no_bo(vma)); remap_op = true; - - if (xe_vma_is_cpu_addr_mirror(vma)) - is_cpu_addr_mirror = true; - else - is_cpu_addr_mirror = false; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { @@ -4258,10 +4254,10 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, /* * In case of madvise ops DRM_GPUVA_OP_MAP is * always after DRM_GPUVA_OP_REMAP, so ensure - * we assign op->map.is_cpu_addr_mirror true - * if REMAP is for xe_vma_is_cpu_addr_mirror vma + * to propagate the flags from the vma we're + * unmapping. */ - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; } } print_op(vm->xe, __op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index da39940501d8..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -46,6 +46,7 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) /** * struct xe_vma_mem_attr - memory attributes associated with vma @@ -345,17 +346,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; @@ -476,6 +470,7 @@ struct xe_vma_ops { /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedb..652df7a5f4f6 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -26,15 +26,35 @@ #define BAR_SIZE_SHIFT 20 -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. + * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev); ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; } - resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc6..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ struct xe_device; struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index cd03891654a1..3cf30718b200 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -679,6 +679,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14023061436"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, @@ -916,6 +918,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("14024681466"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) + }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 5341aa79f387..04420a713be0 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -93,7 +93,7 @@ config HID_GENERIC If unsure, say Y. config HID_HAPTIC - tristate "Haptic touchpad support" + bool "Haptic touchpad support" default n help Support for touchpads with force sensors and haptic actuators instead of a diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 5a95ea3bec98..803b883ae875 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -689,7 +689,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* Copy starts from data->block[1] so the length can + * be at max I2C_SMBUS_CLOCK_MAX + 1 + */ + + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block + 1, data->block[0]); } @@ -700,7 +707,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, I2C_SMBUS_BLOCK_MAX, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* data_length here is data->block[0] + 1 + * so make sure that the data->block[0] is + * less than or equals I2C_SMBUS_BLOCK_MAX + 1 + */ + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block, data->block[0] + 1); } @@ -709,7 +723,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, size = I2C_SMBUS_BLOCK_DATA; read_write = I2C_SMBUS_READ; - count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, + /* data_length is data->block[0] + 1, so + * so data->block[0] should be less than or + * equal to the I2C_SMBUS_BLOCK_MAX + 1 + */ + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, command, data->block, data->block[0] + 1); break; diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 7107071c7c51..337d2dc81b4c 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -2523,7 +2523,7 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x85, 0x0088, "iDeviceName" }, { 0x85, 0x0089, "iDeviceChemistry" }, { 0x85, 0x008a, "ManufacturerData" }, - { 0x85, 0x008b, "Rechargable" }, + { 0x85, 0x008b, "Rechargeable" }, { 0x85, 0x008c, "WarningCapacityLimit" }, { 0x85, 0x008d, "CapacityGranularity1" }, { 0x85, 0x008e, "CapacityGranularity2" }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5721b8414bbd..0723b4b1c9ec 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -342,6 +342,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOW_FIRST 0x1500 #define USB_DEVICE_ID_CODEMERCS_IOW_LAST 0x15ff +#define USB_VENDOR_ID_COOLER_MASTER 0x2516 +#define USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE 0x01b7 + #define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_DEVICE_ID_CORSAIR_K90 0x1b02 #define USB_DEVICE_ID_CORSAIR_K70R 0x1b09 @@ -1432,6 +1435,7 @@ #define USB_VENDOR_ID_VRS 0x0483 #define USB_DEVICE_ID_VRS_DFP 0xa355 +#define USB_DEVICE_ID_VRS_R295 0xa44c #define USB_VENDOR_ID_VTL 0x0306 #define USB_DEVICE_ID_VTL_MULTITOUCH_FF3F 0xff3f diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5d7532d79d21..e56e7de53279 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -635,7 +635,10 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, return; } - if (value == 0 || value < dev->battery_min || value > dev->battery_max) + if ((usage & HID_USAGE_PAGE) == HID_UP_DIGITIZER && value == 0) + return; + + if (value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index aaef405a717e..5e763de4b94f 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -75,6 +75,7 @@ MODULE_PARM_DESC(disable_tap_to_click, #define HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS BIT(27) #define HIDPP_QUIRK_HI_RES_SCROLL_1P0 BIT(28) #define HIDPP_QUIRK_WIRELESS_STATUS BIT(29) +#define HIDPP_QUIRK_RESET_HI_RES_SCROLL BIT(30) /* These are just aliases for now */ #define HIDPP_QUIRK_KBD_SCROLL_WHEEL HIDPP_QUIRK_HIDPP_WHEELS @@ -193,6 +194,7 @@ struct hidpp_device { void *private_data; struct work_struct work; + struct work_struct reset_hi_res_work; struct kfifo delayed_work_fifo; struct input_dev *delayed_input; @@ -3836,6 +3838,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, struct hidpp_report *answer = hidpp->send_receive_buf; struct hidpp_report *report = (struct hidpp_report *)data; int ret; + int last_online; /* * If the mutex is locked then we have a pending answer from a @@ -3877,6 +3880,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n"); } + last_online = hidpp->battery.online; if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { ret = hidpp20_battery_event_1000(hidpp, data, size); if (ret != 0) @@ -3901,6 +3905,11 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, return ret; } + if (hidpp->quirks & HIDPP_QUIRK_RESET_HI_RES_SCROLL) { + if (last_online == 0 && hidpp->battery.online == 1) + schedule_work(&hidpp->reset_hi_res_work); + } + if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) { ret = hidpp10_wheel_raw_event(hidpp, data, size); if (ret != 0) @@ -4274,6 +4283,13 @@ static void hidpp_connect_event(struct work_struct *work) hidpp->delayed_input = input; } +static void hidpp_reset_hi_res_handler(struct work_struct *work) +{ + struct hidpp_device *hidpp = container_of(work, struct hidpp_device, reset_hi_res_work); + + hi_res_scroll_enable(hidpp); +} + static DEVICE_ATTR(builtin_power_supply, 0000, NULL, NULL); static struct attribute *sysfs_attrs[] = { @@ -4404,6 +4420,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) } INIT_WORK(&hidpp->work, hidpp_connect_event); + INIT_WORK(&hidpp->reset_hi_res_work, hidpp_reset_hi_res_handler); mutex_init(&hidpp->send_mutex); init_waitqueue_head(&hidpp->wait); @@ -4499,6 +4516,7 @@ static void hidpp_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_work_sync(&hidpp->work); + cancel_work_sync(&hidpp->reset_hi_res_work); mutex_destroy(&hidpp->send_mutex); } @@ -4546,6 +4564,9 @@ static const struct hid_device_id hidpp_devices[] = { { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* Logitech G502 Lightspeed Wireless Gaming Mouse */ + LDJ_DEVICE(0x407f), + .driver_data = HIDPP_QUIRK_RESET_HI_RES_SCROLL }, { LDJ_DEVICE(HID_ANY_ID) }, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 2879e65cf303..179dc316b4b5 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -94,9 +94,8 @@ enum report_mode { TOUCHPAD_REPORT_ALL = TOUCHPAD_REPORT_BUTTONS | TOUCHPAD_REPORT_CONTACTS, }; -#define MT_IO_FLAGS_RUNNING 0 -#define MT_IO_FLAGS_ACTIVE_SLOTS 1 -#define MT_IO_FLAGS_PENDING_SLOTS 2 +#define MT_IO_SLOTS_MASK GENMASK(7, 0) /* reserve first 8 bits for slot tracking */ +#define MT_IO_FLAGS_RUNNING 32 static const bool mtrue = true; /* default for true */ static const bool mfalse; /* default for false */ @@ -172,7 +171,11 @@ struct mt_device { struct timer_list release_timer; /* to release sticky fingers */ struct hid_haptic_device *haptic; /* haptic related configuration */ struct hid_device *hdev; /* hid_device we're attached to */ - unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_*) */ + unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_RUNNING) + * first 8 bits are reserved for keeping the slot + * states, this is fine because we only support up + * to 250 slots (MT_MAX_MAXCONTACT) + */ __u8 inputmode_value; /* InputMode HID feature value */ __u8 maxcontacts; bool is_buttonpad; /* is this device a button pad? */ @@ -986,6 +989,7 @@ static void mt_release_pending_palms(struct mt_device *td, for_each_set_bit(slotnum, app->pending_palm_slots, td->maxcontacts) { clear_bit(slotnum, app->pending_palm_slots); + clear_bit(slotnum, &td->mt_io_flags); input_mt_slot(input, slotnum); input_mt_report_slot_inactive(input); @@ -1019,12 +1023,6 @@ static void mt_sync_frame(struct mt_device *td, struct mt_application *app, app->left_button_state = 0; if (td->is_haptic_touchpad) hid_haptic_pressure_reset(td->haptic); - - if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags)) - set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - else - clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } static int mt_compute_timestamp(struct mt_application *app, __s32 value) @@ -1202,7 +1200,9 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input, input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); - set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); + set_bit(slotnum, &td->mt_io_flags); + } else { + clear_bit(slotnum, &td->mt_io_flags); } return 0; @@ -1337,7 +1337,7 @@ static void mt_touch_report(struct hid_device *hid, * defect. */ if (app->quirks & MT_QUIRK_STICKY_FINGERS) { - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else @@ -1742,6 +1742,7 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) case HID_CP_CONSUMER_CONTROL: case HID_GD_WIRELESS_RADIO_CTLS: case HID_GD_SYSTEM_MULTIAXIS: + case HID_DG_PEN: /* already handled by hid core */ break; case HID_DG_TOUCHSCREEN: @@ -1813,6 +1814,7 @@ static void mt_release_contacts(struct hid_device *hid) for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); input_mt_report_slot_inactive(input_dev); + clear_bit(i, &td->mt_io_flags); } input_mt_sync_frame(input_dev); input_sync(input_dev); @@ -1835,7 +1837,7 @@ static void mt_expired_timeout(struct timer_list *t) */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mt_release_contacts(hdev); clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index fb4985988615..c2849a541f65 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1455,10 +1455,10 @@ static void joycon_parse_imu_report(struct joycon_ctlr *ctlr, ctlr->imu_avg_delta_ms; ctlr->imu_timestamp_us += 1000 * ctlr->imu_avg_delta_ms; if (dropped_pkts > JC_IMU_DROPPED_PKT_WARNING) { - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "compensating for %u dropped IMU reports\n", dropped_pkts); - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "delta=%u avg_delta=%u\n", delta, ctlr->imu_avg_delta_ms); } @@ -2420,7 +2420,7 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) struct joycon_input_report *report; req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; - ret = joycon_send_subcmd(ctlr, &req, 0, HZ); + ret = joycon_send_subcmd(ctlr, &req, 0, 2 * HZ); if (ret) { hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); return ret; diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index ffd034566e2e..bcd4bccf1a7c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_COOLER_MASTER, USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, @@ -206,6 +207,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_VRS, USB_DEVICE_ID_VRS_R295), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index 8433a991e7f4..0156ab391778 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -466,7 +466,7 @@ static void quicki2c_dma_adv_enable(struct quicki2c_device *qcdev) dev_warn(qcdev->dev, "Max frame size is smaller than hid max input length!"); thc_i2c_set_rx_max_size(qcdev->thc_hw, - le16_to_cpu(qcdev->i2c_max_frame_size)); + qcdev->i2c_max_frame_size); } thc_i2c_rx_max_size_enable(qcdev->thc_hw, true); } diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 84314989dc53..14cabd5dc6dd 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -33,6 +33,10 @@ struct quickspi_driver_data ptl = { .max_packet_size_value = MAX_PACKET_SIZE_VALUE_LNL, }; +struct quickspi_driver_data arl = { + .max_packet_size_value = MAX_PACKET_SIZE_VALUE_MTL, +}; + /* THC QuickSPI ACPI method to get device properties */ /* HIDSPI Method: {6e2ac436-0fcf-41af-a265-b32a220dcfab} */ static guid_t hidspi_guid = @@ -978,6 +982,8 @@ static const struct pci_device_id quickspi_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, THC_PTL_U_DEVICE_ID_SPI_PORT2, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT1, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT2, &ptl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT1, &arl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT2, &arl), }, {} }; MODULE_DEVICE_TABLE(pci, quickspi_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h index f3532d866749..c30e1a42eb09 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h @@ -21,6 +21,8 @@ #define PCI_DEVICE_ID_INTEL_THC_PTL_U_DEVICE_ID_SPI_PORT2 0xE44B #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT1 0x4D49 #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT2 0x4D4B +#define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT1 0x7749 +#define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT2 0x774B /* HIDSPI special ACPI parameters DSM methods */ #define ACPI_QUICKSPI_REVISION_NUM 2 diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index e6ba2ddcc9cb..16f780bc879b 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -280,8 +280,7 @@ int reset_tic(struct quickspi_device *qsdev) qsdev->reset_ack = false; - /* First interrupt uses level trigger to avoid missing interrupt */ - thc_int_trigger_type_select(qsdev->thc_hw, false); + thc_int_trigger_type_select(qsdev->thc_hw, true); ret = acpi_tic_reset(qsdev); if (ret) diff --git a/drivers/hwmon/cgbc-hwmon.c b/drivers/hwmon/cgbc-hwmon.c index 772f44d56ccf..3aff4e092132 100644 --- a/drivers/hwmon/cgbc-hwmon.c +++ b/drivers/hwmon/cgbc-hwmon.c @@ -107,6 +107,9 @@ static int cgbc_hwmon_probe_sensors(struct device *dev, struct cgbc_hwmon_data * nb_sensors = data[0]; hwmon->sensors = devm_kzalloc(dev, sizeof(*hwmon->sensors) * nb_sensors, GFP_KERNEL); + if (!hwmon->sensors) + return -ENOMEM; + sensor = hwmon->sensors; for (i = 0; i < nb_sensors; i++) { diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index 644dc3ca9df7..f81c3bc422f4 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -12,9 +12,9 @@ * Copyright (c) 2024 Cryolitia PukNgae */ -#include <linux/acpi.h> #include <linux/dmi.h> #include <linux/hwmon.h> +#include <linux/io.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/module.h> @@ -276,31 +276,6 @@ static int gpd_generic_read_rpm(void) return (u16)high << 8 | low; } -static void gpd_win4_init_ec(void) -{ - u8 chip_id, chip_ver; - - gpd_ecram_read(0x2000, &chip_id); - - if (chip_id == 0x55) { - gpd_ecram_read(0x1060, &chip_ver); - gpd_ecram_write(0x1060, chip_ver | 0x80); - } -} - -static int gpd_win4_read_rpm(void) -{ - int ret; - - ret = gpd_generic_read_rpm(); - - if (ret == 0) - // Re-init EC when speed is 0 - gpd_win4_init_ec(); - - return ret; -} - static int gpd_wm2_read_rpm(void) { for (u16 pwm_ctr_offset = GPD_PWM_CTR_OFFSET; @@ -320,11 +295,10 @@ static int gpd_wm2_read_rpm(void) static int gpd_read_rpm(void) { switch (gpd_driver_priv.drvdata->board) { + case win4_6800u: case win_mini: case duo: return gpd_generic_read_rpm(); - case win4_6800u: - return gpd_win4_read_rpm(); case win_max_2: return gpd_wm2_read_rpm(); } @@ -607,6 +581,28 @@ static struct hwmon_chip_info gpd_fan_chip_info = { .info = gpd_fan_hwmon_channel_info }; +static void gpd_win4_init_ec(void) +{ + u8 chip_id, chip_ver; + + gpd_ecram_read(0x2000, &chip_id); + + if (chip_id == 0x55) { + gpd_ecram_read(0x1060, &chip_ver); + gpd_ecram_write(0x1060, chip_ver | 0x80); + } +} + +static void gpd_init_ec(void) +{ + // The buggy firmware won't initialize EC properly on boot. + // Before its initialization, reading RPM will always return 0, + // and writing PWM will have no effect. + // Initialize it manually on driver load. + if (gpd_driver_priv.drvdata->board == win4_6800u) + gpd_win4_init_ec(); +} + static int gpd_fan_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -615,14 +611,14 @@ static int gpd_fan_probe(struct platform_device *pdev) const struct device *hwdev; res = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (IS_ERR(res)) - return dev_err_probe(dev, PTR_ERR(res), + if (!res) + return dev_err_probe(dev, -EINVAL, "Failed to get platform resource\n"); region = devm_request_region(dev, res->start, resource_size(res), DRIVER_NAME); - if (IS_ERR(region)) - return dev_err_probe(dev, PTR_ERR(region), + if (!region) + return dev_err_probe(dev, -EBUSY, "Failed to request region\n"); hwdev = devm_hwmon_device_register_with_info(dev, @@ -631,9 +627,11 @@ static int gpd_fan_probe(struct platform_device *pdev) &gpd_fan_chip_info, NULL); if (IS_ERR(hwdev)) - return dev_err_probe(dev, PTR_ERR(region), + return dev_err_probe(dev, PTR_ERR(hwdev), "Failed to register hwmon device\n"); + gpd_init_ec(); + return 0; } diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 52cf62e45a86..6bba9b50c51b 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -336,10 +336,9 @@ static int isl68137_probe_from_dt(struct device *dev, struct isl68137_data *data) { const struct device_node *np = dev->of_node; - struct device_node *child; int err; - for_each_child_of_node(np, child) { + for_each_child_of_node_scoped(np, child) { if (strcmp(child->name, "channel")) continue; diff --git a/drivers/hwmon/pmbus/max34440.c b/drivers/hwmon/pmbus/max34440.c index 56834d26f8ef..ef981ed97da8 100644 --- a/drivers/hwmon/pmbus/max34440.c +++ b/drivers/hwmon/pmbus/max34440.c @@ -336,18 +336,18 @@ static struct pmbus_driver_info max34440_info[] = { .format[PSC_CURRENT_IN] = direct, .format[PSC_CURRENT_OUT] = direct, .format[PSC_TEMPERATURE] = direct, - .m[PSC_VOLTAGE_IN] = 1, + .m[PSC_VOLTAGE_IN] = 125, .b[PSC_VOLTAGE_IN] = 0, .R[PSC_VOLTAGE_IN] = 0, - .m[PSC_VOLTAGE_OUT] = 1, + .m[PSC_VOLTAGE_OUT] = 125, .b[PSC_VOLTAGE_OUT] = 0, .R[PSC_VOLTAGE_OUT] = 0, - .m[PSC_CURRENT_IN] = 1, + .m[PSC_CURRENT_IN] = 250, .b[PSC_CURRENT_IN] = 0, - .R[PSC_CURRENT_IN] = 2, - .m[PSC_CURRENT_OUT] = 1, + .R[PSC_CURRENT_IN] = -1, + .m[PSC_CURRENT_OUT] = 250, .b[PSC_CURRENT_OUT] = 0, - .R[PSC_CURRENT_OUT] = 2, + .R[PSC_CURRENT_OUT] = -1, .m[PSC_TEMPERATURE] = 1, .b[PSC_TEMPERATURE] = 0, .R[PSC_TEMPERATURE] = 2, diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c index 557ad3e7752a..f36c0229328f 100644 --- a/drivers/hwmon/sht3x.c +++ b/drivers/hwmon/sht3x.c @@ -291,24 +291,26 @@ out: return data; } -static int temp1_input_read(struct device *dev) +static int temp1_input_read(struct device *dev, long *temp) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->temperature; + *temp = data->temperature; + return 0; } -static int humidity1_input_read(struct device *dev) +static int humidity1_input_read(struct device *dev, long *humidity) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->humidity; + *humidity = data->humidity; + return 0; } /* @@ -706,6 +708,7 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { enum sht3x_limits index; + int ret; switch (type) { case hwmon_chip: @@ -720,10 +723,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp: switch (attr) { case hwmon_temp_input: - *val = temp1_input_read(dev); - break; + return temp1_input_read(dev, val); case hwmon_temp_alarm: - *val = temp1_alarm_read(dev); + ret = temp1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_temp_max: index = limit_max; @@ -748,10 +753,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_humidity: switch (attr) { case hwmon_humidity_input: - *val = humidity1_input_read(dev); - break; + return humidity1_input_read(dev, val); case hwmon_humidity_alarm: - *val = humidity1_alarm_read(dev); + ret = humidity1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_humidity_max: index = limit_max; diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h index 018a42de8b1e..9b7e9494dd12 100644 --- a/drivers/i2c/busses/i2c-amd-mp2.h +++ b/drivers/i2c/busses/i2c-amd-mp2.h @@ -207,7 +207,6 @@ static inline void amd_mp2_pm_runtime_get(struct amd_mp2_dev *mp2_dev) static inline void amd_mp2_pm_runtime_put(struct amd_mp2_dev *mp2_dev) { - pm_runtime_mark_last_busy(&mp2_dev->pci_dev->dev); pm_runtime_put_autosuspend(&mp2_dev->pci_dev->dev); } diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c index edc047e3e535..b64adef778d4 100644 --- a/drivers/i2c/busses/i2c-at91-core.c +++ b/drivers/i2c/busses/i2c-at91-core.c @@ -313,7 +313,6 @@ static int __maybe_unused at91_twi_resume_noirq(struct device *dev) return ret; } - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); at91_init_twi_bus(twi_dev); diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 59795c1c24ff..894cedbca99f 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -717,7 +717,6 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) ret = (ret < 0) ? ret : num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 697d095afbe4..0fb728ade92e 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1128,7 +1128,6 @@ out: cdns_i2c_set_mode(CDNS_I2C_MODE_SLAVE, id); #endif - pm_runtime_mark_last_busy(id->dev); pm_runtime_put_autosuspend(id->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 6a3d4e9e07f4..a773ba082321 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -543,7 +543,6 @@ i2c_davinci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; @@ -821,7 +820,6 @@ static int davinci_i2c_probe(struct platform_device *pdev) if (r) goto err_unuse_clocks; - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c7a72c28786c..41e9b5ecad20 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -901,7 +901,6 @@ done: i2c_dw_release_lock(dev); done_nolock: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index 5358f5ddf924..95ab910b80c0 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -373,7 +373,6 @@ static int hix5hd2_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index cba992fa6557..57fbec1259be 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -930,7 +930,6 @@ out: */ iowrite8(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); - pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index a454f9f25146..88192c25c44c 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1131,7 +1131,6 @@ static int img_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, break; } - pm_runtime_mark_last_busy(adap->dev.parent); pm_runtime_put_autosuspend(adap->dev.parent); return i2c->msg_status ? i2c->msg_status : num; @@ -1165,7 +1164,6 @@ static int img_i2c_init(struct img_i2c *i2c) "Unknown hardware revision (%d.%d.%d.%d)\n", (rev >> 24) & 0xff, (rev >> 16) & 0xff, (rev >> 8) & 0xff, rev & 0xff); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return -EINVAL; } @@ -1317,7 +1315,6 @@ static int img_i2c_init(struct img_i2c *i2c) /* Perform a synchronous sequence to reset the bus */ ret = img_i2c_reset_bus(i2c); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return ret; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 03b5a7e8c361..2a0962a0b441 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -363,7 +363,6 @@ static int lpi2c_imx_master_enable(struct lpi2c_imx_struct *lpi2c_imx) return 0; rpm_put: - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return ret; @@ -377,7 +376,6 @@ static int lpi2c_imx_master_disable(struct lpi2c_imx_struct *lpi2c_imx) temp &= ~MCR_MEN; writel(temp, lpi2c_imx->base + LPI2C_MCR); - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return 0; @@ -1462,7 +1460,6 @@ static int lpi2c_imx_probe(struct platform_device *pdev) if (ret) goto rpm_disable; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_info(&lpi2c_imx->adapter.dev, "LPI2C adapter registered\n"); @@ -1564,7 +1561,6 @@ static int lpi2c_suspend(struct device *dev) static int lpi2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 60f5c790ad7c..dcce882f3eba 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1637,7 +1637,6 @@ static int i2c_imx_xfer(struct i2c_adapter *adapter, result = i2c_imx_xfer_common(adapter, msgs, num, false); - pm_runtime_mark_last_busy(i2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(i2c_imx->adapter.dev.parent); return result; @@ -1822,7 +1821,6 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto clk_notifier_unregister; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_dbg(&i2c_imx->adapter.dev, "claimed irq %d\n", irq); @@ -1928,7 +1926,6 @@ static int i2c_imx_suspend(struct device *dev) static int i2c_imx_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8fc26a511320..1acba628e16c 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -766,7 +766,6 @@ mv64xxx_i2c_xfer_core(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) drv_data->num_msgs = 0; drv_data->msgs = NULL; - pm_runtime_mark_last_busy(&adap->dev); pm_runtime_put_autosuspend(&adap->dev); return ret; diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 541d808d62d0..14c059b03945 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -216,7 +216,6 @@ exit: if (status2 < 0) dev_err(i2cd->dev, "i2c stop failed %d\n", status2); } - pm_runtime_mark_last_busy(i2cd->dev); pm_runtime_put_autosuspend(i2cd->dev); return status; } diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 5fcc9f6c33e5..d9f590f0c384 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -828,7 +828,6 @@ omap_i2c_xfer_common(struct i2c_adapter *adap, struct i2c_msg msgs[], int num, omap->set_mpu_wkup_lat(omap->dev, -1); out: - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return r; } @@ -1510,7 +1509,6 @@ omap_i2c_probe(struct platform_device *pdev) dev_info(omap->dev, "bus %d rev%d.%d at %d kHz\n", adap->nr, major, minor, omap->speed); - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return 0; @@ -1605,7 +1603,6 @@ static int omap_i2c_suspend(struct device *dev) static int omap_i2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index a3afa11a71a1..e631d79baf14 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -450,7 +450,6 @@ static int cci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; err: - pm_runtime_mark_last_busy(cci->dev); pm_runtime_put_autosuspend(cci->dev); return ret; @@ -508,7 +507,6 @@ static int __maybe_unused cci_suspend(struct device *dev) static int __maybe_unused cci_resume(struct device *dev) { cci_resume_runtime(dev); - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 95a577764d5c..43fdd89b8beb 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -714,7 +714,6 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, else ret = geni_i2c_fifo_xfer(gi2c, msgs, num); - pm_runtime_mark_last_busy(gi2c->se.dev); pm_runtime_put_autosuspend(gi2c->se.dev); gi2c->cur = NULL; gi2c->err = 0; diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index fc348924d522..a0e076fc5f36 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1139,7 +1139,6 @@ static int qup_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1624,7 +1623,6 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap, if (ret == 0) ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1991,7 +1989,6 @@ static int qup_i2c_suspend(struct device *device) static int qup_i2c_resume(struct device *device) { qup_i2c_pm_resume_runtime(device); - pm_runtime_mark_last_busy(device); pm_request_autosuspend(device); return 0; } diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index b0ee9ac45a97..3e8f126cb7f7 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -206,7 +206,6 @@ static int riic_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return riic->err ?: num; @@ -452,7 +451,6 @@ static int riic_init_hw(struct riic_dev *riic) riic_clear_set_bit(riic, ICCR1_IICRST, 0, RIIC_ICCR1); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; } diff --git a/drivers/i2c/busses/i2c-rzv2m.c b/drivers/i2c/busses/i2c-rzv2m.c index b0e9c0b62429..238714850673 100644 --- a/drivers/i2c/busses/i2c-rzv2m.c +++ b/drivers/i2c/busses/i2c-rzv2m.c @@ -372,7 +372,6 @@ static int rzv2m_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 26ec34b19ad5..1b490525d8dd 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -302,7 +302,6 @@ static int sprd_i2c_xfer(struct i2c_adapter *i2c_adap, ret = sprd_i2c_handle_msg(i2c_adap, &msgs[im++], 1); err_msg: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return ret < 0 ? ret : im; @@ -559,7 +558,6 @@ static int sprd_i2c_probe(struct platform_device *pdev) goto err_rpm_put; } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index e6815f6cae78..dc69ed934ec8 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1761,7 +1761,6 @@ static int stm32f7_i2c_xfer_core(struct i2c_adapter *i2c_adap, } pm_free: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return (ret < 0) ? ret : num; @@ -1870,7 +1869,6 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, } pm_free: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; } @@ -1977,7 +1975,6 @@ pm_free: if (!stm32f7_i2c_is_slave_registered(i2c_dev)) stm32f7_i2c_enable_wakeup(i2c_dev, false); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2015,7 +2012,6 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) stm32f7_i2c_enable_wakeup(i2c_dev, false); } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; @@ -2328,7 +2324,6 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) dev_info(i2c_dev->dev, "STM32F7 I2C-%d bus adapter\n", adap->nr); - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-usbio.c b/drivers/i2c/busses/i2c-usbio.c index d42f9ab6e9a5..e7799abf6787 100644 --- a/drivers/i2c/busses/i2c-usbio.c +++ b/drivers/i2c/busses/i2c-usbio.c @@ -27,6 +27,7 @@ static const struct acpi_device_id usbio_i2c_acpi_hids[] = { { "INTC1008" }, /* MTL */ { "INTC10B3" }, /* ARL */ { "INTC10B6" }, /* LNL */ + { "INTC10D2" }, /* MTL-CVF */ { "INTC10E3" }, /* PTL */ { } }; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 607026c921d6..28015d77599d 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1349,7 +1349,6 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) mutex_unlock(&i2c->lock); out: - pm_runtime_mark_last_busy(i2c->dev); pm_runtime_put_autosuspend(i2c->dev); return err; } diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 75c8d08fa24e..b9f370c9f018 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -118,6 +118,7 @@ struct pca954x { raw_spinlock_t lock; struct regulator *supply; + struct gpio_desc *reset_gpio; struct reset_control *reset_cont; }; @@ -315,25 +316,6 @@ static u8 pca954x_regval(struct pca954x *data, u8 chan) return 1 << chan; } -static void pca954x_reset_assert(struct pca954x *data) -{ - if (data->reset_cont) - reset_control_assert(data->reset_cont); -} - -static void pca954x_reset_deassert(struct pca954x *data) -{ - if (data->reset_cont) - reset_control_deassert(data->reset_cont); -} - -static void pca954x_reset_mux(struct pca954x *data) -{ - pca954x_reset_assert(data); - udelay(1); - pca954x_reset_deassert(data); -} - static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) { struct pca954x *data = i2c_mux_priv(muxc); @@ -347,8 +329,6 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) ret = pca954x_reg_write(muxc->parent, client, regval); data->last_chan = ret < 0 ? 0 : regval; } - if (ret == -ETIMEDOUT && data->reset_cont) - pca954x_reset_mux(data); return ret; } @@ -358,7 +338,6 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan) struct pca954x *data = i2c_mux_priv(muxc); struct i2c_client *client = data->client; s32 idle_state; - int ret = 0; idle_state = READ_ONCE(data->idle_state); if (idle_state >= 0) @@ -368,10 +347,8 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan) if (idle_state == MUX_IDLE_DISCONNECT) { /* Deselect active channel */ data->last_chan = 0; - ret = pca954x_reg_write(muxc->parent, client, - data->last_chan); - if (ret == -ETIMEDOUT && data->reset_cont) - pca954x_reset_mux(data); + return pca954x_reg_write(muxc->parent, client, + data->last_chan); } /* otherwise leave as-is */ @@ -550,10 +527,29 @@ static int pca954x_get_reset(struct device *dev, struct pca954x *data) if (IS_ERR(data->reset_cont)) return dev_err_probe(dev, PTR_ERR(data->reset_cont), "Failed to get reset\n"); + else if (data->reset_cont) + return 0; + + /* + * fallback to legacy reset-gpios + */ + data->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(data->reset_gpio)) { + return dev_err_probe(dev, PTR_ERR(data->reset_gpio), + "Failed to get reset gpio"); + } return 0; } +static void pca954x_reset_deassert(struct pca954x *data) +{ + if (data->reset_cont) + reset_control_deassert(data->reset_cont); + else + gpiod_set_value_cansleep(data->reset_gpio, 0); +} + /* * I2C init/probing/exit functions */ @@ -593,7 +589,7 @@ static int pca954x_probe(struct i2c_client *client) if (ret) goto fail_cleanup; - if (data->reset_cont) { + if (data->reset_cont || data->reset_gpio) { udelay(1); pca954x_reset_deassert(data); /* Give the chip some time to recover. */ diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 37cd37556510..fab5d914029d 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -206,6 +206,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return ret; err_free: + ib_umem_release(umem); rdma_restrack_put(&cq->res); kfree(cq); err_event_file: diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4dab5ca7362b..84ce3fce2826 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -913,7 +913,7 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, spin_unlock_irqrestore(&qp->scq->cq_lock, flags); } -static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) +static void bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) { struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; @@ -933,10 +933,9 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp); - if (rc) { + if (rc) ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed"); - goto fail; - } + bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); /* remove from active qp list */ @@ -951,10 +950,6 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) rdev->gsi_ctx.gsi_sqp = NULL; rdev->gsi_ctx.gsi_sah = NULL; rdev->gsi_ctx.sqp_tbl = NULL; - - return 0; -fail: - return rc; } static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index d9a12681f843..22d3e25c3b9d 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1216,13 +1216,13 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (umem->length < cq->size) { ibdev_dbg(&dev->ibdev, "External memory too small\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } if (!ib_umem_is_contiguous(umem)) { ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } cq->cpu_addr = NULL; @@ -1251,7 +1251,7 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, err = efa_com_create_cq(&dev->edev, ¶ms, &result); if (err) - goto err_free_mem; + goto err_free_mapped; resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; @@ -1299,12 +1299,10 @@ err_remove_mmap: efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); -err_free_mem: - if (umem) - ib_umem_release(umem); - else - efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - +err_free_mapped: + if (!umem) + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); err_out: atomic64_inc(&dev->stats.create_cq_err); return err; diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 3a5c93c9fb3e..6aa82fe9dd3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include <linux/pci.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> #include "hns_roce_device.h" @@ -37,6 +38,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +93,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; } -static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, + struct hns_roce_bank *bank, struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +115,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id; mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid]; id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -396,7 +448,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } - ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 78ee04a48a74..06832c0ac055 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -217,6 +217,7 @@ struct hns_roce_ucontext { struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; u32 config; + u8 cq_bank_id; }; struct hns_roce_pd { @@ -495,6 +496,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; }; struct hns_roce_srq_table { @@ -1305,5 +1307,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f82bdd46a917..63052c0e7613 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -165,6 +165,8 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); hr_reg_clear(fseg, FRMR_BLK_MODE); + hr_reg_clear(fseg, FRMR_BLOCK_SIZE); + hr_reg_clear(fseg, FRMR_ZBVA); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -339,9 +341,6 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, int j = 0; int i; - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, !!(wr->send_flags & IB_SEND_INLINE)); if (wr->send_flags & IB_SEND_INLINE) @@ -586,6 +585,9 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, + curr_idx & (qp->sge.sge_cnt - 1)); + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { if (msg_len != ATOMIC_WR_LEN) @@ -734,6 +736,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + /* RC and UD share the same DirectWQE field layout */ + ((struct hns_roce_v2_rc_send_wqe *)wqe)->byte_4 = 0; + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -7048,7 +7053,6 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - handle->priv = hr_dev; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d50f36f8a110..f3607fe107a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -425,6 +425,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_copy_to_udata; + hns_roce_get_cq_bankid_for_uctx(context); + return 0; error_fail_copy_to_udata: @@ -447,6 +449,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + hns_roce_put_cq_bankid_for_uctx(context); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6ff1b8ce580c..bdd879ac12dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -662,7 +662,6 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -744,7 +743,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index 3091f9345f12..fa6325adaede 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -71,7 +71,7 @@ int irdma_hmc_init_pble(struct irdma_sc_dev *dev, static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) { - idx->sd_idx = (u32)pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; + idx->sd_idx = pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE); idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD); } diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 4ae77cdde9dc..c1b8f81ea283 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -706,7 +706,7 @@ struct irdma_sc_dev { u32 vchnl_ver; u16 num_vfs; u16 hmc_fn_id; - u8 vf_id; + u16 vf_id; bool privileged:1; bool vchnl_up:1; bool ceq_valid:1; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 76ce6137f2ba..c883c9ea5a83 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2503,6 +2503,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); + iwcq->cq_num = cq_num; info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index ed21c1b56e8e..ac8b38701835 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -140,7 +140,7 @@ struct irdma_srq { struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; - u16 cq_num; + u32 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a23b364e24ff..651d76bca114 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1020,15 +1020,18 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); + if (udata) { + cq->mcq.comp = mlx5_add_cq_to_tasklet; + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; + } else { + cq->mcq.comp = mlx5_ib_cq_comp; + } + err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); if (err) goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - if (udata) - cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; - else - cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; INIT_LIST_HEAD(&cq->wc_list); diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index c0360c450880..75d60f2ad900 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -707,7 +707,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, struct iopt_area *area; unsigned long unmapped_bytes = 0; unsigned int tries = 0; - int rc = -ENOENT; + /* If there are no mapped entries then success */ + int rc = 0; /* * The domains_rwsem must be held in read mode any time any area->pages @@ -777,8 +778,6 @@ again: down_write(&iopt->iova_rwsem); } - if (unmapped_bytes) - rc = 0; out_unlock_iova: up_write(&iopt->iova_rwsem); @@ -815,13 +814,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) { - int rc; - - rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); /* If the IOVAs are empty then unmap all succeeds */ - if (rc == -ENOENT) - return 0; - return rc; + return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); } /* The caller must always free all the nodes in the allowed_iova rb_root. */ diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 1542c5fd10a8..459a7c516915 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -367,6 +367,10 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) &unmapped); if (rc) goto out_put; + if (!unmapped) { + rc = -ENOENT; + goto out_put; + } } cmd->length = unmapped; diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 4514575818fc..b5b67a9d3fb3 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -130,9 +130,8 @@ struct iova_bitmap { static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, unsigned long iova) { - unsigned long pgsize = 1UL << bitmap->mapped.pgshift; - - return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); + return (iova >> bitmap->mapped.pgshift) / + BITS_PER_TYPE(*bitmap->bitmap); } /* diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index e5805885394e..70290b35b317 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -166,7 +166,8 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain, static const struct irq_domain_ops riscv_intc_domain_ops = { .map = riscv_intc_domain_map, .xlate = irq_domain_xlate_onecell, - .alloc = riscv_intc_domain_alloc + .alloc = riscv_intc_domain_alloc, + .free = irq_domain_free_irqs_top, }; static struct fwnode_handle *riscv_intc_hwnode(void) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index e54419a4e731..541a20cb58f1 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1904,13 +1904,13 @@ out: mISDN_freebchannel(&hw->bch[1]); mISDN_freebchannel(&hw->bch[0]); mISDN_freedchannel(&hw->dch); - kfree(hw); return err; } static int hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + int err; struct hfcsusb *hw; struct usb_device *dev = interface_to_usbdev(intf); struct usb_host_interface *iface = intf->cur_altsetting; @@ -2101,20 +2101,28 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) if (!hw->ctrl_urb) { pr_warn("%s: No memory for control urb\n", driver_info->vend_name); - kfree(hw); - return -ENOMEM; + err = -ENOMEM; + goto err_free_hw; } pr_info("%s: %s: detected \"%s\" (%s, if=%d alt=%d)\n", hw->name, __func__, driver_info->vend_name, conf_str[small_match], ifnum, alt_used); - if (setup_instance(hw, dev->dev.parent)) - return -EIO; + if (setup_instance(hw, dev->dev.parent)) { + err = -EIO; + goto err_free_urb; + } hw->intf = intf; usb_set_intfdata(hw->intf, hw); return 0; + +err_free_urb: + usb_free_urb(hw->ctrl_urb); +err_free_hw: + kfree(hw); + return err; } /* function called when an active device is removed */ diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index d911021c1bb0..83862d57b126 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -1010,6 +1010,11 @@ int vb2_ioctl_remove_bufs(struct file *file, void *priv, if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; + if (vb2_fileio_is_active(vdev->queue)) { + dprintk(vdev->queue, 1, "file io in progress\n"); + return -EBUSY; + } + return vb2_core_remove_bufs(vdev->queue, d->index, d->count); } EXPORT_SYMBOL_GPL(vb2_ioctl_remove_bufs); diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index b62fd12c93c1..74c59a94b2b0 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -1136,11 +1136,8 @@ int cx18_init_on_first_open(struct cx18 *cx) int video_input; int fw_retry_count = 3; struct v4l2_frequency vf; - struct cx18_open_id fh; v4l2_std_id std; - fh.cx = cx; - if (test_bit(CX18_F_I_FAILED, &cx->i_flags)) return -ENXIO; @@ -1220,14 +1217,14 @@ int cx18_init_on_first_open(struct cx18 *cx) video_input = cx->active_input; cx->active_input++; /* Force update of input */ - cx18_s_input(NULL, &fh, video_input); + cx18_do_s_input(cx, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ cx->std++; /* Force full standard initialization */ std = (cx->tuner_std == V4L2_STD_ALL) ? V4L2_STD_NTSC_M : cx->tuner_std; - cx18_s_std(NULL, &fh, std); - cx18_s_frequency(NULL, &fh, &vf); + cx18_do_s_std(cx, std); + cx18_do_s_frequency(cx, &vf); return 0; } diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c index 0f3019739d03..0d676a57e24e 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.c +++ b/drivers/media/pci/cx18/cx18-ioctl.c @@ -521,10 +521,8 @@ static int cx18_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int cx18_s_input(struct file *file, void *fh, unsigned int inp) +int cx18_do_s_input(struct cx18 *cx, unsigned int inp) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; v4l2_std_id std = V4L2_STD_ALL; const struct cx18_card_video_input *card_input = cx->card->video_inputs + inp; @@ -558,6 +556,11 @@ int cx18_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int cx18_s_input(struct file *file, void *fh, unsigned int inp) +{ + return cx18_do_s_input(file2id(file)->cx, inp); +} + static int cx18_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { @@ -570,11 +573,8 @@ static int cx18_g_frequency(struct file *file, void *fh, return 0; } -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if (vf->tuner != 0) return -EINVAL; @@ -585,6 +585,12 @@ int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int cx18_s_frequency(struct file *file, void *fh, + const struct v4l2_frequency *vf) +{ + return cx18_do_s_frequency(file2id(file)->cx, vf); +} + static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct cx18 *cx = file2id(file)->cx; @@ -593,11 +599,8 @@ static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) return 0; } -int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if ((std & V4L2_STD_ALL) == 0) return -EINVAL; @@ -642,6 +645,11 @@ int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) return 0; } +static int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) +{ + return cx18_do_s_std(file2id(file)->cx, std); +} + static int cx18_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { struct cx18_open_id *id = file2id(file); diff --git a/drivers/media/pci/cx18/cx18-ioctl.h b/drivers/media/pci/cx18/cx18-ioctl.h index 97cd9f99e22d..42a8acd69735 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.h +++ b/drivers/media/pci/cx18/cx18-ioctl.h @@ -12,6 +12,8 @@ u16 cx18_service2vbi(int type); void cx18_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 cx18_get_service_set(struct v4l2_sliced_vbi_format *fmt); void cx18_set_funcs(struct video_device *vdev); -int cx18_s_std(struct file *file, void *fh, v4l2_std_id std); -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int cx18_s_input(struct file *file, void *fh, unsigned int inp); + +struct cx18; +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std); +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf); +int cx18_do_s_input(struct cx18 *cx, unsigned int inp); diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index 72a8f76a41f4..459eb6cc370c 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -1247,15 +1247,12 @@ err: int ivtv_init_on_first_open(struct ivtv *itv) { - struct v4l2_frequency vf; /* Needed to call ioctls later */ - struct ivtv_open_id fh; + struct ivtv_stream *s = &itv->streams[IVTV_ENC_STREAM_TYPE_MPG]; + struct v4l2_frequency vf; int fw_retry_count = 3; int video_input; - fh.itv = itv; - fh.type = IVTV_ENC_STREAM_TYPE_MPG; - if (test_bit(IVTV_F_I_FAILED, &itv->i_flags)) return -ENXIO; @@ -1297,13 +1294,13 @@ int ivtv_init_on_first_open(struct ivtv *itv) video_input = itv->active_input; itv->active_input++; /* Force update of input */ - ivtv_s_input(NULL, &fh, video_input); + ivtv_do_s_input(itv, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ itv->std++; /* Force full standard initialization */ itv->std_out = itv->std; - ivtv_s_frequency(NULL, &fh, &vf); + ivtv_do_s_frequency(s, &vf); if (itv->card->v4l2_capabilities & V4L2_CAP_VIDEO_OUTPUT) { /* Turn on the TV-out: ivtv_init_mpeg_decoder() initializes diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 84c73bd22f2d..8d5ea3aec06f 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -974,9 +974,8 @@ static int ivtv_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp) { - struct ivtv *itv = file2id(file)->itv; v4l2_std_id std; int i; @@ -1017,6 +1016,11 @@ int ivtv_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +{ + return ivtv_do_s_input(file2id(file)->itv, inp); +} + static int ivtv_g_output(struct file *file, void *fh, unsigned int *i) { struct ivtv *itv = file2id(file)->itv; @@ -1065,10 +1069,9 @@ static int ivtv_g_frequency(struct file *file, void *fh, struct v4l2_frequency * return 0; } -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf) { - struct ivtv *itv = file2id(file)->itv; - struct ivtv_stream *s = &itv->streams[file2id(file)->type]; + struct ivtv *itv = s->itv; if (s->vdev.vfl_dir) return -ENOTTY; @@ -1082,6 +1085,15 @@ int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int ivtv_s_frequency(struct file *file, void *fh, + const struct v4l2_frequency *vf) +{ + struct ivtv_open_id *id = file2id(file); + struct ivtv *itv = id->itv; + + return ivtv_do_s_frequency(&itv->streams[id->type], vf); +} + static int ivtv_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct ivtv *itv = file2id(file)->itv; diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.h b/drivers/media/pci/ivtv/ivtv-ioctl.h index 7f8c6f43d397..96ca7e2ef973 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.h +++ b/drivers/media/pci/ivtv/ivtv-ioctl.h @@ -9,6 +9,8 @@ #ifndef IVTV_IOCTL_H #define IVTV_IOCTL_H +struct ivtv; + u16 ivtv_service2vbi(int type); void ivtv_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 ivtv_get_service_set(struct v4l2_sliced_vbi_format *fmt); @@ -17,7 +19,7 @@ int ivtv_set_speed(struct ivtv *itv, int speed); void ivtv_set_funcs(struct video_device *vdev); void ivtv_s_std_enc(struct ivtv *itv, v4l2_std_id std); void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std); -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int ivtv_s_input(struct file *file, void *fh, unsigned int inp); +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf); +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp); #endif diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index fb6afb8e84f0..ee4f54d68349 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -167,13 +167,26 @@ static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, static struct uvc_streaming *uvc_stream_by_id(struct uvc_device *dev, int id) { - struct uvc_streaming *stream; + struct uvc_streaming *stream, *last_stream; + unsigned int count = 0; list_for_each_entry(stream, &dev->streams, list) { + count += 1; + last_stream = stream; if (stream->header.bTerminalLink == id) return stream; } + /* + * If the streaming entity is referenced by an invalid ID, notify the + * user and use heuristics to guess the correct entity. + */ + if (count == 1 && id == UVC_INVALID_ENTITY_ID) { + dev_warn(&dev->intf->dev, + "UVC non compliance: Invalid USB header. The streaming entity has an invalid ID, guessing the correct one."); + return last_stream; + } + return NULL; } diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 1da953629010..25e66bf18f5f 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -2608,7 +2608,7 @@ EXPORT_SYMBOL_GPL(v4l2_subdev_is_streaming); int v4l2_subdev_get_privacy_led(struct v4l2_subdev *sd) { #if IS_REACHABLE(CONFIG_LEDS_CLASS) - sd->privacy_led = led_get(sd->dev, "privacy-led"); + sd->privacy_led = led_get(sd->dev, "privacy"); if (IS_ERR(sd->privacy_led) && PTR_ERR(sd->privacy_led) != -ENOENT) return dev_err_probe(sd->dev, PTR_ERR(sd->privacy_led), "getting privacy LED\n"); diff --git a/drivers/misc/amd-sbi/Kconfig b/drivers/misc/amd-sbi/Kconfig index 4aae0733d0fc..ab594908cb4a 100644 --- a/drivers/misc/amd-sbi/Kconfig +++ b/drivers/misc/amd-sbi/Kconfig @@ -2,9 +2,11 @@ config AMD_SBRMI_I2C tristate "AMD side band RMI support" depends on I2C + depends on ARM || ARM64 || COMPILE_TEST select REGMAP_I2C help Side band RMI over I2C support for AMD out of band management. + This driver is intended to run on the BMC, not the managed node. This driver can also be built as a module. If so, the module will be called sbrmi-i2c. diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 621bce7e101c..ee652ef01534 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -381,6 +381,8 @@ static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, } spin_unlock(&fl->lock); + dma_buf_put(buf); + return ret; } diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bc40b940ae21..a4f75dc36929 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -120,6 +120,8 @@ #define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ +#define MEI_DEV_ID_WCL_P 0x4D70 /* Wildcat Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/mei_lb.c b/drivers/misc/mei/mei_lb.c index 77686b108d3c..78717ee8ac9a 100644 --- a/drivers/misc/mei/mei_lb.c +++ b/drivers/misc/mei/mei_lb.c @@ -134,8 +134,7 @@ static bool mei_lb_check_response(const struct device *dev, ssize_t bytes, return true; } -static int mei_lb_push_payload(struct device *dev, - enum intel_lb_type type, u32 flags, +static int mei_lb_push_payload(struct device *dev, u32 type, u32 flags, const void *payload, size_t payload_size) { struct mei_cl_device *cldev; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b108a7c22388..b017ff29dbd1 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -127,6 +127,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WCL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index c9eb5c5393e4..06b55a891c6b 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -109,19 +109,19 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto end; } + err = mei_register(dev, &pdev->dev); + if (err) + goto release_irq; + if (mei_start(dev)) { dev_err(&pdev->dev, "init hw failure.\n"); err = -ENODEV; - goto release_irq; + goto deregister; } pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); - err = mei_register(dev, &pdev->dev); - if (err) - goto stop; - pci_set_drvdata(pdev, dev); /* @@ -144,8 +144,8 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -stop: - mei_stop(dev); +deregister: + mei_deregister(dev); release_irq: mei_cancel_work(dev); mei_disable_interrupts(dev); diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index 36f7379b8e2d..f6b821fc274c 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -203,7 +203,7 @@ u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) mutex_lock(&ctx->irq_lock); irq = idr_find(&ctx->irq_idr, irq_id); if (irq) { - xd = irq_get_handler_data(irq->virq); + xd = irq_get_chip_data(irq->virq); addr = xd ? xd->trig_page : 0; } mutex_unlock(&ctx->irq_lock); diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 6df51ee8db62..cc1d18b3df5c 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1737,7 +1737,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, { unsigned long status, flags; struct vmballoon *b; - int ret; + int ret = 0; b = container_of(b_dev_info, struct vmballoon, b_dev_info); @@ -1796,17 +1796,15 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, * A failure happened. While we can deflate the page we just * inflated, this deflation can also encounter an error. Instead * we will decrease the size of the balloon to reflect the - * change and report failure. + * change. */ atomic64_dec(&b->size); - ret = -EBUSY; } else { /* * Success. Take a reference for the page, and we will add it to * the list after acquiring the lock. */ get_page(newpage); - ret = 0; } /* Update the balloon list under the @pages_lock */ @@ -1817,7 +1815,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, * If we succeed just insert it to the list and update the statistics * under the lock. */ - if (!ret) { + if (status == VMW_BALLOON_SUCCESS) { balloon_page_insert(&b->b_dev_info, newpage); __count_vm_event(BALLOON_MIGRATE); } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9399bf6c766a..c0ffe0817fd4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -79,48 +79,6 @@ MODULE_ALIAS("mmc:block"); #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16) #define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8) -/** - * struct rpmb_frame - rpmb frame as defined by eMMC 5.1 (JESD84-B51) - * - * @stuff : stuff bytes - * @key_mac : The authentication key or the message authentication - * code (MAC) depending on the request/response type. - * The MAC will be delivered in the last (or the only) - * block of data. - * @data : Data to be written or read by signed access. - * @nonce : Random number generated by the host for the requests - * and copied to the response by the RPMB engine. - * @write_counter: Counter value for the total amount of the successful - * authenticated data write requests made by the host. - * @addr : Address of the data to be programmed to or read - * from the RPMB. Address is the serial number of - * the accessed block (half sector 256B). - * @block_count : Number of blocks (half sectors, 256B) requested to be - * read/programmed. - * @result : Includes information about the status of the write counter - * (valid, expired) and result of the access made to the RPMB. - * @req_resp : Defines the type of request and response to/from the memory. - * - * The stuff bytes and big-endian properties are modeled to fit to the spec. - */ -struct rpmb_frame { - u8 stuff[196]; - u8 key_mac[32]; - u8 data[256]; - u8 nonce[16]; - __be32 write_counter; - __be16 addr; - __be16 block_count; - __be16 result; - __be16 req_resp; -} __packed; - -#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ -#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ -#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ -#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ -#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ - #define RPMB_FRAME_SIZE sizeof(struct rpmb_frame) #define CHECK_SIZE_NEQ(val) ((val) != sizeof(struct rpmb_frame)) #define CHECK_SIZE_ALIGNED(val) IS_ALIGNED((val), sizeof(struct rpmb_frame)) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 2c963cb6724b..10d0ef58ef49 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -950,7 +950,7 @@ config MMC_USHC config MMC_WMT tristate "Wondermedia SD/MMC Host Controller support" depends on ARCH_VT8500 || COMPILE_TEST - default y + default ARCH_VT8500 help This selects support for the SD/MMC Host Controller on Wondermedia WM8505/WM8650 based SoCs. diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 82dd906bb002..681354942e97 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -42,7 +42,7 @@ struct dw_mci_rockchip_priv_data { */ static int rockchip_mmc_get_internal_phase(struct dw_mci *host, bool sample) { - unsigned long rate = clk_get_rate(host->ciu_clk); + unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; u32 raw_value; u16 degrees; u32 delay_num = 0; @@ -85,7 +85,7 @@ static int rockchip_mmc_get_phase(struct dw_mci *host, bool sample) static int rockchip_mmc_set_internal_phase(struct dw_mci *host, bool sample, int degrees) { - unsigned long rate = clk_get_rate(host->ciu_clk); + unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; u8 nineties, remainder; u8 delay_num; u32 raw_value; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 26d03352af63..b5ea058ed467 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -652,10 +652,9 @@ static int pxamci_probe(struct platform_device *pdev) host->clkrt = CLKRT_OFF; host->clk = devm_clk_get(dev, NULL); - if (IS_ERR(host->clk)) { - host->clk = NULL; - return PTR_ERR(host->clk); - } + if (IS_ERR(host->clk)) + return dev_err_probe(dev, PTR_ERR(host->clk), + "Failed to acquire clock\n"); host->clkrate = clk_get_rate(host->clk); @@ -703,46 +702,37 @@ static int pxamci_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mmc); - host->dma_chan_rx = dma_request_chan(dev, "rx"); - if (IS_ERR(host->dma_chan_rx)) { - host->dma_chan_rx = NULL; + host->dma_chan_rx = devm_dma_request_chan(dev, "rx"); + if (IS_ERR(host->dma_chan_rx)) return dev_err_probe(dev, PTR_ERR(host->dma_chan_rx), "unable to request rx dma channel\n"); - } - host->dma_chan_tx = dma_request_chan(dev, "tx"); - if (IS_ERR(host->dma_chan_tx)) { - dev_err(dev, "unable to request tx dma channel\n"); - ret = PTR_ERR(host->dma_chan_tx); - host->dma_chan_tx = NULL; - goto out; - } + + host->dma_chan_tx = devm_dma_request_chan(dev, "tx"); + if (IS_ERR(host->dma_chan_tx)) + return dev_err_probe(dev, PTR_ERR(host->dma_chan_tx), + "unable to request tx dma channel\n"); if (host->pdata) { host->detect_delay_ms = host->pdata->detect_delay_ms; host->power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); - if (IS_ERR(host->power)) { - ret = PTR_ERR(host->power); - dev_err(dev, "Failed requesting gpio_power\n"); - goto out; - } + if (IS_ERR(host->power)) + return dev_err_probe(dev, PTR_ERR(host->power), + "Failed requesting gpio_power\n"); /* FIXME: should we pass detection delay to debounce? */ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0); - if (ret && ret != -ENOENT) { - dev_err(dev, "Failed requesting gpio_cd\n"); - goto out; - } + if (ret && ret != -ENOENT) + return dev_err_probe(dev, ret, "Failed requesting gpio_cd\n"); if (!host->pdata->gpio_card_ro_invert) mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0); - if (ret && ret != -ENOENT) { - dev_err(dev, "Failed requesting gpio_ro\n"); - goto out; - } + if (ret && ret != -ENOENT) + return dev_err_probe(dev, ret, "Failed requesting gpio_ro\n"); + if (!ret) host->use_ro_gpio = true; @@ -759,16 +749,8 @@ static int pxamci_probe(struct platform_device *pdev) if (ret) { if (host->pdata && host->pdata->exit) host->pdata->exit(dev, mmc); - goto out; } - return 0; - -out: - if (host->dma_chan_rx) - dma_release_channel(host->dma_chan_rx); - if (host->dma_chan_tx) - dma_release_channel(host->dma_chan_tx); return ret; } @@ -791,8 +773,6 @@ static void pxamci_remove(struct platform_device *pdev) dmaengine_terminate_all(host->dma_chan_rx); dmaengine_terminate_all(host->dma_chan_tx); - dma_release_channel(host->dma_chan_rx); - dma_release_channel(host->dma_chan_tx); } } diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index eebd45389956..5b61401a7f3d 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -94,7 +94,7 @@ #define DLL_TXCLK_TAPNUM_DEFAULT 0x10 #define DLL_TXCLK_TAPNUM_90_DEGREES 0xA #define DLL_TXCLK_TAPNUM_FROM_SW BIT(24) -#define DLL_STRBIN_TAPNUM_DEFAULT 0x8 +#define DLL_STRBIN_TAPNUM_DEFAULT 0x4 #define DLL_STRBIN_TAPNUM_FROM_SW BIT(24) #define DLL_STRBIN_DELAY_NUM_SEL BIT(26) #define DLL_STRBIN_DELAY_NUM_OFFSET 16 diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index cf5be9c449a5..10064d7b7249 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -929,6 +929,10 @@ static void release_mdev(struct device *dev) { struct most_dev *mdev = to_mdev_from_dev(dev); + kfree(mdev->busy_urbs); + kfree(mdev->cap); + kfree(mdev->conf); + kfree(mdev->ep_address); kfree(mdev); } /** @@ -1093,7 +1097,7 @@ err_free_cap: err_free_conf: kfree(mdev->conf); err_free_mdev: - put_device(&mdev->dev); + kfree(mdev); return ret; } @@ -1121,13 +1125,6 @@ static void hdm_disconnect(struct usb_interface *interface) if (mdev->dci) device_unregister(&mdev->dci->dev); most_deregister_interface(&mdev->iface); - - kfree(mdev->busy_urbs); - kfree(mdev->cap); - kfree(mdev->conf); - kfree(mdev->ep_address); - put_device(&mdev->dci->dev); - put_device(&mdev->dev); } static int hdm_suspend(struct usb_interface *interface, pm_message_t message) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 8dc4f5c493fc..335c702633ff 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -599,6 +599,7 @@ mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp) uint8_t *datbuf = NULL, *oobbuf = NULL; size_t datbuf_len, oobbuf_len; int ret = 0; + u64 end; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; @@ -618,7 +619,7 @@ mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp) req.len &= 0xffffffff; req.ooblen &= 0xffffffff; - if (req.start + req.len > mtd->size) + if (check_add_overflow(req.start, req.len, &end) || end > mtd->size) return -EINVAL; datbuf_len = min_t(size_t, req.len, mtd->erasesize); @@ -698,6 +699,7 @@ mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp) size_t datbuf_len, oobbuf_len; size_t orig_len, orig_ooblen; int ret = 0; + u64 end; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; @@ -724,7 +726,7 @@ mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp) req.len &= 0xffffffff; req.ooblen &= 0xffffffff; - if (req.start + req.len > mtd->size) { + if (check_add_overflow(req.start, req.len, &end) || end > mtd->size) { ret = -EINVAL; goto out; } diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 4a17271076bc..1e57c8de8578 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -63,7 +63,7 @@ config MTD_NAND_ECC_MEDIATEK config MTD_NAND_ECC_REALTEK tristate "Realtek RTL93xx hardware ECC engine" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA depends on MACH_REALTEK_RTL || COMPILE_TEST select MTD_NAND_ECC help diff --git a/drivers/mtd/nand/ecc-realtek.c b/drivers/mtd/nand/ecc-realtek.c index 7d718934c909..0046da37ea3e 100644 --- a/drivers/mtd/nand/ecc-realtek.c +++ b/drivers/mtd/nand/ecc-realtek.c @@ -380,7 +380,7 @@ static void rtl_ecc_cleanup_ctx(struct nand_device *nand) nand_ecc_cleanup_req_tweaking(&ctx->req_ctx); } -static struct nand_ecc_engine_ops rtl_ecc_engine_ops = { +static const struct nand_ecc_engine_ops rtl_ecc_engine_ops = { .init_ctx = rtl_ecc_init_ctx, .cleanup_ctx = rtl_ecc_cleanup_ctx, .prepare_io_req = rtl_ecc_prepare_io_req, @@ -418,8 +418,8 @@ static int rtl_ecc_probe(struct platform_device *pdev) rtlc->buf = dma_alloc_noncoherent(dev, RTL_ECC_DMA_SIZE, &rtlc->buf_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); - if (IS_ERR(rtlc->buf)) - return PTR_ERR(rtlc->buf); + if (!rtlc->buf) + return -ENOMEM; rtlc->dev = dev; rtlc->engine.dev = dev; diff --git a/drivers/mtd/nand/onenand/onenand_samsung.c b/drivers/mtd/nand/onenand/onenand_samsung.c index f37a6138e461..6d6aa709a21f 100644 --- a/drivers/mtd/nand/onenand/onenand_samsung.c +++ b/drivers/mtd/nand/onenand/onenand_samsung.c @@ -906,7 +906,7 @@ static int s3c_onenand_probe(struct platform_device *pdev) err = devm_request_irq(&pdev->dev, r->start, s5pc110_onenand_irq, IRQF_SHARED, "onenand", - &onenand); + onenand); if (err) { dev_err(&pdev->dev, "failed to get irq\n"); return err; diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 6667eea95597..32ed38b89394 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2871,7 +2871,7 @@ cadence_nand_irq_cleanup(int irqnum, struct cdns_nand_ctrl *cdns_ctrl) static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) { dma_cap_mask_t mask; - struct dma_device *dma_dev = cdns_ctrl->dmac->device; + struct dma_device *dma_dev; int ret; cdns_ctrl->cdma_desc = dma_alloc_coherent(cdns_ctrl->dev, @@ -2915,6 +2915,7 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) } } + dma_dev = cdns_ctrl->dmac->device; cdns_ctrl->io.iova_dma = dma_map_resource(dma_dev->dev, cdns_ctrl->io.dma, cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); diff --git a/drivers/mtd/nand/spi/fmsh.c b/drivers/mtd/nand/spi/fmsh.c index 8b2097bfc771..c2b9a8c113cb 100644 --- a/drivers/mtd/nand/spi/fmsh.c +++ b/drivers/mtd/nand/spi/fmsh.c @@ -58,7 +58,7 @@ static const struct spinand_info fmsh_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - SPINAND_HAS_QE_BIT, + 0, SPINAND_ECCINFO(&fm25s01a_ooblayout, NULL)), }; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4da619210c1f..5abef8a3b775 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2120,7 +2120,7 @@ skip_mac_set: /* check for initial state */ new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { - if (netif_carrier_ok(slave_dev)) { + if (netif_running(slave_dev) && netif_carrier_ok(slave_dev)) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, BOND_LINK_BACK, @@ -2287,7 +2287,9 @@ skip_mac_set: unblock_netpoll_tx(); } - if (bond_mode_can_use_xmit_hash(bond)) + /* broadcast mode uses the all_slaves to loop through slaves. */ + if (bond_mode_can_use_xmit_hash(bond) || + BOND_MODE(bond) == BOND_MODE_BROADCAST) bond_update_slave_arr(bond, NULL); if (!slave_dev->netdev_ops->ndo_bpf || @@ -2463,7 +2465,8 @@ static int __bond_release_one(struct net_device *bond_dev, bond_upper_dev_unlink(bond, slave); - if (bond_mode_can_use_xmit_hash(bond)) + if (bond_mode_can_use_xmit_hash(bond) || + BOND_MODE(bond) == BOND_MODE_BROADCAST) bond_update_slave_arr(bond, slave); slave_info(bond_dev, slave_dev, "Releasing %s interface\n", @@ -2662,7 +2665,8 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { bond_propose_link_state(slave, BOND_LINK_NOCHANGE); - link_state = netif_carrier_ok(slave->dev); + link_state = netif_running(slave->dev) && + netif_carrier_ok(slave->dev); switch (slave->link) { case BOND_LINK_UP: @@ -2871,7 +2875,7 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers = false; + bool should_notify_peers; bool commit; unsigned long delay; struct slave *slave; @@ -2883,30 +2887,33 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm; rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); commit = !!bond_miimon_inspect(bond); - if (bond->send_peer_notif) { - rcu_read_unlock(); - if (rtnl_trylock()) { - bond->send_peer_notif--; - rtnl_unlock(); - } - } else { - rcu_read_unlock(); - } - if (commit) { + rcu_read_unlock(); + + if (commit || bond->send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; - should_notify_peers = false; goto re_arm; } - bond_for_each_slave(bond, slave, iter) { - bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); + if (commit) { + bond_for_each_slave(bond, slave, iter) { + bond_commit_link_state(slave, + BOND_SLAVE_NOTIFY_LATER); + } + bond_miimon_commit(bond); + } + + if (bond->send_peer_notif) { + bond->send_peer_notif--; + if (should_notify_peers) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); } - bond_miimon_commit(bond); rtnl_unlock(); /* might sleep, hold no other locks */ } @@ -2914,13 +2921,6 @@ static void bond_mii_monitor(struct work_struct *work) re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); - - if (should_notify_peers) { - if (!rtnl_trylock()) - return; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - rtnl_unlock(); - } } static int bond_upper_dev_walk(struct net_device *upper, diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 495a87f2ea7c..384499c869b8 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -225,13 +225,6 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; -static const struct bond_opt_value bond_actor_port_prio_tbl[] = { - { "minval", 0, BOND_VALFLAG_MIN}, - { "maxval", 65535, BOND_VALFLAG_MAX}, - { "default", 255, BOND_VALFLAG_DEFAULT}, - { NULL, -1, 0}, -}; - static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -497,7 +490,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_ACTOR_PORT_PRIO, .name = "actor_port_prio", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .values = bond_actor_port_prio_tbl, + .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_actor_port_prio_set, }, [BOND_OPT_AD_ACTOR_SYSTEM] = { diff --git a/drivers/net/can/bxcan.c b/drivers/net/can/bxcan.c index bfc60eb33dc3..333ad42ea73b 100644 --- a/drivers/net/can/bxcan.c +++ b/drivers/net/can/bxcan.c @@ -842,7 +842,7 @@ static netdev_tx_t bxcan_start_xmit(struct sk_buff *skb, u32 id; int i, j; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (bxcan_tx_busy(priv)) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 0591406b6f32..6f83b87d54fc 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -452,7 +452,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_CAN_RESTART_MS]) { - if (!priv->do_set_mode) { + unsigned int restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + + if (restart_ms != 0 && !priv->do_set_mode) { NL_SET_ERR_MSG(extack, "Device doesn't support restart from Bus Off"); return -EOPNOTSUPP; @@ -461,7 +463,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Do not allow changing restart delay while running */ if (dev->flags & IFF_UP) return -EBUSY; - priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + priv->restart_ms = restart_ms; } if (data[IFLA_CAN_RESTART]) { diff --git a/drivers/net/can/esd/esdacc.c b/drivers/net/can/esd/esdacc.c index c80032bc1a52..73e66f9a3781 100644 --- a/drivers/net/can/esd/esdacc.c +++ b/drivers/net/can/esd/esdacc.c @@ -254,7 +254,7 @@ netdev_tx_t acc_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 acc_id; u32 acc_dlc; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* Access core->tx_fifo_tail only once because it may be changed diff --git a/drivers/net/can/rockchip/rockchip_canfd-tx.c b/drivers/net/can/rockchip/rockchip_canfd-tx.c index 865a15e033a9..12200dcfd338 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-tx.c +++ b/drivers/net/can/rockchip/rockchip_canfd-tx.c @@ -72,7 +72,7 @@ netdev_tx_t rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) int err; u8 i; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (!netif_subqueue_maybe_stop(priv->ndev, 0, diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2f846381d5a7..eb767edc4c13 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -371,11 +371,11 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) * frames should be flooded or not. */ b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IPMC_FWD_EN; + mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } else { b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_IP_MCAST_25; + mgmt |= B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } } @@ -1372,6 +1372,10 @@ static void b53_force_port_config(struct b53_device *dev, int port, else reg &= ~PORT_OVERRIDE_FULL_DUPLEX; + reg &= ~(0x3 << GMII_PO_SPEED_S); + if (is5301x(dev) || is58xx(dev)) + reg &= ~PORT_OVERRIDE_SPEED_2000M; + switch (speed) { case 2000: reg |= PORT_OVERRIDE_SPEED_2000M; @@ -1390,6 +1394,11 @@ static void b53_force_port_config(struct b53_device *dev, int port, return; } + if (is5325(dev)) + reg &= ~PORT_OVERRIDE_LP_FLOW_25; + else + reg &= ~(PORT_OVERRIDE_RX_FLOW | PORT_OVERRIDE_TX_FLOW); + if (rx_pause) { if (is5325(dev)) reg |= PORT_OVERRIDE_LP_FLOW_25; @@ -1593,8 +1602,11 @@ static void b53_phylink_mac_link_down(struct phylink_config *config, struct b53_device *dev = dp->ds->priv; int port = dp->index; - if (mode == MLO_AN_PHY) + if (mode == MLO_AN_PHY) { + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + b53_force_link(dev, port, false); return; + } if (mode == MLO_AN_FIXED) { b53_force_link(dev, port, false); @@ -1622,6 +1634,13 @@ static void b53_phylink_mac_link_up(struct phylink_config *config, if (mode == MLO_AN_PHY) { /* Re-negotiate EEE if it was enabled already */ p->eee_enabled = b53_eee_init(ds, port, phydev); + + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) { + b53_force_port_config(dev, port, speed, duplex, + tx_pause, rx_pause); + b53_force_link(dev, port, true); + } + return; } @@ -2018,7 +2037,7 @@ static int b53_arl_search_wait(struct b53_device *dev) do { b53_read8(dev, B53_ARLIO_PAGE, offset, ®); if (!(reg & ARL_SRCH_STDN)) - return 0; + return -ENOENT; if (reg & ARL_SRCH_VLID) return 0; @@ -2068,13 +2087,16 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { + unsigned int count = 0, results_per_hit = 1; struct b53_device *priv = ds->priv; struct b53_arl_entry results[2]; - unsigned int count = 0; u8 offset; int ret; u8 reg; + if (priv->num_arl_bins > 2) + results_per_hit = 2; + mutex_lock(&priv->arl_mutex); if (is5325(priv) || is5365(priv)) @@ -2096,7 +2118,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, if (ret) break; - if (priv->num_arl_bins > 2) { + if (results_per_hit == 2) { b53_arl_search_rd(priv, 1, &results[1]); ret = b53_fdb_copy(port, &results[1], cb, data); if (ret) @@ -2106,7 +2128,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, break; } - } while (count++ < b53_max_arl_entries(priv) / 2); + } while (count++ < b53_max_arl_entries(priv) / results_per_hit); mutex_unlock(&priv->arl_mutex); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 309fe0e46dad..8ce1ce72e938 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -111,8 +111,7 @@ /* IP Multicast control (8 bit) */ #define B53_IP_MULTICAST_CTRL 0x21 -#define B53_IP_MCAST_25 BIT(0) -#define B53_IPMC_FWD_EN BIT(1) +#define B53_IP_MC BIT(0) #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d747ea1c41a7..5df8f153d511 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1355,9 +1355,15 @@ void ksz9477_config_cpu_port(struct dsa_switch *ds) } } +#define RESV_MCAST_CNT 8 + +static u8 reserved_mcast_map[RESV_MCAST_CNT] = { 0, 1, 3, 16, 32, 33, 2, 17 }; + int ksz9477_enable_stp_addr(struct ksz_device *dev) { + u8 i, ports, update; const u32 *masks; + bool override; u32 data; int ret; @@ -1366,23 +1372,87 @@ int ksz9477_enable_stp_addr(struct ksz_device *dev) /* Enable Reserved multicast table */ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true); - /* Set the Override bit for forwarding BPDU packet to CPU */ - ret = ksz_write32(dev, REG_SW_ALU_VAL_B, - ALU_V_OVERRIDE | BIT(dev->cpu_port)); - if (ret < 0) - return ret; + /* The reserved multicast address table has 8 entries. Each entry has + * a default value of which port to forward. It is assumed the host + * port is the last port in most of the switches, but that is not the + * case for KSZ9477 or maybe KSZ9897. For LAN937X family the default + * port is port 5, the first RGMII port. It is okay for LAN9370, a + * 5-port switch, but may not be correct for the other 8-port + * versions. It is necessary to update the whole table to forward to + * the right ports. + * Furthermore PTP messages can use a reserved multicast address and + * the host will not receive them if this table is not correct. + */ + for (i = 0; i < RESV_MCAST_CNT; i++) { + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_READ]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; - data = ALU_STAT_START | ALU_RESV_MCAST_ADDR | masks[ALU_STAT_WRITE]; + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; - ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); - if (ret < 0) - return ret; + ret = ksz_read32(dev, REG_SW_ALU_VAL_B, &data); + if (ret < 0) + return ret; - /* wait to be finished */ - ret = ksz9477_wait_alu_sta_ready(dev); - if (ret < 0) { - dev_err(dev->dev, "Failed to update Reserved Multicast table\n"); - return ret; + override = false; + ports = data & dev->port_mask; + switch (i) { + case 0: + case 6: + /* Change the host port. */ + update = BIT(dev->cpu_port); + override = true; + break; + case 2: + /* Change the host port. */ + update = BIT(dev->cpu_port); + break; + case 4: + case 5: + case 7: + /* Skip the host port. */ + update = dev->port_mask & ~BIT(dev->cpu_port); + break; + default: + update = ports; + break; + } + if (update != ports || override) { + data &= ~dev->port_mask; + data |= update; + /* Set Override bit to receive frame even when port is + * closed. + */ + if (override) + data |= ALU_V_OVERRIDE; + ret = ksz_write32(dev, REG_SW_ALU_VAL_B, data); + if (ret < 0) + return ret; + + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_WRITE]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; + + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; + } } return 0; diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index ff579920078e..61ea11e3338e 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2024 Microchip Technology Inc. + * Copyright (C) 2017-2025 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -397,7 +397,6 @@ #define ALU_RESV_MCAST_INDEX_M (BIT(6) - 1) #define ALU_STAT_START BIT(7) -#define ALU_RESV_MCAST_ADDR BIT(1) #define REG_SW_ALU_VAL_A 0x0420 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a962055bfdbd..933ae8dc6337 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -808,6 +808,8 @@ static const u16 ksz9477_regs[] = { static const u32 ksz9477_masks[] = { [ALU_STAT_WRITE] = 0, [ALU_STAT_READ] = 1, + [ALU_STAT_DIRECT] = 0, + [ALU_RESV_MCAST_ADDR] = BIT(1), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; @@ -835,6 +837,8 @@ static const u8 ksz9477_xmii_ctrl1[] = { static const u32 lan937x_masks[] = { [ALU_STAT_WRITE] = 1, [ALU_STAT_READ] = 2, + [ALU_STAT_DIRECT] = BIT(3), + [ALU_RESV_MCAST_ADDR] = BIT(2), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a1eb39771bb9..c65188cd3c0a 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -294,6 +294,8 @@ enum ksz_masks { DYNAMIC_MAC_TABLE_TIMESTAMP, ALU_STAT_WRITE, ALU_STAT_READ, + ALU_STAT_DIRECT, + ALU_RESV_MCAST_ADDR, P_MII_TX_FLOW_CTRL, P_MII_RX_FLOW_CTRL, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3fc33b1b4dfb..a625e7c311dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12439,7 +12439,7 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } -static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; @@ -16892,6 +16892,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) netif_close(dev); + if (bnxt_hwrm_func_drv_unrgtr(bp)) { + pcie_flr(pdev); + goto shutdown_exit; + } bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 741b2d854789..3613a172483a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2149,7 +2149,7 @@ struct bnxt_bs_trace_info { static inline void bnxt_bs_trace_check_wrap(struct bnxt_bs_trace_info *bs_trace, u32 offset) { - if (!bs_trace->wrapped && + if (!bs_trace->wrapped && bs_trace->magic_byte && *bs_trace->magic_byte != BNXT_TRACE_BUF_MAGIC_BYTE) bs_trace->wrapped = 1; bs_trace->last_offset = offset; @@ -2941,6 +2941,7 @@ void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset); int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 0181ab1f2dfd..ccb8b509662d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -333,13 +333,14 @@ static void bnxt_fill_drv_seg_record(struct bnxt *bp, u32 offset = 0; int rc = 0; + record->max_entries = cpu_to_le32(ctxm->max_entries); + record->entry_size = cpu_to_le32(ctxm->entry_size); + rc = bnxt_dbg_hwrm_log_buffer_flush(bp, type, 0, &offset); if (rc) return; bnxt_bs_trace_check_wrap(bs_trace, offset); - record->max_entries = cpu_to_le32(ctxm->max_entries); - record->entry_size = cpu_to_le32(ctxm->entry_size); record->offset = cpu_to_le32(bs_trace->last_offset); record->wrapped = bs_trace->wrapped; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 02961d93ed35..67ca02d84c97 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -461,7 +461,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, rtnl_unlock(); break; } - bnxt_cancel_reservations(bp, false); + bnxt_clear_reservations(bp, false); bnxt_free_ctx_mem(bp, false); break; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index db81cf6d5289..0abaa2bbe357 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1051,9 +1051,9 @@ static void bnxt_ptp_free(struct bnxt *bp) if (ptp->ptp_clock) { ptp_clock_unregister(ptp->ptp_clock); ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; } + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; } int bnxt_ptp_init(struct bnxt *bp) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index ecd9a0bd5e18..49b57bb5fac1 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -290,9 +290,15 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, return -EINVAL; } + if (unlikely(!try_module_get(THIS_MODULE))) { + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire module reference"); + return -ENODEV; + } + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); if (!sa_entry) { res = -ENOMEM; + module_put(THIS_MODULE); goto out; } @@ -301,7 +307,6 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, sa_entry->esn = 1; ch_ipsec_setkey(x, sa_entry); x->xso.offload_handle = (unsigned long)sa_entry; - try_module_get(THIS_MODULE); out: return res; } diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 7077d705e471..6e4f17142519 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -733,7 +733,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) u64 tfc_vlan_tag = 0; if (np->link_status == 0) { /* Link Down */ - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } entry = np->cur_tx % TX_RING_SIZE; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index c96d1d6ba8fe..18d86badd6ea 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1077,8 +1077,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, dma_addr_t addr; buffer_start = skb->data - dpaa2_eth_needed_headroom(skb); - aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN, - DPAA2_ETH_TX_BUF_ALIGN); + aligned_start = PTR_ALIGN(buffer_start, DPAA2_ETH_TX_BUF_ALIGN); if (aligned_start >= skb->head) buffer_start = aligned_start; else diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index aae462a0cf5a..0535e92404e3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1595,6 +1595,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, /* next descriptor to process */ i = rx_ring->next_to_clean; + enetc_lock_mdio(); + while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd; struct sk_buff *skb; @@ -1630,7 +1632,9 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rx_byte_cnt += skb->len + ETH_HLEN; rx_frm_cnt++; + enetc_unlock_mdio(); napi_gro_receive(napi, skb); + enetc_lock_mdio(); } rx_ring->next_to_clean = i; @@ -1638,6 +1642,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; + enetc_unlock_mdio(); + return rx_frm_cnt; } @@ -1947,6 +1953,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, /* next descriptor to process */ i = rx_ring->next_to_clean; + enetc_lock_mdio(); + while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd, *orig_rxbd; struct xdp_buff xdp_buff; @@ -2010,7 +2018,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, */ enetc_bulk_flip_buff(rx_ring, orig_i, i); + enetc_unlock_mdio(); napi_gro_receive(napi, skb); + enetc_lock_mdio(); break; case XDP_TX: tx_ring = priv->xdp_tx_ring[rx_ring->index]; @@ -2045,7 +2055,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, } break; case XDP_REDIRECT: + enetc_unlock_mdio(); err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); + enetc_lock_mdio(); if (unlikely(err)) { enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; @@ -2065,8 +2077,11 @@ out: rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; - if (xdp_redirect_frm_cnt) + if (xdp_redirect_frm_cnt) { + enetc_unlock_mdio(); xdp_do_flush(); + enetc_lock_mdio(); + } if (xdp_tx_frm_cnt) enetc_update_tx_ring_tail(tx_ring); @@ -2075,6 +2090,8 @@ out: enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) - rx_ring->xdp.xdp_tx_in_flight); + enetc_unlock_mdio(); + return rx_frm_cnt; } @@ -2093,6 +2110,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) for (i = 0; i < v->count_tx_rings; i++) if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) complete = false; + enetc_unlock_mdio(); prog = rx_ring->xdp.prog; if (prog) @@ -2104,10 +2122,8 @@ static int enetc_poll(struct napi_struct *napi, int budget) if (work_done) v->rx_napi_work = true; - if (!complete) { - enetc_unlock_mdio(); + if (!complete) return budget; - } napi_complete_done(napi, work_done); @@ -2116,6 +2132,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) v->rx_napi_work = false; + enetc_lock_mdio(); /* enable interrupts */ enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 0ec010a7d640..f279fa597991 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -76,7 +76,7 @@ struct enetc_lso_t { #define ENETC_LSO_MAX_DATA_LEN SZ_256K #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE -#define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */ +#define ENETC_RXB_TRUESIZE (PAGE_SIZE >> 1) #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ #define ENETC_RXB_DMA_SIZE \ (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1edcfaee6819..3222359ac15b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1835,6 +1835,8 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) ndev->stats.rx_packets++; pkt_len = fec16_to_cpu(bdp->cbd_datlen); ndev->stats.rx_bytes += pkt_len; + if (fep->quirks & FEC_QUIRK_HAS_RACC) + ndev->stats.rx_bytes -= 2; index = fec_enet_get_bd_index(bdp, &rxq->bd); page = rxq->rx_skb_info[index].page; diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index e96247c9d68d..a384a9ed4914 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -26,6 +26,19 @@ int gve_clock_nic_ts_read(struct gve_priv *priv) return 0; } +static int gve_ptp_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + return -EOPNOTSUPP; +} + +static int gve_ptp_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -47,6 +60,8 @@ out: static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", + .gettimex64 = gve_ptp_gettimex64, + .settime64 = gve_ptp_settime64, .do_aux_work = gve_ptp_do_aux_work, }; diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 65302c41bfb1..38875c196cb6 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -148,6 +148,7 @@ config HIBMCGE tristate "Hisilicon BMC Gigabit Ethernet Device Support" depends on PCI && PCI_MSI select PHYLIB + select FIXED_PHY select MOTORCOMM_PHY select REALTEK_PHY help diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index ea09a09c451b..2097e4c2b3d7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -17,6 +17,7 @@ #define HBG_PCU_CACHE_LINE_SIZE 32 #define HBG_TX_TIMEOUT_BUF_LEN 1024 #define HBG_RX_DESCR 0x01 +#define HBG_NO_PHY 0xFF #define HBG_PACKET_HEAD_SIZE ((HBG_RX_SKIP1 + HBG_RX_SKIP2 + \ HBG_RX_DESCR) * HBG_PCU_CACHE_LINE_SIZE) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 83cf75bf7a17..e11495b7ee98 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -136,12 +136,11 @@ static pci_ers_result_t hbg_pci_err_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + netif_device_detach(netdev); return PCI_ERS_RESULT_DISCONNECT; + } - pci_disable_device(pdev); return PCI_ERS_RESULT_NEED_RESET; } @@ -150,6 +149,9 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); + netif_device_detach(netdev); + pci_disable_device(pdev); + if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "failed to re-enable PCI device after reset\n"); diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index d0aa0661ecd4..d6e8ce8e351a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -244,6 +244,9 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); + if (priv->mac.phy_addr == HBG_NO_PHY) + return; + /* wait MAC link up */ ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, link_status, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index 8af0bc4cca21..ae4cb35186d8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -32,6 +32,7 @@ static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; + hbg_hw_irq_enable(priv, irq_info->mask, true); } #define HBG_IRQ_I(name, handle) \ diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 37791de47f6f..b6f0a2780ea8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -20,7 +20,6 @@ #define HBG_MDIO_OP_INTERVAL_US (5 * 1000) #define HBG_NP_LINK_FAIL_RETRY_TIMES 5 -#define HBG_NO_PHY 0xFF static void hbg_mdio_set_command(struct hbg_mac *mac, u32 cmd) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9d34d28ff168..782bb48c9f3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9429,8 +9429,7 @@ static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) /* this command reads phy id and register at the same time */ fallthrough; case SIOCGMIIREG: - data->val_out = hclge_read_phy_reg(hdev, data->reg_num); - return 0; + return hclge_read_phy_reg(hdev, data->reg_num, &data->val_out); case SIOCSMIIREG: return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 96553109f44c..cf881108fa57 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -274,7 +274,7 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev) phy_stop(phydev); } -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val) { struct hclge_phy_reg_cmd *req; struct hclge_desc desc; @@ -286,11 +286,14 @@ u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) req->reg_addr = cpu_to_le16(reg_addr); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) + if (ret) { dev_err(&hdev->pdev->dev, "failed to read phy reg, ret = %d.\n", ret); + return ret; + } - return le16_to_cpu(req->reg_val); + *val = le16_to_cpu(req->reg_val); + return 0; } int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 4200d0b6d931..21d434c82475 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -13,7 +13,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle); void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr); +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val); int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val); #endif diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a563a94e2780..122ee23497e6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,7 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select MDIO select NET_DEVLINK select PLDMFW @@ -298,7 +298,7 @@ config ICE select DIMLIB select LIBIE select LIBIE_ADMINQ - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select NET_DEVLINK select PACKING select PLDMFW diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2250426ec91b..2532b6f82e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4382,6 +4382,15 @@ int ice_get_phy_lane_number(struct ice_hw *hw) unsigned int lane; int err; + /* E82X does not have sequential IDs, lane number is PF ID. + * For E825 device, the exception is the variant with external + * PHY (0x579F), in which there is also 1:1 pf_id -> lane_number + * mapping. + */ + if (hw->mac_type == ICE_MAC_GENERIC || + hw->device_id == ICE_DEV_ID_E825C_SGMII) + return hw->pf_id; + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); if (!options) return -ENOMEM; @@ -6497,6 +6506,28 @@ u32 ice_get_link_speed(u16 index) } /** + * ice_get_dest_cgu - get destination CGU dev for given HW + * @hw: pointer to the HW struct + * + * Get CGU client id for CGU register read/write operations. + * + * Return: CGU device id to use in SBQ transactions. + */ +static enum ice_sbq_dev_id ice_get_dest_cgu(struct ice_hw *hw) +{ + /* On dual complex E825 only complex 0 has functional CGU powering all + * the PHYs. + * SBQ destination device cgu points to CGU on a current complex and to + * access primary CGU from the secondary complex, the driver should use + * cgu_peer as a destination device. + */ + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) && + !ice_is_primary(hw)) + return ice_sbq_dev_cgu_peer; + return ice_sbq_dev_cgu; +} + +/** * ice_read_cgu_reg - Read a CGU register * @hw: Pointer to the HW struct * @addr: Register address to read @@ -6510,8 +6541,8 @@ u32 ice_get_link_speed(u16 index) int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_rd, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr }; int err; @@ -6542,8 +6573,8 @@ int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_wr, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr, .data = val }; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 363ae79a3620..013c93b6605e 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1479,7 +1479,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; hw->blk[blk].masks.count = per_pf; - hw->blk[blk].masks.first = hw->pf_id * per_pf; + hw->blk[blk].masks.first = hw->logical_pf_id * per_pf; memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks)); diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 183dd5457d6a..21bb861febbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -50,6 +50,7 @@ enum ice_sbq_dev_id { ice_sbq_dev_phy_0 = 0x02, ice_sbq_dev_cgu = 0x06, ice_sbq_dev_phy_0_peer = 0x0D, + ice_sbq_dev_cgu_peer = 0x0F, }; enum ice_sbq_msg_opcode { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f8a208c84f15..10e2445e0ded 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2281,7 +2281,7 @@ static int igb_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGB_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index f3e7218ba6f3..bb783042d1af 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -810,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGC_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } @@ -2094,6 +2094,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev, netdev_info(adapter->netdev, "Offline testing starting"); set_bit(__IGC_TESTING, &adapter->state); + /* power up PHY for link test */ + igc_power_up_phy_copper(&adapter->hw); + /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 14d275270123..dce4936708eb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -821,9 +821,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ -#ifdef CONFIG_DEBUG_FS struct dentry *ixgbe_dbg_adapter; -#endif /*CONFIG_DEBUG_FS*/ u8 default_up; /* Bitmask indicating in use pools */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ca1ccc630001..3190ce7e44c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -11507,10 +11507,10 @@ static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter) shutdown_aci: mutex_destroy(&adapter->hw.aci.lock); ixgbe_release_hw_control(adapter); - devlink_free(adapter->devlink); clean_up_probe: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); + devlink_free(adapter->devlink); pci_release_mem_regions(pdev); if (disable_dev) pci_disable_device(pdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 114dd88fc71c..6885d2343c48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -641,7 +641,7 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * disabled */ if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) - return -ENOTSUPP; + return -EOPNOTSUPP; if (on) adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde..902d6abaa3ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1516,10 +1516,8 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->xdp_cnt = numptrs; pool->xdp = devm_kcalloc(pfvf->dev, numptrs, sizeof(struct xdp_buff *), GFP_KERNEL); - if (IS_ERR(pool->xdp)) { - netdev_err(pfvf->netdev, "Creation of xsk pool failed\n"); - return PTR_ERR(pool->xdp); - } + if (!pool->xdp) + return -ENOMEM; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index e9f319a9bdd6..60f7ab1d72e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -66,8 +66,8 @@ void mlx5_cq_tasklet_cb(struct tasklet_struct *t) tasklet_schedule(&ctx->task); } -static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, - struct mlx5_eqe *eqe) +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -95,7 +95,15 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, if (schedule_tasklet) tasklet_schedule(&tasklet_ctx->task); } +EXPORT_SYMBOL(mlx5_add_cq_to_tasklet); +static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) +{ + mlx5_core_err(cq->eq->core.dev, + "CQ default completion callback, CQ #%u\n", cq->cqn); +} + +#define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28) /* Callers must verify outbox status in case of err */ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) @@ -121,10 +129,19 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->arm_sn = 0; cq->eq = eq; cq->uid = MLX5_GET(create_cq_in, in, uid); + + /* Kernel CQs must set the arm_db address prior to calling + * this function, allowing for the proper value to be + * initialized. User CQs are responsible for their own + * initialization since they do not use the arm_db field. + */ + if (cq->arm_db) + *cq->arm_db = MLX5_CQ_INIT_CMD_SN; + refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) - cq->comp = mlx5_add_cq_to_tasklet; + cq->comp = mlx5_core_cq_dummy_cb; /* assuming CQ will be deleted before the EQ */ cq->tasklet_ctx.priv = &eq->tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index fceea83abbd7..887adf4807d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -541,7 +541,7 @@ static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id, max_num_channels = mlx5e_get_max_num_channels(mdev); if (val32 > max_num_channels) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Requested num_doorbells (%u) exceeds maximum number of channels (%u)", + "Requested num_doorbells (%u) exceeds max number of channels (%u)", val32, max_num_channels); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 14e3207b14e7..a163f81f07c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,7 +634,10 @@ struct mlx5e_dma_info { struct mlx5e_shampo_hd { struct mlx5e_frag_page *pages; u32 hd_per_wq; + u32 hd_per_page; u16 hd_per_wqe; + u8 log_hd_per_page; + u8 log_hd_entry_size; unsigned long *bitmap; u16 pi; u16 ci; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3692298e10f2..c9bdee9a8b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -100,7 +100,7 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) return sizeof(struct mlx5_ksm) * 4; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); - return 0; + return 1; } u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 0a4fb8c92268..35d9530037a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -804,7 +804,8 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, goto err_xfrm; } - if (mlx5_eswitch_block_mode(priv->mdev)) + err = mlx5_eswitch_block_mode(priv->mdev); + if (err) goto unblock_ipsec; if (x->props.mode == XFRM_MODE_TUNNEL && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 5d7c15abfcaf..f8eaaf37963b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -342,6 +342,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, struct mlx5e_priv *master_priv); void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event); +void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv); static inline struct mlx5_core_dev * mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) @@ -387,6 +388,10 @@ static inline void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *sl static inline void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) { } + +static inline void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) +{ +} #endif #endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index bf1d2769d4f1..feef86fff4bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2893,9 +2893,30 @@ void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) { - if (!priv->ipsec) - return; /* IPsec not supported */ + if (!priv->ipsec || mlx5_devcom_comp_get_size(priv->devcom) < 2) + return; /* IPsec not supported or no peers */ mlx5_devcom_send_event(priv->devcom, event, event, priv); wait_for_completion(&priv->ipsec->comp); } + +void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) +{ + struct mlx5_devcom_comp_dev *tmp = NULL; + struct mlx5e_priv *peer_priv; + + if (!priv->devcom) + return; + + if (!mlx5_devcom_for_each_peer_begin(priv->devcom)) + goto out; + + peer_priv = mlx5_devcom_get_next_peer_data(priv->devcom, &tmp); + if (peer_priv) + complete_all(&peer_priv->ipsec->comp); + + mlx5_devcom_for_each_peer_end(priv->devcom); +out: + mlx5_devcom_unregister_component(priv->devcom); + priv->devcom = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d7a11ff9bbdb..da2d1eb52c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -320,7 +320,6 @@ err_dma_unmap: err_free: kfree(buf); err_out: - priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { mlx5e_ktls_priv_rx_put(priv_rx); + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); return; } c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - if (resync_post_get_progress_params(sq, priv_rx)) + if (resync_post_get_progress_params(sq, priv_rx)) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); mlx5e_ktls_priv_rx_put(priv_rx); + } } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -425,14 +429,21 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_resync_async *async_resync; + struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); - if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); + async_resync = rx_ctx->resync_async; + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; + } dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -443,11 +454,13 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + tls_offload_rx_resync_async_request_end(async_resync, + cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); @@ -472,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk) resync = &priv_rx->resync; mlx5e_ktls_priv_rx_get(priv_rx); - if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) { mlx5e_ktls_priv_rx_put(priv_rx); + return false; + } return true; } @@ -482,6 +497,7 @@ static bool resync_queue_get_psv(struct sock *sk) static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tls_offload_resync_async *resync_async; struct net_device *netdev = rq->netdev; struct net *net = dev_net(netdev); struct sock *sk = NULL; @@ -527,7 +543,8 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) seq = th->seq; datalen = skb->len - depth; - tls_offload_rx_resync_async_request_start(sk, seq, datalen); + resync_async = tls_offload_ctx_rx(tls_get_ctx(sk))->resync_async; + tls_offload_rx_resync_async_request_start(resync_async, seq, datalen); rq->stats->tls_resync_req_start++; unref: @@ -556,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, resync_handle_seq_match(priv_rx, c); } +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_buf *buf; + + buf = wi->tls_get_params.buf; + priv_rx = buf->priv_rx; + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core); +} + /* End of resync section */ void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index f87b65c560ea..cb08799769ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); + +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi); + static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d166c0d5189e..9b93da4d52f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -595,32 +595,55 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; - __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + __u64 upper_limit_mbps; + __u64 upper_limit_gbps; int i; + struct { + int scale; + const char *units_str; + } units[] = { + [MLX5_100_MBPS_UNIT] = { + .scale = 100, + .units_str = "Mbps", + }, + [MLX5_GBPS_UNIT] = { + .scale = 1, + .units_str = "Gbps", + }, + }; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); + upper_limit_mbps = 255 * MLX5E_100MB; + upper_limit_gbps = 255 * MLX5E_1GB; for (i = 0; i <= mlx5_max_tc(mdev); i++) { if (!maxrate->tc_maxrate[i]) { max_bw_unit[i] = MLX5_BW_NO_LIMIT; continue; } - if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + if (maxrate->tc_maxrate[i] <= upper_limit_mbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_100MB); max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; max_bw_unit[i] = MLX5_100_MBPS_UNIT; - } else { + } else if (max_bw_value[i] <= upper_limit_gbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_1GB); max_bw_unit[i] = MLX5_GBPS_UNIT; + } else { + netdev_err(netdev, + "tc_%d maxrate %llu Kbps exceeds limit %llu\n", + i, maxrate->tc_maxrate[i], + upper_limit_gbps); + return -EINVAL; } } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", - __func__, i, max_bw_value[i]); + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %u %s\n", __func__, i, + max_bw_value[i] * units[max_bw_unit[i]].scale, + units[max_bw_unit[i]].units_str); } return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53e5ae252eac..893e1380a7c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2125,14 +2125,12 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, if (!size_read) return i; - if (size_read == -EINVAL) - return -EINVAL; if (size_read < 0) { NL_SET_ERR_MSG_FMT_MOD( extack, "Query module eeprom by page failed, read %u bytes, err %d", i, size_read); - return i; + return size_read; } i += size_read; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a56825921c23..5e17eae81f4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -242,8 +242,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) &attr, mlx5e_devcom_event_mpv, priv); - if (IS_ERR(priv->devcom)) - return PTR_ERR(priv->devcom); + if (!priv->devcom) + return -EINVAL; if (mlx5_core_is_mp_master(priv->mdev)) { mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, @@ -256,7 +256,7 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->devcom)) + if (!priv->devcom) return; if (mlx5_core_is_mp_master(priv->mdev)) { @@ -266,6 +266,7 @@ static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) } mlx5_devcom_unregister_component(priv->devcom); + priv->devcom = NULL; } static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) @@ -790,8 +791,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, int node) { void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u8 log_hd_per_page, log_hd_entry_size; + u16 hd_per_wq, hd_per_wqe; u32 hd_pool_size; - u16 hd_per_wq; int wq_size; int err; @@ -814,11 +816,24 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, if (err) goto err_umr_mkey; - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp); wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); - hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) { + log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE; + log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + } else { + log_hd_per_page = order_base_2(hd_per_wqe); + log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe); + } + + rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe; + rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page); + rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page; + rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size; + + hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page; if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ @@ -2204,7 +2219,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; - *mcq->arm_db = 0; mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; @@ -6120,6 +6134,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); + mlx5e_ipsec_disable_events(priv); mlx5e_disable_blocking_events(priv); mlx5e_disable_async_events(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 263d5628ee44..687cf123211d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -648,17 +648,20 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index) +static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, + int header_index) { - BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE]; + return &shampo->pages[header_index >> shampo->log_hd_per_page]; } -static u64 mlx5e_shampo_hd_offset(int header_index) +static u64 mlx5e_shampo_hd_offset(struct mlx5e_rq *rq, int header_index) { - return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u32 hd_per_page = shampo->hd_per_page; + + return (header_index & (hd_per_page - 1)) << shampo->log_hd_entry_size; } static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index); @@ -671,7 +674,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 pi, header_offset, err, wqe_bbs; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct mlx5e_umr_wqe *umr_wqe; - int headroom, i = 0; + int headroom, i; headroom = rq->buff.headroom; wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); @@ -679,25 +682,24 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); - WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); - while (i < ksm_entries) { - struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + for (i = 0; i < ksm_entries; i++, index++) { + struct mlx5e_frag_page *frag_page; u64 addr; - err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); - if (unlikely(err)) - goto err_unmap; + frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + header_offset = mlx5e_shampo_hd_offset(rq, index); + if (!header_offset) { + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, + frag_page); + if (err) + goto err_unmap; + } addr = page_pool_get_dma_addr_netmem(frag_page->netmem); - - for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { - header_offset = mlx5e_shampo_hd_offset(index++); - - umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) { - .key = cpu_to_be32(lkey), - .va = cpu_to_be64(addr + header_offset + headroom), - }; - } + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(addr + header_offset + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -713,9 +715,9 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, return 0; err_unmap: - while (--i) { + while (--i >= 0) { --index; - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); @@ -735,12 +737,11 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_ksm_entries, len; - max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev), - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + ksm_entries = ALIGN_DOWN(ksm_entries, shampo->hd_per_page); if (!ksm_entries) return 0; @@ -858,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + if (((header_index + 1) & (shampo->hd_per_page - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); @@ -1036,6 +1037,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) netdev_WARN_ONCE(cq->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); +#ifdef CONFIG_MLX5_EN_TLS + if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS) + mlx5e_ktls_rx_resync_async_request_cancel(wi); +#endif mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); @@ -1221,9 +1226,10 @@ static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + void *addr = netmem_address(frag_page->netmem); - return netmem_address(frag_page->netmem) + head_offset; + return addr + head_offset + rq->buff.headroom; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1794,14 +1800,27 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } prog = rcu_dereference(rq->xdp_prog); - if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) { - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - struct mlx5e_wqe_frag_info *pwi; + if (prog) { + u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; - for (pwi = head_wi; pwi < wi; pwi++) - pwi->frag_page->frags++; + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, + rq->flags)) { + struct mlx5e_wqe_frag_info *pwi; + + wi -= old_nr_frags - sinfo->nr_frags; + + for (pwi = head_wi; pwi < wi; pwi++) + pwi->frag_page->frags++; + } + return NULL; /* page/packet was consumed by XDP */ + } + + nr_frags_free = old_nr_frags - sinfo->nr_frags; + if (unlikely(nr_frags_free)) { + wi -= nr_frags_free; + truesize -= nr_frags_free * frag_info->frag_stride; } - return NULL; /* page/packet was consumed by XDP */ } skb = mlx5e_build_linear_skb( @@ -2027,6 +2046,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u32 byte_cnt = cqe_bcnt; struct skb_shared_info *sinfo; unsigned int truesize = 0; + u32 pg_consumed_bytes; struct bpf_prog *prog; struct sk_buff *skb; u32 linear_frame_sz; @@ -2080,7 +2100,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (byte_cnt) { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ - u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) truesize += pg_consumed_bytes; @@ -2096,10 +2117,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } if (prog) { + u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u32 len; + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; + frag_page -= old_nr_frags - sinfo->nr_frags; + for (pfp = head_page; pfp < frag_page; pfp++) pfp->frags++; @@ -2110,9 +2136,19 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; /* page/packet was consumed by XDP */ } + nr_frags_free = old_nr_frags - sinfo->nr_frags; + if (unlikely(nr_frags_free)) { + frag_page -= nr_frags_free; + truesize -= (nr_frags_free - 1) * PAGE_SIZE + + ALIGN(pg_consumed_bytes, + BIT(rq->mpwqe.log_stride_sz)); + } + + len = mxbuf->xdp.data_end - mxbuf->xdp.data; + skb = mlx5e_build_linear_skb( rq, mxbuf->xdp.data_hard_start, linear_frame_sz, - mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len, mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { mlx5e_page_release_fragmented(rq->page_pool, @@ -2137,8 +2173,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w do pagep->frags++; while (++pagep < frag_page); + + headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len, + skb->data_len); + __pskb_pull_tail(skb, headlen); } - __pskb_pull_tail(skb, headlen); } else { dma_addr_t addr; @@ -2230,7 +2269,8 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index); + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; @@ -2246,7 +2286,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); - if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + if (likely(frag_size <= BIT(shampo->log_hd_entry_size))) { /* build SKB around header */ dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir); net_prefetchw(hdr); @@ -2319,7 +2359,10 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) { int nr_frags = skb_shinfo(skb)->nr_frags; - return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; + if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE) + return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE; + else + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 7c029a7d0fd7..a2802cfc9b98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1614,7 +1614,9 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, fec_set_corrected_bits_total(priv, fec_stats); fec_set_block_stats(priv, mode, fec_stats); - fec_set_histograms_stats(priv, mode, hist); + + if (MLX5_CAP_PCAM_REG(priv->mdev, pphcr)) + fec_set_histograms_stats(priv, mode, hist); } #define PPORT_ETH_EXT_OFF(c) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b7227afcb51d..2702b3885f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -256,7 +256,7 @@ mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 mode; #ifdef CONFIG_MLX5_EN_TLS - if (accel && accel->tls.tls_tisn) + if (accel->tls.tls_tisn) return MLX5_INLINE_MODE_TCP_UDP; #endif @@ -982,6 +982,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_attr attr; struct mlx5i_tx_wqe *wqe; + struct mlx5e_accel_tx_state accel = {}; struct mlx5_wqe_datagram_seg *datagram; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_eth_seg *eseg; @@ -992,7 +993,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, int num_dma; u16 pi; - mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); @@ -1009,7 +1010,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); - mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg); + mlx5e_txwqe_build_eseg_csum(sq, skb, &accel, eseg); eseg->mss = attr.mss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 76382626ad41..929adeb50a98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -66,7 +66,6 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; - atomic64_set(&esw->user_count, 0); } static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4cf995be127d..44a142a041b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1978,7 +1978,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); - atomic64_set(&esw->user_count, 0); } static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) @@ -3129,7 +3128,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, attr, mlx5_esw_offloads_devcom_event, esw); - if (IS_ERR(esw->devcom)) + if (!esw->devcom) return; mlx5_devcom_send_event(esw->devcom, @@ -3140,7 +3139,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) { - if (IS_ERR_OR_NULL(esw->devcom)) + if (!esw->devcom) return; mlx5_devcom_send_event(esw->devcom, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index cb1319974f83..ccef64fb40b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -421,6 +421,13 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) __be64 *pas; u32 i; + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + cq_size = roundup_pow_of_two(cq_size); MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); @@ -468,15 +475,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) if (err) goto err_cqwq; - conn->cq.mcq.cqe_sz = 64; - conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; - conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; - *conn->cq.mcq.set_ci_db = 0; - *conn->cq.mcq.arm_db = 0; - conn->cq.mcq.vector = 0; - conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); - mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 59c00c911275..3db0387bf6dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1430,11 +1430,10 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, &attr, NULL, dev); - if (IS_ERR(dev->priv.hca_devcom_comp)) { + if (!dev->priv.hca_devcom_comp) { mlx5_core_err(dev, - "Failed to register devcom HCA component, err: %ld\n", - PTR_ERR(dev->priv.hca_devcom_comp)); - return PTR_ERR(dev->priv.hca_devcom_comp); + "Failed to register devcom HCA component."); + return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d0ba83d77cd1..29e7fa09c32c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -1444,7 +1444,7 @@ static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) compd = mlx5_devcom_register_component(mdev->priv.devc, MLX5_DEVCOM_SHARED_CLOCK, &attr, NULL, mdev); - if (IS_ERR(compd)) + if (!compd) return; mdev->clock_state->compdev = compd; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index faa2833602c8..e749618229bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -76,20 +76,18 @@ mlx5_devcom_dev_alloc(struct mlx5_core_dev *dev) struct mlx5_devcom_dev * mlx5_devcom_register_device(struct mlx5_core_dev *dev) { - struct mlx5_devcom_dev *devc; + struct mlx5_devcom_dev *devc = NULL; mutex_lock(&dev_list_lock); if (devcom_dev_exists(dev)) { - devc = ERR_PTR(-EEXIST); + mlx5_core_err(dev, "devcom device already exists"); goto out; } devc = mlx5_devcom_dev_alloc(dev); - if (!devc) { - devc = ERR_PTR(-ENOMEM); + if (!devc) goto out; - } list_add_tail(&devc->list, &devcom_dev_list); out: @@ -110,8 +108,10 @@ mlx5_devcom_dev_release(struct kref *ref) void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc) { - if (!IS_ERR_OR_NULL(devc)) - kref_put(&devc->ref, mlx5_devcom_dev_release); + if (!devc) + return; + + kref_put(&devc->ref, mlx5_devcom_dev_release); } static struct mlx5_devcom_comp * @@ -122,7 +122,7 @@ mlx5_devcom_comp_alloc(u64 id, const struct mlx5_devcom_match_attr *attr, comp = kzalloc(sizeof(*comp), GFP_KERNEL); if (!comp) - return ERR_PTR(-ENOMEM); + return NULL; comp->id = id; comp->key.key = attr->key; @@ -160,7 +160,7 @@ devcom_alloc_comp_dev(struct mlx5_devcom_dev *devc, devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); if (!devcom) - return ERR_PTR(-ENOMEM); + return NULL; kref_get(&devc->ref); devcom->devc = devc; @@ -240,31 +240,28 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, mlx5_devcom_event_handler_t handler, void *data) { - struct mlx5_devcom_comp_dev *devcom; + struct mlx5_devcom_comp_dev *devcom = NULL; struct mlx5_devcom_comp *comp; - if (IS_ERR_OR_NULL(devc)) - return ERR_PTR(-EINVAL); + if (!devc) + return NULL; mutex_lock(&comp_list_lock); comp = devcom_component_get(devc, id, attr, handler); - if (IS_ERR(comp)) { - devcom = ERR_PTR(-EINVAL); + if (IS_ERR(comp)) goto out_unlock; - } if (!comp) { comp = mlx5_devcom_comp_alloc(id, attr, handler); - if (IS_ERR(comp)) { - devcom = ERR_CAST(comp); + if (!comp) goto out_unlock; - } + list_add_tail(&comp->comp_list, &devcom_comp_list); } mutex_unlock(&comp_list_lock); devcom = devcom_alloc_comp_dev(devc, comp, data); - if (IS_ERR(devcom)) + if (!devcom) kref_put(&comp->ref, mlx5_devcom_comp_release); return devcom; @@ -276,8 +273,10 @@ out_unlock: void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom) { - if (!IS_ERR_OR_NULL(devcom)) - devcom_free_comp_dev(devcom); + if (!devcom) + return; + + devcom_free_comp_dev(devcom); } int mlx5_devcom_comp_get_size(struct mlx5_devcom_comp_dev *devcom) @@ -296,7 +295,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, int err = 0; void *data; - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return -ENODEV; comp = devcom->comp; @@ -338,7 +337,7 @@ void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready) bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return false; return READ_ONCE(devcom->comp->ready); @@ -348,7 +347,7 @@ bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom) { struct mlx5_devcom_comp *comp; - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return false; comp = devcom->comp; @@ -421,21 +420,21 @@ void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, void mlx5_devcom_comp_lock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return; down_write(&devcom->comp->sem); } void mlx5_devcom_comp_unlock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return; up_write(&devcom->comp->sem); } int mlx5_devcom_comp_trylock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return 0; return down_write_trylock(&devcom->comp->sem); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f5c2701f6e87..8e17daae48af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -221,8 +221,8 @@ static int sd_register(struct mlx5_core_dev *dev) attr.net = mlx5_core_net(dev); devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP, &attr, NULL, dev); - if (IS_ERR(devcom)) - return PTR_ERR(devcom); + if (!devcom) + return -EINVAL; sd->devcom = devcom; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df93625c9dfa..70c156591b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -978,9 +978,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) int err; dev->priv.devc = mlx5_devcom_register_device(dev); - if (IS_ERR(dev->priv.devc)) - mlx5_core_warn(dev, "failed to register devcom device %pe\n", - dev->priv.devc); + if (!dev->priv.devc) + mlx5_core_warn(dev, "failed to register devcom device\n"); err = mlx5_query_board_id(dev); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 24ef7d66fa8a..7510c46e58a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -873,12 +873,6 @@ err_free_sqc: return err; } -static void hws_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, struct mlx5hws_send_engine *queue, @@ -901,7 +895,6 @@ static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; - mcq->comp = hws_cq_complete; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { cqe = mlx5_cqwq_get_wqe(&cq->wq, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index 077a77fde670..d034372fa047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1049,12 +1049,6 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) return 0; } -static void dr_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_uars_page *uar, size_t ncqe) @@ -1089,6 +1083,13 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; } + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + cq->mcq.vector = 0; + cq->mdev = mdev; + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); @@ -1112,27 +1113,12 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); - cq->mcq.comp = dr_cq_complete; - err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) goto err_cqwq; - cq->mcq.cqe_sz = 64; - cq->mcq.set_ci_db = cq->wq_ctrl.db.db; - cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; - *cq->mcq.set_ci_db = 0; - - /* set no-zero value, in order to avoid the HW to run db-recovery on - * CQ that used in polling mode. - */ - *cq->mcq.arm_db = cpu_to_be32(2 << 28); - - cq->mcq.vector = 0; - cq->mdev = mdev; - return cq; err_cqwq: diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 2474dfd330f4..fe4e61405284 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -294,7 +294,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) { int i, j; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); for (i = 0; i < lan966x->num_phys_ports; i++) { uint idx = i * lan966x->num_stats; @@ -310,7 +310,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) } } - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static int lan966x_get_sset_count(struct net_device *dev, int sset) @@ -365,7 +365,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); mac_stats->FramesTransmittedOK = lan966x->stats[idx + SYS_COUNT_TX_UC] + @@ -416,7 +416,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_RX_LONG] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_LONG]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static const struct ethtool_rmon_hist_range lan966x_rmon_ranges[] = { @@ -442,7 +442,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); rmon_stats->undersize_pkts = lan966x->stats[idx + SYS_COUNT_RX_SHORT] + @@ -500,7 +500,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_TX_SZ_1024_1526] + lan966x->stats[idx + SYS_COUNT_TX_PMAC_SZ_1024_1526]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); *ranges = lan966x_rmon_ranges; } @@ -603,7 +603,7 @@ void lan966x_stats_get(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); stats->rx_bytes = lan966x->stats[idx + SYS_COUNT_RX_OCT] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_OCT]; @@ -685,7 +685,7 @@ void lan966x_stats_get(struct net_device *dev, stats->collisions = lan966x->stats[idx + SYS_COUNT_TX_COL]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } int lan966x_stats_init(struct lan966x *lan966x) @@ -701,7 +701,7 @@ int lan966x_stats_init(struct lan966x *lan966x) return -ENOMEM; /* Init stats worker */ - mutex_init(&lan966x->stats_lock); + spin_lock_init(&lan966x->stats_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 7001584f1b7a..47752d3fde0b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1261,7 +1261,6 @@ cleanup_ports: cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); debugfs_remove_recursive(lan966x->debugfs_root); @@ -1279,7 +1278,6 @@ static void lan966x_remove(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 4f75f0688369..eea286c29474 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -295,8 +295,8 @@ struct lan966x { const struct lan966x_stat_layout *stats_layout; u32 num_stats; - /* workqueue for reading stats */ - struct mutex stats_lock; + /* lock for reading stats */ + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index a1471e38d118..2a37fc1ba4bc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -403,11 +403,11 @@ static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, u32 counter; id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); if (counter) admin->cache.counter = counter; } @@ -417,14 +417,14 @@ static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, { id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); lan_wr(admin->cache.counter, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static void lan966x_vcap_cache_write(struct net_device *dev, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 132626a3f9f7..9ef72f294117 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2557,14 +2557,16 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, &nn->tlv_caps); if (err) - goto err_free_nn; + goto err_free_xsk_pools; err = nfp_ccm_mbox_alloc(nn); if (err) - goto err_free_nn; + goto err_free_xsk_pools; return nn; +err_free_xsk_pools: + kfree(nn->dp.xsk_pools); err_free_nn: if (nn->dp.netdev) free_netdev(nn->dp.netdev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index d10b58ebf603..301ebee2fdc5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -29,6 +29,10 @@ static void ionic_tx_clean(struct ionic_queue *q, static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { + /* Ensure TX descriptor writes reach memory before NIC reads them. + * Prevents device from fetching stale descriptors. + */ + dma_wmb(); ionic_q_post(q, ring_dbell); } @@ -1444,19 +1448,6 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, bool encap; int err; - desc_info = &q->tx_info[q->head_idx]; - - if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) - return -EIO; - - len = skb->len; - mss = skb_shinfo(skb)->gso_size; - outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPXIP4 | - SKB_GSO_IPXIP6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM)); has_vlan = !!skb_vlan_tag_present(skb); vlan_tci = skb_vlan_tag_get(skb); encap = skb->encapsulation; @@ -1470,12 +1461,21 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (unlikely(err)) { - /* clean up mapping from ionic_tx_map_skb */ - ionic_tx_desc_unmap_bufs(q, desc_info); + if (unlikely(err)) return err; - } + desc_info = &q->tx_info[q->head_idx]; + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; + mss = skb_shinfo(skb)->gso_size; + outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)); if (encap) hdrlen = skb_inner_tcp_all_headers(skb); else diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9d3bd65b85ff..e2d7ce1a85e8 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2211,15 +2211,35 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_tx_timestamp(skb); } - /* Descriptor type must be set after all the above writes */ - dma_wmb(); + if (num_tx_desc > 1) { desc->die_dt = DT_FEND; desc--; + /* When using multi-descriptors, DT_FEND needs to get written + * before DT_FSTART, but the compiler may reorder the memory + * writes in an attempt to optimize the code. + * Use a dma_wmb() barrier to make sure DT_FEND and DT_FSTART + * are written exactly in the order shown in the code. + * This is particularly important for cases where the DMA engine + * is already running when we are running this code. If the DMA + * sees DT_FSTART without the corresponding DT_FEND it will enter + * an error condition. + */ + dma_wmb(); desc->die_dt = DT_FSTART; } else { + /* Descriptor type must be set after all the above writes */ + dma_wmb(); desc->die_dt = DT_FSINGLE; } + + /* Before ringing the doorbell we need to make sure that the latest + * writes have been committed to memory, otherwise it could delay + * things until the doorbell is rang again. + * This is in replacement of the read operation mentioned in the HW + * manuals. + */ + dma_wmb(); ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); priv->cur_tx[q] += num_tx_desc; diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 6fd0c1e9a7d5..7cfd9000f79d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1090,6 +1090,9 @@ void efx_mae_remove_mport(void *desc, void *arg) kfree(mport); } +/* + * Takes ownership of @desc, even if it returns an error + */ static int efx_mae_process_mport(struct efx_nic *efx, struct mae_mport_desc *desc) { @@ -1100,6 +1103,7 @@ static int efx_mae_process_mport(struct efx_nic *efx, if (!IS_ERR_OR_NULL(mport)) { netif_err(efx, drv, efx->net_dev, "mport with id %u does exist!!!\n", desc->mport_id); + kfree(desc); return -EEXIST; } diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index e1c5faff3b71..220eb5ce7583 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -1441,6 +1441,9 @@ static int emac_set_pauseparam(struct net_device *dev, struct emac_priv *priv = netdev_priv(dev); u8 fc = 0; + if (!netif_running(dev)) + return -ENETDOWN; + priv->flow_control_autoneg = pause->autoneg; if (pause->autoneg) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 51ea0caf16c1..0786816e05f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1446,14 +1446,15 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable) } } else { if (bsp_priv->clk_enabled) { + if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) { + bsp_priv->ops->set_clock_selection(bsp_priv, + bsp_priv->clock_input, false); + } + clk_bulk_disable_unprepare(bsp_priv->num_clks, bsp_priv->clks); clk_disable_unprepare(bsp_priv->clk_phy); - if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) - bsp_priv->ops->set_clock_selection(bsp_priv, - bsp_priv->clock_input, false); - bsp_priv->clk_enabled = false; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 650d75b73e0b..7b90ecd3a55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4089,18 +4089,11 @@ static int stmmac_release(struct net_device *dev) static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, struct stmmac_tx_queue *tx_q) { - u16 tag = 0x0, inner_tag = 0x0; - u32 inner_type = 0x0; struct dma_desc *p; + u16 tag = 0x0; - if (!priv->dma_cap.vlins) + if (!priv->dma_cap.vlins || !skb_vlan_tag_present(skb)) return false; - if (!skb_vlan_tag_present(skb)) - return false; - if (skb->vlan_proto == htons(ETH_P_8021AD)) { - inner_tag = skb_vlan_tag_get(skb); - inner_type = STMMAC_VLAN_INSERT; - } tag = skb_vlan_tag_get(skb); @@ -4109,7 +4102,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, else p = &tx_q->dma_tx[tx_q->cur_tx]; - if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) + if (stmmac_set_desc_vlan_tag(priv, p, tag, 0x0, 0x0)) return false; stmmac_set_tx_owner(priv, p); @@ -4507,6 +4500,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) bool has_vlan, set_ic; int entry, first_tx; dma_addr_t des; + u32 sdu_len; tx_q = &priv->dma_conf.tx_queue[queue]; txq_stats = &priv->xstats.txq_stats[queue]; @@ -4524,10 +4518,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } if (priv->est && priv->est->enable && - priv->est->max_sdu[queue] && - skb->len > priv->est->max_sdu[queue]){ - priv->xstats.max_sdu_txq_drop[queue]++; - goto max_sdu_err; + priv->est->max_sdu[queue]) { + sdu_len = skb->len; + /* Add VLAN tag length if VLAN tag insertion offload is requested */ + if (priv->dma_cap.vlins && skb_vlan_tag_present(skb)) + sdu_len += VLAN_HLEN; + if (sdu_len > priv->est->max_sdu[queue]) { + priv->xstats.max_sdu_txq_drop[queue]++; + goto max_sdu_err; + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@ -7573,11 +7572,8 @@ int stmmac_dvr_probe(struct device *device, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; } - if (priv->dma_cap.vlins) { + if (priv->dma_cap.vlins) ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; - if (priv->dma_cap.dvlan) - ndev->features |= NETIF_F_HW_VLAN_STAG_TX; - } #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 97e89a604abd..3b4d4696afe9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -981,7 +981,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; - if (qopt->num_entries >= dep) + if (qopt->num_entries > dep) return -EINVAL; if (!qopt->cycle_time) return -ERANGE; @@ -1012,7 +1012,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, s64 delta_ns = qopt->entries[i].interval; u32 gates = qopt->entries[i].gate_mask; - if (delta_ns > GENMASK(wid, 0)) + if (delta_ns > GENMASK(wid - 1, 0)) return -ERANGE; if (gates > GENMASK(31 - wid, 0)) return -ERANGE; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index 0b6f6228ae35..ff02a79c00d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -212,7 +212,7 @@ static void vlan_enable(struct mac_device_info *hw, u32 type) value = readl(ioaddr + VLAN_INCL); value |= VLAN_VLTI; - value |= VLAN_CSVL; /* Only use SVLAN */ + value &= ~VLAN_CSVL; /* Only use CVLAN */ value &= ~VLAN_VLC; value |= (type << VLAN_VLC_SHIFT) & VLAN_VLC; writel(value, ioaddr + VLAN_INCL); diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index fa96db7c1a13..66e8b224827b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -276,9 +276,31 @@ static int am65_cpsw_iet_set_verify_timeout_count(struct am65_cpsw_port *port) /* The number of wireside clocks contained in the verify * timeout counter. The default is 0x1312d0 * (10ms at 125Mhz in 1G mode). + * The frequency of the clock depends on the link speed + * and the PHY interface. */ - val = 125 * HZ_PER_MHZ; /* assuming 125MHz wireside clock */ + switch (port->slave.phy_if) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + if (port->qos.link_speed == SPEED_1000) + val = 125 * HZ_PER_MHZ; /* 125 MHz at 1000Mbps*/ + else if (port->qos.link_speed == SPEED_100) + val = 25 * HZ_PER_MHZ; /* 25 MHz at 100Mbps*/ + else + val = (25 * HZ_PER_MHZ) / 10; /* 2.5 MHz at 10Mbps*/ + break; + + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_SGMII: + val = 125 * HZ_PER_MHZ; /* 125 MHz */ + break; + default: + netdev_err(port->ndev, "selected mode does not supported IET\n"); + return -EOPNOTSUPP; + } val /= MILLIHZ_PER_HZ; /* count per ms timeout */ val *= verify_time_ms; /* count for timeout ms */ @@ -295,20 +317,21 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) u32 ctrl, status; int try; - try = 20; - do { - /* Reset the verify state machine by writing 1 - * to LINKFAIL - */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + try = 3; - /* Clear MAC_LINKFAIL bit to start Verify. */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + /* Reset the verify state machine by writing 1 + * to LINKFAIL + */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + /* Clear MAC_LINKFAIL bit to start Verify. */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + do { msleep(port->qos.iet.verify_time_ms); status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); @@ -330,7 +353,7 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) netdev_dbg(port->ndev, "MAC Merge verify error\n"); return -ENODEV; } - } while (try-- > 0); + } while (--try > 0); netdev_dbg(port->ndev, "MAC Merge verify timeout\n"); return -ETIMEDOUT; diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 59d6ab989c55..8ffbfaa3ab18 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -163,7 +163,9 @@ struct am65_cpts { struct device_node *clk_mux_np; struct clk *refclk; u32 refclk_freq; - struct list_head events; + /* separate lists to handle TX and RX timestamp independently */ + struct list_head events_tx; + struct list_head events_rx; struct list_head pool; struct am65_cpts_event pool_data[AM65_CPTS_MAX_EVENTS]; spinlock_t lock; /* protects events lists*/ @@ -227,6 +229,24 @@ static void am65_cpts_disable(struct am65_cpts *cpts) am65_cpts_write32(cpts, 0, int_enable); } +static int am65_cpts_purge_event_list(struct am65_cpts *cpts, + struct list_head *events) +{ + struct list_head *this, *next; + struct am65_cpts_event *event; + int removed = 0; + + list_for_each_safe(this, next, events) { + event = list_entry(this, struct am65_cpts_event, list); + if (time_after(jiffies, event->tmo)) { + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + ++removed; + } + } + return removed; +} + static int am65_cpts_event_get_port(struct am65_cpts_event *event) { return (event->event1 & AM65_CPTS_EVENT_1_PORT_NUMBER_MASK) >> @@ -239,20 +259,12 @@ static int am65_cpts_event_get_type(struct am65_cpts_event *event) AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT; } -static int am65_cpts_cpts_purge_events(struct am65_cpts *cpts) +static int am65_cpts_purge_events(struct am65_cpts *cpts) { - struct list_head *this, *next; - struct am65_cpts_event *event; int removed = 0; - list_for_each_safe(this, next, &cpts->events) { - event = list_entry(this, struct am65_cpts_event, list); - if (time_after(jiffies, event->tmo)) { - list_del_init(&event->list); - list_add(&event->list, &cpts->pool); - ++removed; - } - } + removed += am65_cpts_purge_event_list(cpts, &cpts->events_tx); + removed += am65_cpts_purge_event_list(cpts, &cpts->events_rx); if (removed) dev_dbg(cpts->dev, "event pool cleaned up %d\n", removed); @@ -287,7 +299,7 @@ static int __am65_cpts_fifo_read(struct am65_cpts *cpts) struct am65_cpts_event, list); if (!event) { - if (am65_cpts_cpts_purge_events(cpts)) { + if (am65_cpts_purge_events(cpts)) { dev_err(cpts->dev, "cpts: event pool empty\n"); ret = -1; goto out; @@ -306,11 +318,21 @@ static int __am65_cpts_fifo_read(struct am65_cpts *cpts) cpts->timestamp); break; case AM65_CPTS_EV_RX: + event->tmo = jiffies + + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT); + + list_move_tail(&event->list, &cpts->events_rx); + + dev_dbg(cpts->dev, + "AM65_CPTS_EV_RX e1:%08x e2:%08x t:%lld\n", + event->event1, event->event2, + event->timestamp); + break; case AM65_CPTS_EV_TX: event->tmo = jiffies + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT); - list_move_tail(&event->list, &cpts->events); + list_move_tail(&event->list, &cpts->events_tx); dev_dbg(cpts->dev, "AM65_CPTS_EV_TX e1:%08x e2:%08x t:%lld\n", @@ -828,7 +850,7 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, return found; } -static void am65_cpts_find_ts(struct am65_cpts *cpts) +static void am65_cpts_find_tx_ts(struct am65_cpts *cpts) { struct am65_cpts_event *event; struct list_head *this, *next; @@ -837,7 +859,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts) LIST_HEAD(events); spin_lock_irqsave(&cpts->lock, flags); - list_splice_init(&cpts->events, &events); + list_splice_init(&cpts->events_tx, &events); spin_unlock_irqrestore(&cpts->lock, flags); list_for_each_safe(this, next, &events) { @@ -850,7 +872,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts) } spin_lock_irqsave(&cpts->lock, flags); - list_splice_tail(&events, &cpts->events); + list_splice_tail(&events, &cpts->events_tx); list_splice_tail(&events_free, &cpts->pool); spin_unlock_irqrestore(&cpts->lock, flags); } @@ -861,7 +883,7 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp) unsigned long flags; long delay = -1; - am65_cpts_find_ts(cpts); + am65_cpts_find_tx_ts(cpts); spin_lock_irqsave(&cpts->txq.lock, flags); if (!skb_queue_empty(&cpts->txq)) @@ -905,7 +927,7 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) spin_lock_irqsave(&cpts->lock, flags); __am65_cpts_fifo_read(cpts); - list_for_each_safe(this, next, &cpts->events) { + list_for_each_safe(this, next, &cpts->events_rx) { event = list_entry(this, struct am65_cpts_event, list); if (time_after(jiffies, event->tmo)) { list_move(&event->list, &cpts->pool); @@ -1155,7 +1177,8 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, return ERR_PTR(ret); mutex_init(&cpts->ptp_clk_lock); - INIT_LIST_HEAD(&cpts->events); + INIT_LIST_HEAD(&cpts->events_tx); + INIT_LIST_HEAD(&cpts->events_rx); INIT_LIST_HEAD(&cpts->pool); spin_lock_init(&cpts->lock); skb_queue_head_init(&cpts->txq); diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index da53eb04b0a4..3f8237c17d09 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -66,6 +66,9 @@ #define FDB_GEN_CFG1 0x60 #define SMEM_VLAN_OFFSET 8 #define SMEM_VLAN_OFFSET_MASK GENMASK(25, 8) +#define FDB_HASH_SIZE_MASK GENMASK(6, 3) +#define FDB_HASH_SIZE_SHIFT 3 +#define FDB_HASH_SIZE 3 #define FDB_GEN_CFG2 0x64 #define FDB_VLAN_EN BIT(6) @@ -463,6 +466,8 @@ void icssg_init_emac_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, (FDB_PRU0_EN | FDB_PRU1_EN | FDB_HOST_EN)); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + @@ -484,6 +489,8 @@ void icssg_init_fw_offload_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, FDB_EN_ALL); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 857820657bac..5ee13db568f0 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1338,10 +1338,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_channel = knav_dma_open_channel(dev, tx_pipe->dma_chan_name, &config); - if (IS_ERR(tx_pipe->dma_channel)) { + if (!tx_pipe->dma_channel) { dev_err(dev, "failed opening tx chan(%s)\n", tx_pipe->dma_chan_name); - ret = PTR_ERR(tx_pipe->dma_channel); + ret = -EINVAL; goto err; } @@ -1359,7 +1359,7 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) return 0; err: - if (!IS_ERR_OR_NULL(tx_pipe->dma_channel)) + if (tx_pipe->dma_channel) knav_dma_close_channel(tx_pipe->dma_channel); tx_pipe->dma_channel = NULL; return ret; @@ -1678,10 +1678,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device, netcp->dma_chan_name, &config); - if (IS_ERR(netcp->rx_channel)) { + if (!netcp->rx_channel) { dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n", netcp->dma_chan_name); - ret = PTR_ERR(netcp->rx_channel); + ret = -EINVAL; goto fail; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 1e2713f0c921..b37d6cfbfbe9 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2427,7 +2427,8 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = PCI_FUNC(pdev->devfn); + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index d89b9b8a0a2c..2f8319e03182 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -97,6 +97,8 @@ #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ +#define WX_CFG_PORT_ST 0x14404 +#define WX_CFG_PORT_ST_LANID GENMASK(9, 8) #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) #define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ @@ -557,8 +559,6 @@ enum WX_MSCA_CMD_value { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -#define WX_CFG_PORT_ST 0x14404 - /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 36ccc53b1797..ef860cfc629f 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -96,11 +96,13 @@ static netdev_tx_t mctp_usb_start_xmit(struct sk_buff *skb, skb->data, skb->len, mctp_usb_out_complete, skb); + /* Stops TX queue first to prevent race condition with URB complete */ + netif_stop_queue(dev); rc = usb_submit_urb(urb, GFP_ATOMIC); - if (rc) + if (rc) { + netif_wake_queue(dev); goto err_drop; - else - netif_stop_queue(dev); + } return NETDEV_TX_OK; diff --git a/drivers/net/mdio/mdio-airoha.c b/drivers/net/mdio/mdio-airoha.c index 1dc9939c8d7d..52e7475121ea 100644 --- a/drivers/net/mdio/mdio-airoha.c +++ b/drivers/net/mdio/mdio-airoha.c @@ -219,6 +219,8 @@ static int airoha_mdio_probe(struct platform_device *pdev) priv = bus->priv; priv->base_addr = addr; priv->regmap = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 194570443493..bb6e03a92956 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -886,8 +886,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf) static void update_userdata(struct netconsole_target *nt) { - int complete_idx = 0, child_count = 0; struct list_head *entry; + int child_count = 0; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; @@ -897,8 +900,11 @@ static void update_userdata(struct netconsole_target *nt) struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) - break; + if (child_count >= MAX_EXTRADATA_ITEMS) { + spin_unlock_irqrestore(&target_list_lock, flags); + WARN_ON_ONCE(1); + return; + } child_count++; item = container_of(entry, struct config_item, ci_entry); @@ -912,12 +918,11 @@ static void update_userdata(struct netconsole_target *nt) * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->extradata_complete[complete_idx], - MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", - item->ci_name, udm_item->value); + nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", + item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->extradata_complete, - sizeof(nt->extradata_complete)); + spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, @@ -931,6 +936,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); @@ -944,6 +950,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -969,6 +976,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) @@ -989,6 +997,7 @@ unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1003,6 +1012,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) @@ -1023,6 +1033,7 @@ unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1037,6 +1048,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) @@ -1057,6 +1069,7 @@ unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1072,6 +1085,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) @@ -1100,6 +1114,7 @@ unlock_ok: ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 289f62c5d2c7..0d7f30360d87 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -560,16 +560,34 @@ static void ovpn_tcp_close(struct sock *sk, long timeout) static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll(file, sock, wait); + struct sk_buff_head *queue = &sock->sk->sk_receive_queue; struct ovpn_socket *ovpn_sock; + struct ovpn_peer *peer = NULL; + __poll_t mask; rcu_read_lock(); ovpn_sock = rcu_dereference_sk_user_data(sock->sk); - if (ovpn_sock && ovpn_sock->peer && - !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue)) - mask |= EPOLLIN | EPOLLRDNORM; + /* if we landed in this callback, we expect to have a + * meaningful state. The ovpn_socket lifecycle would + * prevent it otherwise. + */ + if (WARN(!ovpn_sock || !ovpn_sock->peer, + "ovpn: null state in ovpn_tcp_poll!")) { + rcu_read_unlock(); + return 0; + } + + if (ovpn_peer_hold(ovpn_sock->peer)) { + peer = ovpn_sock->peer; + queue = &peer->tcp.user_queue; + } rcu_read_unlock(); + mask = datagram_poll_queue(file, sock, wait, queue); + + if (peer) + ovpn_peer_put(peer); + return mask; } diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index deeefb962566..36a0c1b7f59c 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -738,6 +738,12 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } + /* Although the DP83867 reports EEE capability through the + * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature + * is not actually implemented in hardware. + */ + phy_disable_eee(phydev); + if (phy_interface_is_rgmii(phydev) || phydev->interface == PHY_INTERFACE_MODE_SGMII) { val = phy_read(phydev, MII_DP83867_PHYCTRL); diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index a2cd1cc35cde..1f381d7b13ff 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -84,7 +84,7 @@ #define DP83869_CLK_DELAY_DEF 7 /* STRAP_STS1 bits */ -#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0) +#define DP83869_STRAP_OP_MODE_MASK GENMASK(11, 9) #define DP83869_STRAP_STS1_RESERVED BIT(11) #define DP83869_STRAP_MIRROR_ENABLED BIT(12) @@ -528,7 +528,7 @@ static int dp83869_set_strapped_mode(struct phy_device *phydev) if (val < 0) return val; - dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK; + dp83869->mode = FIELD_GET(DP83869_STRAP_OP_MODE_MASK, val); return 0; } diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index cad6ed3aa10b..4354241137d5 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -73,8 +73,11 @@ int mdiobus_register_device(struct mdio_device *mdiodev) return err; err = mdiobus_register_reset(mdiodev); - if (err) + if (err) { + gpiod_put(mdiodev->reset_gpio); + mdiodev->reset_gpio = NULL; return err; + } /* Assert the reset signal */ mdio_device_reset(mdiodev, 1); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 79ce3eb6752b..01c87c9b7702 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -466,6 +466,12 @@ struct lan8842_priv { u16 rev; }; +struct lanphy_reg_data { + int page; + u16 addr; + u16 val; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -2836,6 +2842,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, #define LAN8814_PAGE_PCS_DIGITAL 2 /** + * LAN8814_PAGE_EEE - Selects Extended Page 3. + * + * This page contains EEE registers + */ +#define LAN8814_PAGE_EEE 3 + +/** * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4. * * This page contains device-common registers that affect the entire chip. @@ -2854,6 +2867,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, #define LAN8814_PAGE_PORT_REGS 5 /** + * LAN8814_PAGE_POWER_REGS - Selects Extended Page 28. + * + * This page contains analog control registers and power mode registers. + */ +#define LAN8814_PAGE_POWER_REGS 28 + +/** * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31. * * This page appears to hold fundamental system or global controls. In the @@ -4262,6 +4282,8 @@ static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, { struct lan8814_shared_priv *shared = phy_package_get_priv(phydev); + shared->phydev = phydev; + /* Initialise shared lock for clock*/ mutex_init(&shared->shared_lock); @@ -4317,8 +4339,6 @@ static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, phydev_dbg(phydev, "successfully registered ptp clock\n"); - shared->phydev = phydev; - /* The EP.4 is shared between all the PHYs in the package and also it * can be accessed by any of the PHYs */ @@ -4360,12 +4380,6 @@ static int lan8814_config_init(struct phy_device *phydev) { struct kszphy_priv *lan8814 = phydev->priv; - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - /* Disable ANEG with QSGMII PCS Host side */ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, @@ -4451,6 +4465,12 @@ static int lan8814_probe(struct phy_device *phydev) addr, sizeof(struct lan8814_shared_priv)); if (phy_package_init_once(phydev)) { + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + err = lan8814_release_coma_mode(phydev); if (err) return err; @@ -5884,6 +5904,144 @@ static int lan8842_probe(struct phy_device *phydev) return 0; } +#define LAN8814_POWER_MGMT_MODE_3_ANEG_MDI 0x13 +#define LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX 0x14 +#define LAN8814_POWER_MGMT_MODE_5_10BT_MDI 0x15 +#define LAN8814_POWER_MGMT_MODE_6_10BT_MDIX 0x16 +#define LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN 0x17 +#define LAN8814_POWER_MGMT_MODE_8_100BT_MDI 0x18 +#define LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX 0x19 +#define LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX 0x1a +#define LAN8814_POWER_MGMT_MODE_11_100BT_MDIX 0x1b +#define LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX 0x1c +#define LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX 0x1d +#define LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX 0x1e + +#define LAN8814_POWER_MGMT_DLLPD_D BIT(0) +#define LAN8814_POWER_MGMT_ADCPD_D BIT(1) +#define LAN8814_POWER_MGMT_PGAPD_D BIT(2) +#define LAN8814_POWER_MGMT_TXPD_D BIT(3) +#define LAN8814_POWER_MGMT_DLLPD_C BIT(4) +#define LAN8814_POWER_MGMT_ADCPD_C BIT(5) +#define LAN8814_POWER_MGMT_PGAPD_C BIT(6) +#define LAN8814_POWER_MGMT_TXPD_C BIT(7) +#define LAN8814_POWER_MGMT_DLLPD_B BIT(8) +#define LAN8814_POWER_MGMT_ADCPD_B BIT(9) +#define LAN8814_POWER_MGMT_PGAPD_B BIT(10) +#define LAN8814_POWER_MGMT_TXPD_B BIT(11) +#define LAN8814_POWER_MGMT_DLLPD_A BIT(12) +#define LAN8814_POWER_MGMT_ADCPD_A BIT(13) +#define LAN8814_POWER_MGMT_PGAPD_A BIT(14) +#define LAN8814_POWER_MGMT_TXPD_A BIT(15) + +#define LAN8814_POWER_MGMT_C_D (LAN8814_POWER_MGMT_DLLPD_D | \ + LAN8814_POWER_MGMT_ADCPD_D | \ + LAN8814_POWER_MGMT_PGAPD_D | \ + LAN8814_POWER_MGMT_DLLPD_C | \ + LAN8814_POWER_MGMT_ADCPD_C | \ + LAN8814_POWER_MGMT_PGAPD_C) + +#define LAN8814_POWER_MGMT_B_C_D (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B) + +#define LAN8814_POWER_MGMT_VAL1 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL2 LAN8814_POWER_MGMT_C_D + +#define LAN8814_POWER_MGMT_VAL3 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL4 (LAN8814_POWER_MGMT_B_C_D | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D + +#define LAN8814_EEE_WAKE_TX_TIMER 0x0e +#define LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL 0x1f + +static const struct lanphy_reg_data short_center_tap_errata[] = { + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_5_10BT_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_6_10BT_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN, + LAN8814_POWER_MGMT_VAL2 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_8_100BT_MDI, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_11_100BT_MDIX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX, + LAN8814_POWER_MGMT_VAL4 }, +}; + +static const struct lanphy_reg_data waketx_timer_errata[] = { + { LAN8814_PAGE_EEE, + LAN8814_EEE_WAKE_TX_TIMER, + LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL }, +}; + +static int lanphy_write_reg_data(struct phy_device *phydev, + const struct lanphy_reg_data *data, + size_t num) +{ + int ret = 0; + + while (num--) { + ret = lanphy_write_page_reg(phydev, data->page, data->addr, + data->val); + if (ret) + break; + } + + return ret; +} + +static int lan8842_erratas(struct phy_device *phydev) +{ + int ret; + + ret = lanphy_write_reg_data(phydev, short_center_tap_errata, + ARRAY_SIZE(short_center_tap_errata)); + if (ret) + return ret; + + return lanphy_write_reg_data(phydev, waketx_timer_errata, + ARRAY_SIZE(waketx_timer_errata)); +} + static int lan8842_config_init(struct phy_device *phydev) { int ret; @@ -5896,6 +6054,11 @@ static int lan8842_config_init(struct phy_device *phydev) if (ret < 0) return ret; + /* Apply the erratas for this device */ + ret = lan8842_erratas(phydev); + if (ret < 0) + return ret; + /* Even if the GPIOs are set to control the LEDs the behaviour of the * LEDs is wrong, they are not blinking when there is traffic. * To fix this it is required to set extended LED mode diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index a724b21b4fe7..16a347084293 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -154,7 +154,7 @@ #define RTL_8211FVD_PHYID 0x001cc878 #define RTL_8221B 0x001cc840 #define RTL_8221B_VB_CG 0x001cc849 -#define RTL_8221B_VN_CG 0x001cc84a +#define RTL_8221B_VM_CG 0x001cc84a #define RTL_8251B 0x001cc862 #define RTL_8261C 0x001cc890 @@ -1523,16 +1523,16 @@ static int rtl8221b_vb_cg_c45_match_phy_device(struct phy_device *phydev, return rtlgen_is_c45_match(phydev, RTL_8221B_VB_CG, true); } -static int rtl8221b_vn_cg_c22_match_phy_device(struct phy_device *phydev, +static int rtl8221b_vm_cg_c22_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - return rtlgen_is_c45_match(phydev, RTL_8221B_VN_CG, false); + return rtlgen_is_c45_match(phydev, RTL_8221B_VM_CG, false); } -static int rtl8221b_vn_cg_c45_match_phy_device(struct phy_device *phydev, +static int rtl8221b_vm_cg_c45_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - return rtlgen_is_c45_match(phydev, RTL_8221B_VN_CG, true); + return rtlgen_is_c45_match(phydev, RTL_8221B_VM_CG, true); } static int rtl_internal_nbaset_match_phy_device(struct phy_device *phydev, @@ -1879,7 +1879,7 @@ static struct phy_driver realtek_drvs[] = { .suspend = genphy_c45_pma_suspend, .resume = rtlgen_c45_resume, }, { - .match_phy_device = rtl8221b_vn_cg_c22_match_phy_device, + .match_phy_device = rtl8221b_vm_cg_c22_match_phy_device, .name = "RTL8221B-VM-CG 2.5Gbps PHY (C22)", .probe = rtl822x_probe, .get_features = rtl822x_get_features, @@ -1892,8 +1892,8 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, }, { - .match_phy_device = rtl8221b_vn_cg_c45_match_phy_device, - .name = "RTL8221B-VN-CG 2.5Gbps PHY (C45)", + .match_phy_device = rtl8221b_vm_cg_c45_match_phy_device, + .name = "RTL8221B-VM-CG 2.5Gbps PHY (C45)", .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 85bd5d845409..232bbd79a4de 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) int i; unsigned long gpio_bits = dev->driver_info->data; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { @@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) { @@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) int ret; u8 buf[ETH_ALEN] = {0}; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 11352d85475a..3a4985b582cb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -192,6 +192,12 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (!skbn) return 0; + /* Raw IP packets don't have a MAC header, but other subsystems + * (like xfrm) may still access MAC header offsets, so they must + * be initialized. + */ + skb_reset_mac_header(skbn); + switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 92add3daadbb..278e6cb6f4d9 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -685,9 +685,16 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, rtl8150_t *dev = netdev_priv(netdev); int count, res; + /* pad the frame and ensure terminating USB packet, datasheet 9.2.3 */ + count = max(skb->len, ETH_ZLEN); + if (count % 64 == 0) + count++; + if (skb_padto(skb, count)) { + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + netif_stop_queue(netdev); - count = (skb->len < 60) ? 60 : skb->len; - count = (count & 0x3f) ? count : count + 1; dev->tx_skb = skb; usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), skb->data, count, write_bulk_callback, dev); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index bf01f2728531..697cd9d866d3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1659,6 +1659,8 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); + cancel_work_sync(&dev->kevent); + while ((urb = usb_get_from_anchor(&dev->deferred))) { dev_kfree_skb(urb->context); kfree(urb->sg); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a757cbcab87f..0369dda5ed60 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, goto ok; } - /* - * Verify that we can indeed put this data into a skb. - * This is here to handle cases when the device erroneously - * tries to receive more than is possible. This is usually - * the case of a broken device. - */ - if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { - net_dbg_ratelimited("%s: too much data\n", skb->dev->name); - dev_kfree_skb(skb); - return NULL; - } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); @@ -1379,9 +1368,14 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); - /* TODO: support multi buffer. */ - if (prog && num_buf == 1) - ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); + if (prog) { + /* TODO: support multi buffer. */ + if (num_buf == 1) + ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, + stats); + else + ret = XDP_ABORTED; + } rcu_read_unlock(); switch (ret) { @@ -2107,9 +2101,19 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); + struct sk_buff *skb; + + /* Make sure that len does not exceed the size allocated in + * add_recvbuf_big. + */ + if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { + pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", + dev->name, len, + (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); + goto err; + } + skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; @@ -2534,6 +2538,13 @@ err_buf: return NULL; } +static inline u32 +virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) +{ + return __le16_to_cpu(hdr_hash->hash_value_lo) | + (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); +} + static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { @@ -2560,7 +2571,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, default: rss_hash_type = PKT_HASH_TYPE_NONE; } - skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); + skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); } static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, @@ -2620,22 +2631,28 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } - /* 1. Save the flags early, as the XDP program might overwrite them. + /* About the flags below: + * 1. Save the flags early, as the XDP program might overwrite them. * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID * stay valid after XDP processing. * 2. XDP doesn't work with partially checksummed packets (refer to * virtnet_xdp_set()), so packets marked as * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. */ - flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; - if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); - else if (vi->big_packets) + } else if (vi->big_packets) { + void *p = page_address((struct page *)buf); + + flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; skb = receive_big(dev, vi, rq, buf, len, stats); - else + } else { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); + } if (unlikely(!skb)) return; @@ -3306,6 +3323,10 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); + /* Make sure it's safe to cast between formats */ + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); + can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; @@ -6745,7 +6766,7 @@ static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, hash_report = VIRTIO_NET_HASH_REPORT_NONE; *rss_type = virtnet_xdp_rss_type[hash_report]; - *hash = __le32_to_cpu(hdr_hash->hash_value); + *hash = virtio_net_hash_value(hdr_hash); return 0; } diff --git a/drivers/net/wan/framer/pef2256/pef2256.c b/drivers/net/wan/framer/pef2256/pef2256.c index c5501826db1e..c058cc79137d 100644 --- a/drivers/net/wan/framer/pef2256/pef2256.c +++ b/drivers/net/wan/framer/pef2256/pef2256.c @@ -648,7 +648,8 @@ static int pef2256_add_audio_devices(struct pef2256 *pef2256) audio_devs[i].id = i; } - ret = mfd_add_devices(pef2256->dev, 0, audio_devs, count, NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, audio_devs, count, + NULL, 0, NULL); kfree(audio_devs); return ret; } @@ -822,8 +823,8 @@ static int pef2256_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pef2256); - ret = mfd_add_devices(pef2256->dev, 0, pef2256_devs, - ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, pef2256_devs, + ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); if (ret) { dev_err(pef2256->dev, "add devices failed (%d)\n", ret); return ret; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index e595b0979a56..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1764,32 +1764,33 @@ void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { - unsigned long timeout = jiffies + WMI_SERVICE_READY_TIMEOUT_HZ; unsigned long time_left, i; - /* Sometimes the PCI HIF doesn't receive interrupt - * for the service ready message even if the buffer - * was completed. PCIe sniffer shows that it's - * because the corresponding CE ring doesn't fires - * it. Workaround here by polling CE rings. Since - * the message could arrive at any time, continue - * polling until timeout. - */ - do { + time_left = wait_for_completion_timeout(&ar->wmi.service_ready, + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + /* Sometimes the PCI HIF doesn't receive interrupt + * for the service ready message even if the buffer + * was completed. PCIe sniffer shows that it's + * because the corresponding CE ring doesn't fires + * it. Workaround here by polling CE rings once. + */ + ath10k_warn(ar, "failed to receive service ready completion, polling..\n"); + for (i = 0; i < CE_COUNT; i++) ath10k_hif_send_complete_check(ar, i, 1); - /* The 100 ms granularity is a tradeoff considering scheduler - * overhead and response latency - */ time_left = wait_for_completion_timeout(&ar->wmi.service_ready, - msecs_to_jiffies(100)); - if (time_left) - return 0; - } while (time_before(jiffies, timeout)); + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + ath10k_warn(ar, "polling timed out\n"); + return -ETIMEDOUT; + } - ath10k_warn(ar, "failed to receive service ready completion\n"); - return -ETIMEDOUT; + ath10k_warn(ar, "service ready completion received, continuing normally\n"); + } + + return 0; } int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar) @@ -1937,6 +1938,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (cmd_id == WMI_CMD_UNSUPPORTED) { ath10k_warn(ar, "wmi command %d is not supported by firmware\n", cmd_id); + dev_kfree_skb_any(skb); return ret; } diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 2810752260f2..812686173ac8 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -912,42 +912,84 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { static const struct dmi_system_id ath11k_pm_quirk_table[] = { { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* X13 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* X13 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21J4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K5"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K6"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T16 G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K7"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T16 G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K8"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P16s G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K9"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P16s G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21KA"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21F8"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21F9"), }, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 106e2530b64e..0e41b5a91d66 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include <net/mac80211.h> @@ -4417,9 +4417,9 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) - flags |= WMI_KEY_PAIRWISE; + flags = WMI_KEY_PAIRWISE; else - flags |= WMI_KEY_GROUP; + flags = WMI_KEY_GROUP; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", @@ -4456,7 +4456,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && !arvif->num_stations); - if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + if (flags == WMI_KEY_PAIRWISE || cmd == SET_KEY || is_ap_with_no_sta) { ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); @@ -4470,7 +4470,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, goto exit; } - if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + if (flags == WMI_KEY_GROUP && cmd == SET_KEY && is_ap_with_no_sta) arvif->reinstall_group_keys = true; } diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 0491e3fd6b5e..e3b444333dee 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -5961,6 +5961,9 @@ static int wmi_process_mgmt_tx_comp(struct ath11k *ar, dma_unmap_single(ar->ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); + memset(&info->status, 0, sizeof(info->status)); + info->status.rates[0].idx = -1; + if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && !tx_compl_param->status) { info->flags |= IEEE80211_TX_STAT_ACK; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1d7b60aa5cb0..db351c922018 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4064,68 +4064,12 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, return ret; } -static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) -{ - struct ath12k *ar = arvif->ar; - struct ieee80211_vif *vif = arvif->ahvif->vif; - struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; - enum wmi_sta_powersave_param param; - struct ieee80211_bss_conf *info; - enum wmi_sta_ps_mode psmode; - int ret; - int timeout; - bool enable_ps; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - if (vif->type != NL80211_IFTYPE_STATION) - return; - - enable_ps = arvif->ahvif->ps; - if (enable_ps) { - psmode = WMI_STA_PS_MODE_ENABLED; - param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - - timeout = conf->dynamic_ps_timeout; - if (timeout == 0) { - info = ath12k_mac_get_link_bss_conf(arvif); - if (!info) { - ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", - vif->addr, arvif->link_id); - return; - } - - /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; - } - - ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, - timeout); - if (ret) { - ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", - arvif->vdev_id, ret); - return; - } - } else { - psmode = WMI_STA_PS_MODE_DISABLED; - } - - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", - arvif->vdev_id, psmode ? "enable" : "disable"); - - ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); - if (ret) - ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", - psmode, arvif->vdev_id, ret); -} - static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 changed) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; - struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4189,24 +4133,61 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } } } +} - if (changed & BSS_CHANGED_PS) { - links = ahvif->links_map; - vif_cfg = &vif->cfg; +static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) +{ + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; + enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; + enum wmi_sta_ps_mode psmode; + int ret; + int timeout; + bool enable_ps; - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || !arvif->ar) - continue; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ar = arvif->ar; + if (vif->type != NL80211_IFTYPE_STATION) + return; - if (ar->ab->hw_params->supports_sta_ps) { - ahvif->ps = vif_cfg->ps; - ath12k_mac_vif_setup_ps(arvif); + enable_ps = arvif->ahvif->ps; + if (enable_ps) { + psmode = WMI_STA_PS_MODE_ENABLED; + param = WMI_STA_PS_PARAM_INACTIVITY_TIME; + + timeout = conf->dynamic_ps_timeout; + if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; } + + /* firmware doesn't like 0 */ + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; + } + + ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, + timeout); + if (ret) { + ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", + arvif->vdev_id, ret); + return; } + } else { + psmode = WMI_STA_PS_MODE_DISABLED; } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", + arvif->vdev_id, psmode ? "enable" : "disable"); + + ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); + if (ret) + ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", + psmode, arvif->vdev_id, ret); } static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, @@ -4228,6 +4209,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_vif_cfg *vif_cfg = &vif->cfg; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4514,6 +4496,12 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); + + if (changed & BSS_CHANGED_PS && + ar->ab->hw_params->supports_sta_ps) { + ahvif->ps = vif_cfg->ps; + ath12k_mac_vif_setup_ps(arvif); + } } static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif, @@ -8290,23 +8278,32 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb) wake_up(&ar->txmgmt_empty_waitq); } -int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +static void ath12k_mac_tx_mgmt_free(struct ath12k *ar, int buf_id) { - struct sk_buff *msdu = skb; + struct sk_buff *msdu; struct ieee80211_tx_info *info; - struct ath12k *ar = ctx; - struct ath12k_base *ab = ar->ab; spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); + msdu = idr_remove(&ar->txmgmt_idr, buf_id); spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, + + if (!msdu) + return; + + dma_unmap_single(ar->ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); - ath12k_mgmt_over_wmi_tx_drop(ar, skb); + ath12k_mgmt_over_wmi_tx_drop(ar, msdu); +} + +int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +{ + struct ath12k *ar = ctx; + + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -8315,17 +8312,10 @@ static int ath12k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx) { struct ieee80211_vif *vif = ctx; struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); - struct sk_buff *msdu = skb; struct ath12k *ar = skb_cb->ar; - struct ath12k_base *ab = ar->ab; - if (skb_cb->vif == vif) { - spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); - spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, - DMA_TO_DEVICE); - } + if (skb_cb->vif == vif) + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 8afaffe31031..bb96b87b2a6e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5627,8 +5627,7 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, *cookie, le16_to_cpu(action_frame->len), le32_to_cpu(af_params->channel)); - ack = brcmf_p2p_send_action_frame(cfg, cfg_to_ndev(cfg), - af_params); + ack = brcmf_p2p_send_action_frame(vif->ifp, af_params); cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, ack, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 0dc9d28cd77b..e1752a513c73 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1529,6 +1529,7 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, /** * brcmf_p2p_tx_action_frame() - send action frame over fil. * + * @ifp: interface to transmit on. * @p2p: p2p info struct for vif. * @af_params: action frame data/info. * @@ -1538,12 +1539,11 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, * The WLC_E_ACTION_FRAME_COMPLETE event will be received when the action * frame is transmitted. */ -static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, +static s32 brcmf_p2p_tx_action_frame(struct brcmf_if *ifp, + struct brcmf_p2p_info *p2p, struct brcmf_fil_af_params_le *af_params) { struct brcmf_pub *drvr = p2p->cfg->pub; - struct brcmf_cfg80211_vif *vif; - struct brcmf_p2p_action_frame *p2p_af; s32 err = 0; brcmf_dbg(TRACE, "Enter\n"); @@ -1552,14 +1552,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); - /* check if it is a p2p_presence response */ - p2p_af = (struct brcmf_p2p_action_frame *)af_params->action_frame.data; - if (p2p_af->subtype == P2P_AF_PRESENCE_RSP) - vif = p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif; - else - vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif; - - err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe", af_params, + err = brcmf_fil_bsscfg_data_set(ifp, "actframe", af_params, sizeof(*af_params)); if (err) { bphy_err(drvr, " sending action frame has failed\n"); @@ -1711,16 +1704,14 @@ static bool brcmf_p2p_check_dwell_overflow(u32 requested_dwell, /** * brcmf_p2p_send_action_frame() - send action frame . * - * @cfg: driver private data for cfg80211 interface. - * @ndev: net device to transmit on. + * @ifp: interface to transmit on. * @af_params: configuration data for action frame. */ -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params) { + struct brcmf_cfg80211_info *cfg = ifp->drvr->config; struct brcmf_p2p_info *p2p = &cfg->p2p; - struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_fil_action_frame_le *action_frame; struct brcmf_config_af_params config_af_params; struct afx_hdl *afx_hdl = &p2p->afx_hdl; @@ -1857,7 +1848,7 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, if (af_params->channel) msleep(P2P_AF_RETRY_DELAY_TIME); - ack = !brcmf_p2p_tx_action_frame(p2p, af_params); + ack = !brcmf_p2p_tx_action_frame(ifp, p2p, af_params); tx_retry++; dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell, dwell_jiffies); @@ -2217,7 +2208,6 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, WARN_ON(p2p_ifp->bsscfgidx != bsscfgidx); - init_completion(&p2p->send_af_done); INIT_WORK(&p2p->afx_hdl.afx_work, brcmf_p2p_afx_handler); init_completion(&p2p->afx_hdl.act_frm_scan); init_completion(&p2p->wait_next_af); @@ -2513,6 +2503,8 @@ s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced) pri_ifp = brcmf_get_ifp(cfg->pub, 0); p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif = pri_ifp->vif; + init_completion(&p2p->send_af_done); + if (p2pdev_forced) { err_ptr = brcmf_p2p_create_p2pdev(p2p, NULL, NULL); if (IS_ERR(err_ptr)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index d2ecee565bf2..d3137ebd7158 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -168,8 +168,7 @@ int brcmf_p2p_notify_action_frame_rx(struct brcmf_if *ifp, int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, const struct brcmf_event_msg *e, void *data); -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params); bool brcmf_p2p_scan_finding_common_channel(struct brcmf_cfg80211_info *cfg, struct brcmf_bss_info_le *bi); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 738f80fe0c50..f6f52d297a72 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -501,6 +501,7 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; + u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -513,10 +514,10 @@ void iwl_mld_remove_link(struct iwl_mld *mld, RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, @@ -707,18 +708,13 @@ static int iwl_mld_get_chan_load_from_element(struct iwl_mld *mld, struct ieee80211_bss_conf *link_conf) { - struct ieee80211_vif *vif = link_conf->vif; const struct cfg80211_bss_ies *ies; const struct element *bss_load_elem = NULL; const struct ieee80211_bss_load_elem *bss_load; guard(rcu)(); - if (ieee80211_vif_link_active(vif, link_conf->link_id)) - ies = rcu_dereference(link_conf->bss->beacon_ies); - else - ies = rcu_dereference(link_conf->bss->ies); - + ies = rcu_dereference(link_conf->bss->beacon_ies); if (ies) bss_load_elem = cfg80211_find_elem(WLAN_EID_QBSS_LOAD, ies->data, ies->len); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 9c9e0e1c6e1d..867807abde66 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -938,19 +938,12 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx) { + u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10; - u16 flags, cck_flag; - - if (is_new_rate) { - flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); - cck_flag = IWL_MAC_BEACON_CCK; - } else { - cck_flag = IWL_MAC_BEACON_CCK_V1; - flags = iwl_fw_rate_idx_to_plcp(rate_idx); - } if (rate_idx <= IWL_LAST_CCK_RATE) - flags |= cck_flag; + flags |= is_new_rate ? IWL_MAC_BEACON_CCK + : IWL_MAC_BEACON_CCK_V1; return flags; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 0c9c2492d8a7..0b12ee8ad618 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -463,7 +463,7 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, if (!aux_roc_te) /* Not a Aux ROC time event */ return -EINVAL; - iwl_mvm_te_check_trigger(mvm, notif, te_data); + iwl_mvm_te_check_trigger(mvm, notif, aux_roc_te); IWL_DEBUG_TE(mvm, "Aux ROC time event notification - UID = 0x%x action %d (error = %d)\n", @@ -475,14 +475,14 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, /* End TE, notify mac80211 */ ieee80211_remain_on_channel_expired(mvm->hw); iwl_mvm_roc_finished(mvm); /* flush aux queue */ - list_del(&te_data->list); /* remove from list */ - te_data->running = false; - te_data->vif = NULL; - te_data->uid = 0; - te_data->id = TE_MAX; + list_del(&aux_roc_te->list); /* remove from list */ + aux_roc_te->running = false; + aux_roc_te->vif = NULL; + aux_roc_te->uid = 0; + aux_roc_te->id = TE_MAX; } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); - te_data->running = true; + aux_roc_te->running = true; ieee80211_ready_on_channel(mvm->hw); /* Start TE */ } else { IWL_DEBUG_TE(mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 22602c32faa5..fa995e235d9b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -159,9 +159,15 @@ int iwl_mvm_legacy_rate_to_mac80211_idx(u32 rate_n_flags, u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx) { - return (rate_idx >= IWL_FIRST_OFDM_RATE ? - rate_idx - IWL_FIRST_OFDM_RATE : - rate_idx); + if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) + /* In the new rate legacy rates are indexed: + * 0 - 3 for CCK and 0 - 7 for OFDM. + */ + return (rate_idx >= IWL_FIRST_OFDM_RATE ? + rate_idx - IWL_FIRST_OFDM_RATE : + rate_idx); + + return iwl_fw_rate_idx_to_plcp(rate_idx); } u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac) diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 891e125ad30b..54d6d00ecdf1 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -2966,6 +2966,51 @@ mwl8k_cmd_rf_antenna(struct ieee80211_hw *hw, int antenna, int mask) /* * CMD_SET_BEACON. */ + +static bool mwl8k_beacon_has_ds_params(const u8 *buf, int len) +{ + const struct ieee80211_mgmt *mgmt = (const void *)buf; + int ies_len; + + if (len <= offsetof(struct ieee80211_mgmt, u.beacon.variable)) + return false; + + ies_len = len - offsetof(struct ieee80211_mgmt, u.beacon.variable); + + return cfg80211_find_ie(WLAN_EID_DS_PARAMS, mgmt->u.beacon.variable, + ies_len) != NULL; +} + +static void mwl8k_beacon_copy_inject_ds_params(struct ieee80211_hw *hw, + u8 *buf_dst, const u8 *buf_src, + int src_len) +{ + const struct ieee80211_mgmt *mgmt = (const void *)buf_src; + static const u8 before_ds_params[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + }; + const u8 *ies; + int hdr_len, left, offs, pos; + + ies = mgmt->u.beacon.variable; + hdr_len = offsetof(struct ieee80211_mgmt, u.beacon.variable); + + offs = ieee80211_ie_split(ies, src_len - hdr_len, before_ds_params, + ARRAY_SIZE(before_ds_params), 0); + + pos = hdr_len + offs; + left = src_len - pos; + + memcpy(buf_dst, buf_src, pos); + + /* Inject a DSSS Parameter Set after SSID + Supp Rates */ + buf_dst[pos + 0] = WLAN_EID_DS_PARAMS; + buf_dst[pos + 1] = 1; + buf_dst[pos + 2] = hw->conf.chandef.chan->hw_value; + + memcpy(buf_dst + pos + 3, buf_src + pos, left); +} struct mwl8k_cmd_set_beacon { struct mwl8k_cmd_pkt_hdr header; __le16 beacon_len; @@ -2975,17 +3020,33 @@ struct mwl8k_cmd_set_beacon { static int mwl8k_cmd_set_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 *beacon, int len) { + bool ds_params_present = mwl8k_beacon_has_ds_params(beacon, len); struct mwl8k_cmd_set_beacon *cmd; - int rc; + int rc, final_len = len; - cmd = kzalloc(sizeof(*cmd) + len, GFP_KERNEL); + if (!ds_params_present) { + /* + * mwl8k firmware requires a DS Params IE with the current + * channel in AP beacons. If mac80211/hostapd does not + * include it, inject one here. IE ID + length + channel + * number = 3 bytes. + */ + final_len += 3; + } + + cmd = kzalloc(sizeof(*cmd) + final_len, GFP_KERNEL); if (cmd == NULL) return -ENOMEM; cmd->header.code = cpu_to_le16(MWL8K_CMD_SET_BEACON); - cmd->header.length = cpu_to_le16(sizeof(*cmd) + len); - cmd->beacon_len = cpu_to_le16(len); - memcpy(cmd->beacon, beacon, len); + cmd->header.length = cpu_to_le16(sizeof(*cmd) + final_len); + cmd->beacon_len = cpu_to_le16(final_len); + + if (ds_params_present) + memcpy(cmd->beacon, beacon, len); + else + mwl8k_beacon_copy_inject_ds_params(hw, cmd->beacon, beacon, + len); rc = mwl8k_post_pervif_cmd(hw, vif, &cmd->header); kfree(cmd); diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 9f856042a67a..5903d82e1ab1 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2003,8 +2003,14 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct ieee80211_sta *sta = control->sta; struct ieee80211_bss_conf *bss_conf; + /* This can happen in case of monitor injection */ + if (!vif) { + ieee80211_free_txskb(hw, skb); + return; + } + if (link != IEEE80211_LINK_UNSPECIFIED) { - bss_conf = rcu_dereference(txi->control.vif->link_conf[link]); + bss_conf = rcu_dereference(vif->link_conf[link]); if (sta) link_sta = rcu_dereference(sta->link[link]); } else { @@ -2065,13 +2071,13 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, return; } - if (txi->control.vif) - hwsim_check_magic(txi->control.vif); + if (vif) + hwsim_check_magic(vif); if (control->sta) hwsim_check_sta_magic(control->sta); if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) - ieee80211_get_tx_rates(txi->control.vif, control->sta, skb, + ieee80211_get_tx_rates(vif, control->sta, skb, txi->control.rates, ARRAY_SIZE(txi->control.rates)); @@ -6698,14 +6704,15 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void remove_user_radios(u32 portid) +static void remove_user_radios(u32 portid, int netgroup) { struct mac80211_hwsim_data *entry, *tmp; LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { - if (entry->destroy_on_close && entry->portid == portid) { + if (entry->destroy_on_close && entry->portid == portid && + entry->netgroup == netgroup) { list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); @@ -6730,7 +6737,7 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - remove_user_radios(notify->portid); + remove_user_radios(notify->portid, hwsim_net_get_netgroup(notify->net)); if (notify->portid == hwsim_net_get_wmediumd(notify->net)) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2faa0de2a36e..8ee15a15f4ca 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -791,6 +791,7 @@ error: if (urbs) { for (i = 0; i < RX_URBS_COUNT; i++) free_rx_urb(urbs[i]); + kfree(urbs); } return r; } diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 012fcfc79a73..a01178caf15b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -36,6 +36,7 @@ struct nvme_dhchap_queue_context { u8 status; u8 dhgroup_id; u8 hash_id; + u8 sc_c; size_t hash_len; u8 c1[64]; u8 c2[64]; @@ -154,6 +155,8 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_protocol[0].dhchap.idlist[34] = NVME_AUTH_DHGROUP_6144; data->auth_protocol[0].dhchap.idlist[35] = NVME_AUTH_DHGROUP_8192; + chap->sc_c = data->sc_c; + return size; } @@ -489,7 +492,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, ret = crypto_shash_update(shash, buf, 2); if (ret) goto out; - memset(buf, 0, sizeof(buf)); + *buf = chap->sc_c; ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; @@ -500,6 +503,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, strlen(ctrl->opts->host->nqn)); if (ret) goto out; + memset(buf, 0, sizeof(buf)); ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3da980dc60d9..543e17aead12 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -182,12 +182,14 @@ void nvme_mpath_start_request(struct request *rq) struct nvme_ns *ns = rq->q->queuedata; struct gendisk *disk = ns->head->disk; - if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) { + if ((READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) && + !(nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) { atomic_inc(&ns->ctrl->nr_active); nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; } - if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq) || + (nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) return; nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c916176bd9f0..72fb675a696f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1042,7 +1042,7 @@ static blk_status_t nvme_map_data(struct request *req) return nvme_pci_setup_data_prp(req, &iter); } -static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) +static blk_status_t nvme_pci_setup_meta_iter(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; unsigned int entries = req->nr_integrity_segments; @@ -1072,8 +1072,12 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) * descriptor provides an explicit length, so we're relying on that * mechanism to catch any misunderstandings between the application and * device. + * + * P2P DMA also needs to use the blk_dma_iter method, so mptr setup + * leverages this routine when that happens. */ - if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) { + if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl) || + (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD))) { iod->cmd.common.metadata = cpu_to_le64(iter.addr); iod->meta_total_len = iter.len; iod->meta_dma = iter.addr; @@ -1114,6 +1118,9 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct request *req) struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct bio_vec bv = rq_integrity_vec(req); + if (is_pci_p2pdma_page(bv.bv_page)) + return nvme_pci_setup_meta_iter(req); + iod->meta_dma = dma_map_bvec(nvmeq->dev->dev, &bv, rq_dma_dir(req), 0); if (dma_mapping_error(nvmeq->dev->dev, iod->meta_dma)) return BLK_STS_IOERR; @@ -1128,7 +1135,7 @@ static blk_status_t nvme_map_metadata(struct request *req) if ((iod->cmd.common.flags & NVME_CMD_SGL_METABUF) && nvme_pci_metadata_use_sgls(req)) - return nvme_pci_setup_meta_sgls(req); + return nvme_pci_setup_meta_iter(req); return nvme_pci_setup_meta_mptr(req); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1413788ca7d5..9a96df1a511c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1081,6 +1081,9 @@ static void nvme_tcp_write_space(struct sock *sk) queue = sk->sk_user_data; if (likely(queue && sk_stream_is_writeable(sk))) { clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* Ensure pending TLS partial records are retried */ + if (nvme_tcp_queue_tls(queue)) + queue->write_space(sk); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } read_unlock_bh(&sk->sk_callback_lock); diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b340380f3892..ceba21684e82 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -298,7 +298,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, const char *hash_name; u8 *challenge = req->sq->dhchap_c1; struct nvme_dhchap_key *transformed_key; - u8 buf[4]; + u8 buf[4], sc_c = ctrl->concat ? 1 : 0; int ret; hash_name = nvme_auth_hmac_name(ctrl->shash_id); @@ -367,13 +367,14 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, ret = crypto_shash_update(shash, buf, 2); if (ret) goto out; - memset(buf, 0, 4); + *buf = sc_c; ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; ret = crypto_shash_update(shash, "HostHost", 8); if (ret) goto out; + memset(buf, 0, 4); ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn)); if (ret) goto out; diff --git a/drivers/nvmem/rcar-efuse.c b/drivers/nvmem/rcar-efuse.c index f24bdb9cb5a7..d9a96a1d59c8 100644 --- a/drivers/nvmem/rcar-efuse.c +++ b/drivers/nvmem/rcar-efuse.c @@ -127,6 +127,7 @@ static const struct of_device_id rcar_fuse_match[] = { { .compatible = "renesas,r8a779h0-otp", .data = &rcar_fuse_v4m }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, rcar_fuse_match); static struct platform_driver rcar_fuse_driver = { .probe = rcar_fuse_probe, diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 65c3c23255b7..1cd93549d093 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -671,6 +671,36 @@ err: } } +static int of_check_msi_parent(struct device_node *dev_node, struct device_node **msi_node) +{ + struct of_phandle_args msi_spec; + int ret; + + /* + * An msi-parent phandle with a missing or == 0 #msi-cells + * property identifies a 1:1 ID translation mapping. + * + * Set the msi controller node if the firmware matches this + * condition. + */ + ret = of_parse_phandle_with_optional_args(dev_node, "msi-parent", "#msi-cells", + 0, &msi_spec); + if (ret) + return ret; + + if ((*msi_node && *msi_node != msi_spec.np) || msi_spec.args_count != 0) + ret = -EINVAL; + + if (!ret) { + /* Return with a node reference held */ + *msi_node = msi_spec.np; + return 0; + } + of_node_put(msi_spec.np); + + return ret; +} + /** * of_msi_xlate - map a MSI ID and find relevant MSI controller node * @dev: device for which the mapping is to be done. @@ -678,7 +708,7 @@ err: * @id_in: Device ID. * * Walk up the device hierarchy looking for devices with a "msi-map" - * property. If found, apply the mapping to @id_in. + * or "msi-parent" property. If found, apply the mapping to @id_in. * If @msi_np points to a non-NULL device node pointer, only entries targeting * that node will be matched; if it points to a NULL value, it will receive the * device node of the first matching target phandle, with a reference held. @@ -692,14 +722,18 @@ u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) /* * Walk up the device parent links looking for one with a - * "msi-map" property. + * "msi-map" or an "msi-parent" property. */ - for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) + for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) { if (!of_map_id(parent_dev->of_node, id_in, "msi-map", "msi-map-mask", msi_np, &id_out)) break; + if (!of_check_msi_parent(parent_dev->of_node, msi_np)) + break; + } return id_out; } +EXPORT_SYMBOL_GPL(of_msi_xlate); /** * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain @@ -741,8 +775,10 @@ struct irq_domain *of_msi_get_domain(struct device *dev, of_for_each_phandle(&it, err, np, "msi-parent", "#msi-cells", 0) { d = irq_find_matching_host(it.node, token); - if (d) + if (d) { + of_node_put(it.node); return d; + } } return NULL; diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7065a8e5f9b1..f94f5d384362 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -306,6 +306,7 @@ config VGA_ARB bool "VGA Arbitration" if EXPERT default y depends on (PCI && !S390) + select SCREEN_INFO if X86 help Some "legacy" VGA devices implemented on PCI typically have the same hard-decoded addresses as they did on ISA. When multiple PCI devices diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 1eac012a8226..c0e1194a936b 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -255,7 +255,7 @@ static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) u16 flags, mme; u8 cap; - cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSIX); + cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSI); fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn); /* Validate that the MSI feature is actually enabled. */ diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 20c9333bcb1c..e92513c5bda5 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -23,6 +23,7 @@ #include "pcie-designware.h" static struct pci_ops dw_pcie_ops; +static struct pci_ops dw_pcie_ecam_ops; static struct pci_ops dw_child_pcie_ops; #define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ @@ -471,9 +472,6 @@ static int dw_pcie_create_ecam_window(struct dw_pcie_rp *pp, struct resource *re if (IS_ERR(pp->cfg)) return PTR_ERR(pp->cfg); - pci->dbi_base = pp->cfg->win; - pci->dbi_phys_addr = res->start; - return 0; } @@ -529,7 +527,7 @@ static int dw_pcie_host_get_resources(struct dw_pcie_rp *pp) if (ret) return ret; - pp->bridge->ops = (struct pci_ops *)&pci_generic_ecam_ops.pci_ops; + pp->bridge->ops = &dw_pcie_ecam_ops; pp->bridge->sysdata = pp->cfg; pp->cfg->priv = pp; } else { @@ -842,12 +840,34 @@ void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus, unsigned int devfn, } EXPORT_SYMBOL_GPL(dw_pcie_own_conf_map_bus); +static void __iomem *dw_pcie_ecam_conf_map_bus(struct pci_bus *bus, unsigned int devfn, int where) +{ + struct pci_config_window *cfg = bus->sysdata; + struct dw_pcie_rp *pp = cfg->priv; + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + unsigned int busn = bus->number; + + if (busn > 0) + return pci_ecam_map_bus(bus, devfn, where); + + if (PCI_SLOT(devfn) > 0) + return NULL; + + return pci->dbi_base + where; +} + static struct pci_ops dw_pcie_ops = { .map_bus = dw_pcie_own_conf_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, }; +static struct pci_ops dw_pcie_ecam_ops = { + .map_bus = dw_pcie_ecam_conf_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 805edbbfe7eb..c48a20602d7f 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -55,7 +55,6 @@ #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 #define PARF_Q2A_FLUSH 0x1ac #define PARF_LTSSM 0x1b0 -#define PARF_SLV_DBI_ELBI 0x1b4 #define PARF_INT_ALL_STATUS 0x224 #define PARF_INT_ALL_CLEAR 0x228 #define PARF_INT_ALL_MASK 0x22c @@ -65,16 +64,6 @@ #define PARF_DBI_BASE_ADDR_V2_HI 0x354 #define PARF_SLV_ADDR_SPACE_SIZE_V2 0x358 #define PARF_SLV_ADDR_SPACE_SIZE_V2_HI 0x35c -#define PARF_BLOCK_SLV_AXI_WR_BASE 0x360 -#define PARF_BLOCK_SLV_AXI_WR_BASE_HI 0x364 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT 0x368 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT_HI 0x36c -#define PARF_BLOCK_SLV_AXI_RD_BASE 0x370 -#define PARF_BLOCK_SLV_AXI_RD_BASE_HI 0x374 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT 0x378 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT_HI 0x37c -#define PARF_ECAM_BASE 0x380 -#define PARF_ECAM_BASE_HI 0x384 #define PARF_NO_SNOOP_OVERRIDE 0x3d4 #define PARF_ATU_BASE_ADDR 0x634 #define PARF_ATU_BASE_ADDR_HI 0x638 @@ -98,7 +87,6 @@ /* PARF_SYS_CTRL register fields */ #define MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN BIT(29) -#define PCIE_ECAM_BLOCKER_EN BIT(26) #define MST_WAKEUP_EN BIT(13) #define SLV_WAKEUP_EN BIT(12) #define MSTR_ACLK_CGC_DIS BIT(10) @@ -146,9 +134,6 @@ /* PARF_LTSSM register fields */ #define LTSSM_EN BIT(8) -/* PARF_SLV_DBI_ELBI */ -#define SLV_DBI_ELBI_ADDR_BASE GENMASK(11, 0) - /* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */ #define PARF_INT_ALL_LINK_UP BIT(13) #define PARF_INT_MSI_DEV_0_7 GENMASK(30, 23) @@ -262,6 +247,7 @@ struct qcom_pcie_ops { int (*get_resources)(struct qcom_pcie *pcie); int (*init)(struct qcom_pcie *pcie); int (*post_init)(struct qcom_pcie *pcie); + void (*host_post_init)(struct qcom_pcie *pcie); void (*deinit)(struct qcom_pcie *pcie); void (*ltssm_enable)(struct qcom_pcie *pcie); int (*config_sid)(struct qcom_pcie *pcie); @@ -326,47 +312,6 @@ static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) qcom_perst_assert(pcie, false); } -static void qcom_pci_config_ecam(struct dw_pcie_rp *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct qcom_pcie *pcie = to_qcom_pcie(pci); - u64 addr, addr_end; - u32 val; - - writel_relaxed(lower_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE); - writel_relaxed(upper_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE_HI); - - /* - * The only device on the root bus is a single Root Port. If we try to - * access any devices other than Device/Function 00.0 on Bus 0, the TLP - * will go outside of the controller to the PCI bus. But with CFG Shift - * Feature (ECAM) enabled in iATU, there is no guarantee that the - * response is going to be all F's. Hence, to make sure that the - * requester gets all F's response for accesses other than the Root - * Port, configure iATU to block the transactions starting from - * function 1 of the root bus to the end of the root bus (i.e., from - * dbi_base + 4KB to dbi_base + 1MB). - */ - addr = pci->dbi_phys_addr + SZ_4K; - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE_HI); - - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE_HI); - - addr_end = pci->dbi_phys_addr + SZ_1M - 1; - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT_HI); - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT_HI); - - val = readl_relaxed(pcie->parf + PARF_SYS_CTRL); - val |= PCIE_ECAM_BLOCKER_EN; - writel_relaxed(val, pcie->parf + PARF_SYS_CTRL); -} - static int qcom_pcie_start_link(struct dw_pcie *pci) { struct qcom_pcie *pcie = to_qcom_pcie(pci); @@ -1094,6 +1039,25 @@ static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie) return 0; } +static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) +{ + /* + * Downstream devices need to be in D0 state before enabling PCI PM + * substates. + */ + pci_set_power_state_locked(pdev, PCI_D0); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); + + return 0; +} + +static void qcom_pcie_host_post_init_2_7_0(struct qcom_pcie *pcie) +{ + struct dw_pcie_rp *pp = &pcie->pci->pp; + + pci_walk_bus(pp->bridge->bus, qcom_pcie_enable_aspm, NULL); +} + static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0; @@ -1320,7 +1284,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct qcom_pcie *pcie = to_qcom_pcie(pci); - u16 offset; int ret; qcom_ep_reset_assert(pcie); @@ -1329,17 +1292,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; - if (pp->ecam_enabled) { - /* - * Override ELBI when ECAM is enabled, as when ECAM is enabled, - * ELBI moves under the 'config' space. - */ - offset = FIELD_GET(SLV_DBI_ELBI_ADDR_BASE, readl(pcie->parf + PARF_SLV_DBI_ELBI)); - pci->elbi_base = pci->dbi_base + offset; - - qcom_pci_config_ecam(pp); - } - ret = qcom_pcie_phy_power_on(pcie); if (ret) goto err_deinit; @@ -1380,9 +1332,19 @@ static void qcom_pcie_host_deinit(struct dw_pcie_rp *pp) pcie->cfg->ops->deinit(pcie); } +static void qcom_pcie_host_post_init(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct qcom_pcie *pcie = to_qcom_pcie(pci); + + if (pcie->cfg->ops->host_post_init) + pcie->cfg->ops->host_post_init(pcie); +} + static const struct dw_pcie_host_ops qcom_pcie_dw_ops = { .init = qcom_pcie_host_init, .deinit = qcom_pcie_host_deinit, + .post_init = qcom_pcie_host_post_init, }; /* Qcom IP rev.: 2.1.0 Synopsys IP rev.: 4.01a */ @@ -1444,6 +1406,7 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .get_resources = qcom_pcie_get_resources_2_7_0, .init = qcom_pcie_init_2_7_0, .post_init = qcom_pcie_post_init_2_7_0, + .host_post_init = qcom_pcie_host_post_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, .config_sid = qcom_pcie_config_sid_1_9_0, @@ -1454,6 +1417,7 @@ static const struct qcom_pcie_ops ops_1_21_0 = { .get_resources = qcom_pcie_get_resources_2_7_0, .init = qcom_pcie_init_2_7_0, .post_init = qcom_pcie_post_init_2_7_0, + .host_post_init = qcom_pcie_host_post_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, }; diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 1bd5bf4a6097..b4b62b9ccc45 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -192,6 +192,12 @@ static void vmd_pci_msi_enable(struct irq_data *data) data->chip->irq_unmask(data); } +static unsigned int vmd_pci_msi_startup(struct irq_data *data) +{ + vmd_pci_msi_enable(data); + return 0; +} + static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; @@ -210,6 +216,11 @@ static void vmd_pci_msi_disable(struct irq_data *data) vmd_irq_disable(data->parent_data); } +static void vmd_pci_msi_shutdown(struct irq_data *data) +{ + vmd_pci_msi_disable(data); +} + static struct irq_chip vmd_msi_controller = { .name = "VMD-MSI", .irq_compose_msi_msg = vmd_compose_msi_msg, @@ -309,6 +320,8 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; + info->chip->irq_startup = vmd_pci_msi_startup; + info->chip->irq_shutdown = vmd_pci_msi_shutdown; info->chip->irq_enable = vmd_pci_msi_enable; info->chip->irq_disable = vmd_pci_msi_disable; return true; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 4492b809094b..36f8c0985430 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -958,6 +958,7 @@ void pci_save_aspm_l1ss_state(struct pci_dev *dev); void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #ifdef CONFIG_PCIEASPM +void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap); void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); @@ -965,6 +966,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pci_configure_ltr(struct pci_dev *pdev); void pci_bridge_reconfigure_ltr(struct pci_dev *pdev); #else +static inline void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap) { } static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 7cc8281e7011..cedea47a3547 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -243,8 +243,7 @@ struct pcie_link_state { /* Clock PM state */ u32 clkpm_capable:1; /* Clock PM capable? */ u32 clkpm_enabled:1; /* Current Clock PM state */ - u32 clkpm_default:1; /* Default Clock PM state by BIOS or - override */ + u32 clkpm_default:1; /* Default Clock PM state by BIOS */ u32 clkpm_disable:1; /* Clock PM disabled */ }; @@ -376,18 +375,6 @@ static void pcie_set_clkpm(struct pcie_link_state *link, int enable) pcie_set_clkpm_nocheck(link, enable); } -static void pcie_clkpm_override_default_link_state(struct pcie_link_state *link, - int enabled) -{ - struct pci_dev *pdev = link->downstream; - - /* For devicetree platforms, enable ClockPM by default */ - if (of_have_populated_dt() && !enabled) { - link->clkpm_default = 1; - pci_info(pdev, "ASPM: DT platform, enabling ClockPM\n"); - } -} - static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) { int capable = 1, enabled = 1; @@ -410,7 +397,6 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) } link->clkpm_enabled = enabled; link->clkpm_default = enabled; - pcie_clkpm_override_default_link_state(link, enabled); link->clkpm_capable = capable; link->clkpm_disable = blacklist ? 1 : 0; } @@ -811,26 +797,23 @@ static void pcie_aspm_override_default_link_state(struct pcie_link_state *link) struct pci_dev *pdev = link->downstream; u32 override; - /* For devicetree platforms, enable all ASPM states by default */ + /* For devicetree platforms, enable L0s and L1 by default */ if (of_have_populated_dt()) { - link->aspm_default = PCIE_LINK_STATE_ASPM_ALL; + if (link->aspm_support & PCIE_LINK_STATE_L0S) + link->aspm_default |= PCIE_LINK_STATE_L0S; + if (link->aspm_support & PCIE_LINK_STATE_L1) + link->aspm_default |= PCIE_LINK_STATE_L1; override = link->aspm_default & ~link->aspm_enabled; if (override) - pci_info(pdev, "ASPM: DT platform, enabling%s%s%s%s%s%s%s\n", - FLAG(override, L0S_UP, " L0s-up"), - FLAG(override, L0S_DW, " L0s-dw"), - FLAG(override, L1, " L1"), - FLAG(override, L1_1, " ASPM-L1.1"), - FLAG(override, L1_2, " ASPM-L1.2"), - FLAG(override, L1_1_PCIPM, " PCI-PM-L1.1"), - FLAG(override, L1_2_PCIPM, " PCI-PM-L1.2")); + pci_info(pdev, "ASPM: default states%s%s\n", + FLAG(override, L0S, " L0s"), + FLAG(override, L1, " L1")); } } static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) { struct pci_dev *child = link->downstream, *parent = link->pdev; - u32 parent_lnkcap, child_lnkcap; u16 parent_lnkctl, child_lnkctl; struct pci_bus *linkbus = parent->subordinate; @@ -845,9 +828,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * If ASPM not supported, don't mess with the clocks and link, * bail out now. */ - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); - pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); - if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS)) + if (!(parent->aspm_l0s_support && child->aspm_l0s_support) && + !(parent->aspm_l1_support && child->aspm_l1_support)) return; /* Configure common clock before checking latencies */ @@ -859,8 +841,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * read-only Link Capabilities may change depending on common clock * configuration (PCIe r5.0, sec 7.5.3.6). */ - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); - pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); @@ -880,7 +860,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * given link unless components on both sides of the link each * support L0s. */ - if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) + if (parent->aspm_l0s_support && child->aspm_l0s_support) link->aspm_support |= PCIE_LINK_STATE_L0S; if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) @@ -889,7 +869,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) link->aspm_enabled |= PCIE_LINK_STATE_L0S_DW; /* Setup L1 state */ - if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1) + if (parent->aspm_l1_support && child->aspm_l1_support) link->aspm_support |= PCIE_LINK_STATE_L1; if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1) @@ -1546,6 +1526,19 @@ int pci_enable_link_state_locked(struct pci_dev *pdev, int state) } EXPORT_SYMBOL(pci_enable_link_state_locked); +void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap) +{ + if (lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) + pdev->aspm_l0s_support = 0; + if (lnkcap & PCI_EXP_LNKCAP_ASPM_L1) + pdev->aspm_l1_support = 0; + + pci_info(pdev, "ASPM: Link Capabilities%s%s treated as unsupported to avoid device defect\n", + lnkcap & PCI_EXP_LNKCAP_ASPM_L0S ? " L0s" : "", + lnkcap & PCI_EXP_LNKCAP_ASPM_L1 ? " L1" : ""); + +} + static int pcie_aspm_set_policy(const char *val, const struct kernel_param *kp) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c83e75a0ec12..9cd032dff31e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -538,14 +538,10 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) } if (io) { bridge->io_window = 1; - pci_read_bridge_io(bridge, - pci_resource_n(bridge, PCI_BRIDGE_IO_WINDOW), - true); + pci_read_bridge_io(bridge, &res, true); } - pci_read_bridge_mmio(bridge, - pci_resource_n(bridge, PCI_BRIDGE_MEM_WINDOW), - true); + pci_read_bridge_mmio(bridge, &res, true); /* * DECchip 21050 pass 2 errata: the bridge may miss an address @@ -583,10 +579,7 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) bridge->pref_64_window = 1; } - pci_read_bridge_mmio_pref(bridge, - pci_resource_n(bridge, - PCI_BRIDGE_PREF_MEM_WINDOW), - true); + pci_read_bridge_mmio_pref(bridge, &res, true); } void pci_read_bridge_bases(struct pci_bus *child) @@ -1663,6 +1656,13 @@ void set_pcie_port_type(struct pci_dev *pdev) if (reg32 & PCI_EXP_LNKCAP_DLLLARC) pdev->link_active_reporting = 1; +#ifdef CONFIG_PCIEASPM + if (reg32 & PCI_EXP_LNKCAP_ASPM_L0S) + pdev->aspm_l0s_support = 1; + if (reg32 & PCI_EXP_LNKCAP_ASPM_L1) + pdev->aspm_l1_support = 1; +#endif + parent = pci_upstream_bridge(pdev); if (!parent) return; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 214ed060ca1b..b9c252aa6fe0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2494,28 +2494,27 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, */ static void quirk_disable_aspm_l0s(struct pci_dev *dev) { - pci_info(dev, "Disabling L0s\n"); - pci_disable_link_state(dev, PCIE_LINK_STATE_L0S); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s); + pcie_aspm_remove_cap(dev, PCI_EXP_LNKCAP_ASPM_L0S); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s); static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) { - pci_info(dev, "Disabling ASPM L0s/L1\n"); - pci_disable_link_state(dev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + pcie_aspm_remove_cap(dev, + PCI_EXP_LNKCAP_ASPM_L0S | PCI_EXP_LNKCAP_ASPM_L1); } /* @@ -2523,7 +2522,10 @@ static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) * upstream PCIe root port when ASPM is enabled. At least L0s mode is affected; * disable both L0s and L1 for now to be safe. */ -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, 0x0451, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PASEMI, 0xa002, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0x1105, quirk_disable_aspm_l0s_l1); /* * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4a8735b275e4..3645f392a9fd 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1604,7 +1604,7 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head) pbus_size_io(bus, realloc_head ? 0 : additional_io_size, additional_io_size, realloc_head); - if (pref) { + if (pref && (pref->flags & IORESOURCE_PREFETCH)) { pbus_size_mem(bus, IORESOURCE_MEM | IORESOURCE_PREFETCH | (pref->flags & IORESOURCE_MEM_64), diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index b58f94ee4891..436fa7f4c387 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,10 +556,8 @@ EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#ifdef CONFIG_SCREEN_INFO - struct screen_info *si = &screen_info; - - return pdev == screen_info_pci_dev(si); +#if defined CONFIG_X86 + return pdev == screen_info_pci_dev(&screen_info); #else return false; #endif diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 4776013e0764..16a2fd9fdd9b 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -2015,6 +2015,7 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, unsigned int blk_ if (pmc->block[blk_num].type == MLXBF_PMC_TYPE_CRSPACE) { /* Program crspace counters to count clock cycles using "count_clock" sysfs */ attr = &pmc->block[blk_num].attr_count_clock; + sysfs_attr_init(&attr->dev_attr.attr); attr->dev_attr.attr.mode = 0644; attr->dev_attr.show = mlxbf_pmc_count_clock_show; attr->dev_attr.store = mlxbf_pmc_count_clock_store; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46e62feeda3c..c122016d82f1 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -432,7 +432,7 @@ config WIRELESS_HOTKEY depends on INPUT help This driver provides supports for the wireless buttons found on some AMD, - HP, & Xioami laptops. + HP, & Xiaomi laptops. On such systems the driver should load automatically (via ACPI alias). To compile this driver as a module, choose M here: the module will diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 31f9643a6a3b..f417dcc9af35 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -210,6 +210,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { .driver_data = &g_series_quirks, }, { + .ident = "Dell Inc. G15 5530", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5530"), + }, + .driver_data = &g_series_quirks, + }, + { .ident = "Dell Inc. G16 7630", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), @@ -1639,7 +1647,7 @@ static int wmax_wmi_probe(struct wmi_device *wdev, const void *context) static int wmax_wmi_suspend(struct device *dev) { - if (awcc->hwmon) + if (awcc && awcc->hwmon) awcc_hwmon_suspend(dev); return 0; @@ -1647,7 +1655,7 @@ static int wmax_wmi_suspend(struct device *dev) static int wmax_wmi_resume(struct device *dev) { - if (awcc->hwmon) + if (awcc && awcc->hwmon) awcc_hwmon_resume(dev); return 0; diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 841a5414d28a..28076929d6af 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -365,6 +365,13 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* Backlight brightness change event */ { KE_IGNORE, 0x0003, { KEY_RESERVED } }, + /* + * Electronic privacy screen toggled, extended data gives state, + * separate entries for on/off see handling in dell_wmi_process_key(). + */ + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_OFF } }, + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_ON } }, + /* Ultra-performance mode switch request */ { KE_IGNORE, 0x000d, { KEY_RESERVED } }, @@ -435,6 +442,11 @@ static int dell_wmi_process_key(struct wmi_device *wdev, int type, int code, u16 "Dell tablet mode switch", SW_TABLET_MODE, !buffer[0]); return 1; + } else if (type == 0x0012 && code == 0x000c && remaining > 0) { + /* Eprivacy toggle, switch to "on" key entry for on events */ + if (buffer[0] == 2) + key++; + used = 1; } else if (type == 0x0012 && code == 0x000d && remaining > 0) { value = (buffer[2] == 2); used = 1; diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index 476ec24d3702..9e052b164a1a 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -245,15 +245,12 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, if (IS_ERR(regulator->rdev)) return PTR_ERR(regulator->rdev); - int3472->regulators[int3472->n_regulator_gpios].ena_gpio = gpio; int3472->n_regulator_gpios++; return 0; } void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472) { - for (int i = 0; i < int3472->n_regulator_gpios; i++) { + for (int i = 0; i < int3472->n_regulator_gpios; i++) regulator_unregister(int3472->regulators[i].rdev); - gpiod_put(int3472->regulators[i].ena_gpio); - } } diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c index f1d6d7b0cb75..b1d84b968112 100644 --- a/drivers/platform/x86/intel/int3472/led.c +++ b/drivers/platform/x86/intel/int3472/led.c @@ -43,7 +43,7 @@ int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gp int3472->pled.lookup.provider = int3472->pled.name; int3472->pled.lookup.dev_id = int3472->sensor_name; - int3472->pled.lookup.con_id = "privacy-led"; + int3472->pled.lookup.con_id = "privacy"; led_add_lookup(&int3472->pled.lookup); return 0; diff --git a/drivers/pmdomain/arm/scmi_pm_domain.c b/drivers/pmdomain/arm/scmi_pm_domain.c index 8fe1c0a501c9..b5e2ffd5ea64 100644 --- a/drivers/pmdomain/arm/scmi_pm_domain.c +++ b/drivers/pmdomain/arm/scmi_pm_domain.c @@ -41,7 +41,7 @@ static int scmi_pd_power_off(struct generic_pm_domain *domain) static int scmi_pm_domain_probe(struct scmi_device *sdev) { - int num_domains, i; + int num_domains, i, ret; struct device *dev = &sdev->dev; struct device_node *np = dev->of_node; struct scmi_pm_domain *scmi_pd; @@ -108,9 +108,18 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev) scmi_pd_data->domains = domains; scmi_pd_data->num_domains = num_domains; + ret = of_genpd_add_provider_onecell(np, scmi_pd_data); + if (ret) + goto err_rm_genpds; + dev_set_drvdata(dev, scmi_pd_data); - return of_genpd_add_provider_onecell(np, scmi_pd_data); + return 0; +err_rm_genpds: + for (i = num_domains - 1; i >= 0; i--) + pm_genpd_remove(domains[i]); + + return ret; } static void scmi_pm_domain_remove(struct scmi_device *sdev) diff --git a/drivers/pmdomain/imx/gpc.c b/drivers/pmdomain/imx/gpc.c index 33991f3c6b55..a34b260274f7 100644 --- a/drivers/pmdomain/imx/gpc.c +++ b/drivers/pmdomain/imx/gpc.c @@ -536,6 +536,8 @@ static void imx_gpc_remove(struct platform_device *pdev) return; } } + + of_node_put(pgc_node); } static struct platform_driver imx_gpc_driver = { diff --git a/drivers/pmdomain/samsung/exynos-pm-domains.c b/drivers/pmdomain/samsung/exynos-pm-domains.c index 5d478bb37ad6..5c3aa8983087 100644 --- a/drivers/pmdomain/samsung/exynos-pm-domains.c +++ b/drivers/pmdomain/samsung/exynos-pm-domains.c @@ -92,13 +92,14 @@ static const struct of_device_id exynos_pm_domain_of_match[] = { { }, }; -static const char *exynos_get_domain_name(struct device_node *node) +static const char *exynos_get_domain_name(struct device *dev, + struct device_node *node) { const char *name; if (of_property_read_string(node, "label", &name) < 0) name = kbasename(node->full_name); - return kstrdup_const(name, GFP_KERNEL); + return devm_kstrdup_const(dev, name, GFP_KERNEL); } static int exynos_pd_probe(struct platform_device *pdev) @@ -115,20 +116,27 @@ static int exynos_pd_probe(struct platform_device *pdev) if (!pd) return -ENOMEM; - pd->pd.name = exynos_get_domain_name(np); + pd->pd.name = exynos_get_domain_name(dev, np); if (!pd->pd.name) return -ENOMEM; pd->base = of_iomap(np, 0); - if (!pd->base) { - kfree_const(pd->pd.name); + if (!pd->base) return -ENODEV; - } pd->pd.power_off = exynos_pd_power_off; pd->pd.power_on = exynos_pd_power_on; pd->local_pwr_cfg = pm_domain_cfg->local_pwr_cfg; + /* + * Some Samsung platforms with bootloaders turning on the splash-screen + * and handing it over to the kernel, requires the power-domains to be + * reset during boot. + */ + if (IS_ENABLED(CONFIG_ARM) && + of_device_is_compatible(np, "samsung,exynos4210-pd")) + exynos_pd_power_off(&pd->pd); + on = readl_relaxed(pd->base + 0x4) & pd->local_pwr_cfg; pm_genpd_init(&pd->pd, NULL, !on); @@ -147,15 +155,6 @@ static int exynos_pd_probe(struct platform_device *pdev) parent.np, child.np); } - /* - * Some Samsung platforms with bootloaders turning on the splash-screen - * and handing it over to the kernel, requires the power-domains to be - * reset during boot. As a temporary hack to manage this, let's enforce - * a sync_state. - */ - if (!ret) - of_genpd_sync_state(np); - pm_runtime_enable(dev); return ret; } diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 8106eb617c8c..c61cf9edac48 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -561,10 +561,14 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, return ptp_mask_en_single(pccontext->private_clkdata, argptr); case PTP_SYS_OFFSET_PRECISE_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_precise(ptp, argptr, ptp->info->getcrosscycles); case PTP_SYS_OFFSET_EXTENDED_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_extended(ptp, argptr, ptp->info->getcyclesx64); default: diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 794ec6e71990..a5c363252986 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2548,7 +2548,7 @@ ptp_ocp_sma_fb_init(struct ptp_ocp *bp) for (i = 0; i < OCP_SMA_NUM; i++) { bp->sma[i].fixed_fcn = true; bp->sma[i].fixed_dir = true; - bp->sma[1].dpll_prop.capabilities &= + bp->sma[i].dpll_prop.capabilities &= ~DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE; } return; diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index 022d98f3c32a..ea9c4058ee6a 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -1613,6 +1613,8 @@ static int setup_feedback_loop(struct device *dev, struct device_node *np, step /= r1; new[j].min = min; + new[j].min_sel = desc->linear_ranges[j].min_sel; + new[j].max_sel = desc->linear_ranges[j].max_sel; new[j].step = step; dev_dbg(dev, "%s: old range min %d, step %d\n", diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 1cb647ed70c6..a2d16e9abfb5 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -334,6 +334,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) ret = dev_err_probe(&pdev->dev, PTR_ERR(drvdata->dev), "Failed to register regulator: %ld\n", PTR_ERR(drvdata->dev)); + gpiod_put(cfg.ena_gpiod); return ret; } diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index 8b6b35716f53..c170345ac076 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -268,7 +268,6 @@ static int cpcap_rtc_probe(struct platform_device *pdev) return err; rtc->alarm_irq = platform_get_irq(pdev, 0); - rtc->alarm_enabled = true; err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL, cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE | IRQF_ONESHOT, diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index aabe62c283a1..7e9f7cb90c28 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -316,7 +316,7 @@ static int rx8025_init_client(struct i2c_client *client) return hour_reg; rx8025->is_24 = (hour_reg & RX8035_BIT_HOUR_1224); } else { - rx8025->is_24 = (ctrl[1] & RX8025_BIT_CTRL1_1224); + rx8025->is_24 = (ctrl[0] & RX8025_BIT_CTRL1_1224); } out: return err; diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index 76ecf7b798f0..54c8429b16bf 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -258,7 +258,6 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) irq_set_status_flags(rtc->irq, IRQ_NOAUTOEN); - rtc->irq_en = true; ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, tps6586x_rtc_irq, IRQF_ONESHOT, diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index cc5d05dc395c..17173239301e 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -611,8 +611,9 @@ int scsi_host_busy(struct Scsi_Host *shost) { int cnt = 0; - blk_mq_tagset_busy_iter(&shost->tag_set, - scsi_host_check_in_flight, &cnt); + if (shost->tag_set.ops) + blk_mq_tagset_busy_iter(&shost->tag_set, + scsi_host_check_in_flight, &cnt); return cnt; } EXPORT_SYMBOL(scsi_host_busy); diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 16d0f02af1e4..31d08c115521 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -503,7 +503,7 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) host_bcode = FC_ERROR; goto err; } - if (offset + len > fsp->data_len) { + if (size_add(offset, len) > fsp->data_len) { /* this should never happen */ if ((fr_flags(fp) & FCPHF_CRC_UNCHECKED) && fc_frame_crc_check(fp)) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a761c0aa5127..83ff66f954e6 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4104,7 +4104,7 @@ void qla4xxx_srb_compl(struct kref *ref) * The mid-level driver tries to ensure that queuecommand never gets * invoked concurrently with itself or the interrupt handler (although * the interrupt handler may call this routine as part of request- - * completion handling). Unfortunely, it sometimes calls the scheduler + * completion handling). Unfortunately, it sometimes calls the scheduler * in interrupt context which is a big NO! NO!. **/ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) @@ -4647,7 +4647,7 @@ static int qla4xxx_cmd_wait(struct scsi_qla_host *ha) cmd = scsi_host_find_tag(ha->host, index); /* * We cannot just check if the index is valid, - * becase if we are run from the scsi eh, then + * because if we are run from the scsi eh, then * the scsi/block layer is going to prevent * the tag from being released. */ @@ -4952,7 +4952,7 @@ recover_ha_init_adapter: /* Upon successful firmware/chip reset, re-initialize the adapter */ if (status == QLA_SUCCESS) { /* For ISP-4xxx, force function 1 to always initialize - * before function 3 to prevent both funcions from + * before function 3 to prevent both functions from * stepping on top of the other */ if (is_qla40XX(ha) && (ha->mac_index == 3)) ssleep(6); @@ -6914,7 +6914,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct ddb_entry *ddb_entry = NULL; /* Create session object, with INVALID_ENTRY, - * the targer_id would get set when we issue the login + * the target_id would get set when we issue the login */ cls_sess = iscsi_session_setup(&qla4xxx_iscsi_transport, ha->host, cmds_max, sizeof(struct ddb_entry), diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 746ff6a1f309..1c13812a3f03 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -554,9 +554,9 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) * happened, even if someone else gets the sense data. */ if (sshdr.asc == 0x28) - scmd->device->ua_new_media_ctr++; + atomic_inc(&sdev->ua_new_media_ctr); else if (sshdr.asc == 0x29) - scmd->device->ua_por_ctr++; + atomic_inc(&sdev->ua_por_ctr); } if (scsi_sense_is_deferred(&sshdr)) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 567f9cd29102..6e4112143c76 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device, } /* - * Our channel array is sparsley populated and we + * Our channel array could be sparsley populated and we * initiated I/O on a processor/hw-q that does not * currently have a designated channel. Fix this. * The strategy is simple: - * I. Ensure NUMA locality - * II. Distribute evenly (best effort) + * I. Prefer the channel associated with the current CPU + * II. Ensure NUMA locality + * III. Distribute evenly (best effort) */ + /* Prefer the channel on the I/O issuing processor/hw-q */ + if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus)) + return stor_device->stor_chns[q_num]; + node_mask = cpumask_of_node(cpu_to_node(q_num)); num_channels = 0; @@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device, /* See storvsc_change_target_cpu(). */ outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]); if (outgoing_channel != NULL) { - if (outgoing_channel->target_cpu == q_num) { - /* - * Ideally, we want to pick a different channel if - * available on the same NUMA node. - */ - node_mask = cpumask_of_node(cpu_to_node(q_num)); - for_each_cpu_wrap(tgt_cpu, - &stor_device->alloced_cpus, q_num + 1) { - if (!cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - if (tgt_cpu == q_num) - continue; - channel = READ_ONCE( - stor_device->stor_chns[tgt_cpu]); - if (channel == NULL) - continue; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } - } + if (hv_get_avail_to_write_percent(&outgoing_channel->outbound) + > ring_avail_percent_lowater) + goto found_channel; - /* - * All the other channels on the same NUMA node are - * busy. Try to use the channel on the current CPU - */ - if (hv_get_avail_to_write_percent( - &outgoing_channel->outbound) - > ring_avail_percent_lowater) + /* + * Channel is busy, try to find a channel on the same NUMA node + */ + node_mask = cpumask_of_node(cpu_to_node(q_num)); + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + if (!cpumask_test_cpu(tgt_cpu, node_mask)) + continue; + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; goto found_channel; + } + } - /* - * If we reach here, all the channels on the current - * NUMA node are busy. Try to find a channel in - * other NUMA nodes - */ - for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) { - if (cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - channel = READ_ONCE( - stor_device->stor_chns[tgt_cpu]); - if (channel == NULL) - continue; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } + /* + * If we reach here, all the channels on the current + * NUMA node are busy. Try to find a channel in + * all NUMA nodes + */ + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; + goto found_channel; } } + /* + * If we reach here, all the channels are busy. Use the + * original channel found. + */ } else { spin_lock_irqsave(&stor_device->lock, flags); outgoing_channel = stor_device->stor_chns[q_num]; diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index a25ebe6cd503..553ae7ee20f1 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -402,7 +402,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name, * @name: slave channel name * @config: dma configuration parameters * - * Returns pointer to appropriate DMA channel on success or error. + * Return: Pointer to appropriate DMA channel on success or NULL on error. */ void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -414,13 +414,13 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!kdev) { pr_err("keystone-navigator-dma driver not registered\n"); - return (void *)-EINVAL; + return NULL; } chan_num = of_channel_match_helper(dev->of_node, name, &instance); if (chan_num < 0) { dev_err(kdev->dev, "No DMA instance with name %s\n", name); - return (void *)-EINVAL; + return NULL; } dev_dbg(kdev->dev, "initializing %s channel %d from DMA %s\n", @@ -431,7 +431,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (config->direction != DMA_MEM_TO_DEV && config->direction != DMA_DEV_TO_MEM) { dev_err(kdev->dev, "bad direction\n"); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma instance */ @@ -443,7 +443,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, } if (!dma) { dev_err(kdev->dev, "No DMA instance with name %s\n", instance); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma channel from dma instance */ @@ -463,14 +463,14 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!chan) { dev_err(kdev->dev, "channel %d is not in DMA %s\n", chan_num, instance); - return (void *)-EINVAL; + return NULL; } if (atomic_read(&chan->ref_count) >= 1) { if (!check_config(chan, config)) { dev_err(kdev->dev, "channel %d config miss-match\n", chan_num); - return (void *)-EINVAL; + return NULL; } } diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index dbe640986825..b78163eaed61 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -192,6 +192,14 @@ #define SPI_NAND_OP_RESET 0xff #define SPI_NAND_OP_DIE_SELECT 0xc2 +/* SNAND FIFO commands */ +#define SNAND_FIFO_TX_BUSWIDTH_SINGLE 0x08 +#define SNAND_FIFO_TX_BUSWIDTH_DUAL 0x09 +#define SNAND_FIFO_TX_BUSWIDTH_QUAD 0x0a +#define SNAND_FIFO_RX_BUSWIDTH_SINGLE 0x0c +#define SNAND_FIFO_RX_BUSWIDTH_DUAL 0x0e +#define SNAND_FIFO_RX_BUSWIDTH_QUAD 0x0f + #define SPI_NAND_CACHE_SIZE (SZ_4K + SZ_256) #define SPI_MAX_TRANSFER_SIZE 511 @@ -387,10 +395,26 @@ static int airoha_snand_set_mode(struct airoha_snand_ctrl *as_ctrl, return regmap_write(as_ctrl->regmap_ctrl, REG_SPI_CTRL_DUMMY, 0); } -static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, - const u8 *data, int len) +static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, + const u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_TX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_TX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_TX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; @@ -409,16 +433,32 @@ static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, return 0; } -static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, u8 *data, - int len) +static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, + u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_RX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_RX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_RX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; data_len = min(len - i, SPI_MAX_TRANSFER_SIZE); - err = airoha_snand_set_fifo_op(as_ctrl, 0xc, data_len); + err = airoha_snand_set_fifo_op(as_ctrl, cmd, data_len); if (err) return err; @@ -618,6 +658,10 @@ static int airoha_snand_dirmap_create(struct spi_mem_dirmap_desc *desc) if (desc->info.offset + desc->info.length > U32_MAX) return -EINVAL; + /* continuous reading is not supported */ + if (desc->info.length > SPI_NAND_CACHE_SIZE) + return -E2BIG; + if (!airoha_snand_supports_op(desc->mem, &desc->info.op_tmpl)) return -EOPNOTSUPP; @@ -654,13 +698,13 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, err = airoha_snand_nfi_config(as_ctrl); if (err) - return err; + goto error_dma_mode_off; dma_addr = dma_map_single(as_ctrl->dev, txrx_buf, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); err = dma_mapping_error(as_ctrl->dev, dma_addr); if (err) - return err; + goto error_dma_mode_off; /* set dma addr */ err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_STRADDR, @@ -689,8 +733,9 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - /* set read addr */ - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, 0x0); + /* set read addr: zero page offset + descriptor read offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, + desc->info.offset); if (err) goto error_dma_unmap; @@ -760,6 +805,8 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); +error_dma_mode_off: + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } @@ -824,7 +871,9 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, 0x0); + /* set write addr: zero page offset + descriptor write offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, + desc->info.offset); if (err) goto error_dma_unmap; @@ -892,18 +941,35 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_TO_DEVICE); + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } static int airoha_snand_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) { - u8 data[8], cmd, opcode = op->cmd.opcode; struct airoha_snand_ctrl *as_ctrl; + int op_len, addr_len, dummy_len; + u8 buf[20], *data; int i, err; as_ctrl = spi_controller_get_devdata(mem->spi->controller); + op_len = op->cmd.nbytes; + addr_len = op->addr.nbytes; + dummy_len = op->dummy.nbytes; + + if (op_len + dummy_len + addr_len > sizeof(buf)) + return -EIO; + + data = buf; + for (i = 0; i < op_len; i++) + *data++ = op->cmd.opcode >> (8 * (op_len - i - 1)); + for (i = 0; i < addr_len; i++) + *data++ = op->addr.val >> (8 * (addr_len - i - 1)); + for (i = 0; i < dummy_len; i++) + *data++ = 0xff; + /* switch to manual mode */ err = airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); if (err < 0) @@ -914,40 +980,40 @@ static int airoha_snand_exec_op(struct spi_mem *mem, return err; /* opcode */ - err = airoha_snand_write_data(as_ctrl, 0x8, &opcode, sizeof(opcode)); + data = buf; + err = airoha_snand_write_data(as_ctrl, data, op_len, + op->cmd.buswidth); if (err) return err; /* addr part */ - cmd = opcode == SPI_NAND_OP_GET_FEATURE ? 0x11 : 0x8; - put_unaligned_be64(op->addr.val, data); - - for (i = ARRAY_SIZE(data) - op->addr.nbytes; - i < ARRAY_SIZE(data); i++) { - err = airoha_snand_write_data(as_ctrl, cmd, &data[i], - sizeof(data[0])); + data += op_len; + if (addr_len) { + err = airoha_snand_write_data(as_ctrl, data, addr_len, + op->addr.buswidth); if (err) return err; } /* dummy */ - data[0] = 0xff; - for (i = 0; i < op->dummy.nbytes; i++) { - err = airoha_snand_write_data(as_ctrl, 0x8, &data[0], - sizeof(data[0])); + data += addr_len; + if (dummy_len) { + err = airoha_snand_write_data(as_ctrl, data, dummy_len, + op->dummy.buswidth); if (err) return err; } /* data */ - if (op->data.dir == SPI_MEM_DATA_IN) { - err = airoha_snand_read_data(as_ctrl, op->data.buf.in, - op->data.nbytes); - if (err) - return err; - } else { - err = airoha_snand_write_data(as_ctrl, 0x8, op->data.buf.out, - op->data.nbytes); + if (op->data.nbytes) { + if (op->data.dir == SPI_MEM_DATA_IN) + err = airoha_snand_read_data(as_ctrl, op->data.buf.in, + op->data.nbytes, + op->data.buswidth); + else + err = airoha_snand_write_data(as_ctrl, op->data.buf.out, + op->data.nbytes, + op->data.buswidth); if (err) return err; } diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 4338d00e56a6..35a7c4965e11 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c @@ -286,7 +286,7 @@ static int aml_sfc_set_bus_width(struct aml_sfc *sfc, u8 buswidth, u32 mask) for (i = 0; i <= LANE_MAX; i++) { if (buswidth == 1 << i) { - conf = i << __bf_shf(mask); + conf = i << __ffs(mask); return regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, mask, conf); } @@ -566,7 +566,7 @@ static int aml_sfc_raw_io_op(struct aml_sfc *sfc, const struct spi_mem_op *op) if (!op->data.nbytes) goto end_xfer; - conf = (op->data.nbytes >> RAW_SIZE_BW) << __bf_shf(RAW_EXT_SIZE); + conf = (op->data.nbytes >> RAW_SIZE_BW) << __ffs(RAW_EXT_SIZE); ret = regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, RAW_EXT_SIZE, conf); if (ret) goto err_out; diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 8fb13df8ff87..81017402bc56 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1995,7 +1995,7 @@ static int cqspi_probe(struct platform_device *pdev) if (cqspi->use_direct_mode) { ret = cqspi_request_mmap_dma(cqspi); if (ret == -EPROBE_DEFER) - goto probe_setup_failed; + goto probe_dma_failed; } if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { @@ -2019,9 +2019,10 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: - cqspi_controller_enable(cqspi, 0); if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) pm_runtime_disable(dev); +probe_dma_failed: + cqspi_controller_enable(cqspi, 0); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index f0f576fac77a..7a5197586919 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -358,7 +358,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev) if (IS_ERR(dwsmmio->rstc)) return PTR_ERR(dwsmmio->rstc); - reset_control_deassert(dwsmmio->rstc); + ret = reset_control_deassert(dwsmmio->rstc); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to deassert resets\n"); dws->bus_num = pdev->id; diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 155ddeb8fcd4..bbf1fd4fe1e9 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -519,9 +519,15 @@ static void mx51_ecspi_trigger(struct spi_imx_data *spi_imx) { u32 reg; - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); - reg |= MX51_ECSPI_CTRL_XCH; - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + if (spi_imx->usedma) { + reg = readl(spi_imx->base + MX51_ECSPI_DMA); + reg |= MX51_ECSPI_DMA_TEDEN | MX51_ECSPI_DMA_RXDEN; + writel(reg, spi_imx->base + MX51_ECSPI_DMA); + } else { + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); + reg |= MX51_ECSPI_CTRL_XCH; + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + } } static void mx51_ecspi_disable(struct spi_imx_data *spi_imx) @@ -759,7 +765,6 @@ static void mx51_setup_wml(struct spi_imx_data *spi_imx) writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml - 1) | MX51_ECSPI_DMA_TX_WML(tx_wml) | MX51_ECSPI_DMA_RXT_WML(spi_imx->wml) | - MX51_ECSPI_DMA_TEDEN | MX51_ECSPI_DMA_RXDEN | MX51_ECSPI_DMA_RXTDEN, spi_imx->base + MX51_ECSPI_DMA); } @@ -1520,6 +1525,8 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, reinit_completion(&spi_imx->dma_tx_completion); dma_async_issue_pending(controller->dma_tx); + spi_imx->devtype_data->trigger(spi_imx); + transfer_timeout = spi_imx_calculate_timeout(spi_imx, transfer->len); /* Wait SDMA to finish the data transfer.*/ diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 4b63cb98df9c..b8c572394aac 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -75,10 +75,13 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x4d23), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x5825), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7723), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c index 13bbb2133507..1775ad39e633 100644 --- a/drivers/spi/spi-intel.c +++ b/drivers/spi/spi-intel.c @@ -132,6 +132,7 @@ #define FLCOMP_C0DEN_16M 0x05 #define FLCOMP_C0DEN_32M 0x06 #define FLCOMP_C0DEN_64M 0x07 +#define FLCOMP_C0DEN_128M 0x08 #define INTEL_SPI_TIMEOUT 5000 /* ms */ #define INTEL_SPI_FIFO_SZ 64 @@ -1347,7 +1348,12 @@ static int intel_spi_read_desc(struct intel_spi *ispi) case FLCOMP_C0DEN_64M: ispi->chip0_size = SZ_64M; break; + case FLCOMP_C0DEN_128M: + ispi->chip0_size = SZ_128M; + break; default: + dev_warn(ispi->dev, "unsupported C0DEN: %#lx\n", + flcomp & FLCOMP_C0DEN_MASK); return -EINVAL; } diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index f9371f98a65b..b6c79e50d842 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -404,6 +404,10 @@ struct nxp_fspi { #define FSPI_NEED_INIT BIT(0) #define FSPI_DTR_MODE BIT(1) int flags; + /* save the previous operation clock rate */ + unsigned long pre_op_rate; + /* the max clock rate fspi output to device */ + unsigned long max_rate; }; static inline int needs_ip_only(struct nxp_fspi *f) @@ -685,10 +689,13 @@ static void nxp_fspi_select_rx_sample_clk_source(struct nxp_fspi *f, * change the mode back to mode 0. */ reg = fspi_readl(f, f->iobase + FSPI_MCR0); - if (op_is_dtr) + if (op_is_dtr) { reg |= FSPI_MCR0_RXCLKSRC(3); - else /*select mode 0 */ + f->max_rate = 166000000; + } else { /*select mode 0 */ reg &= ~FSPI_MCR0_RXCLKSRC(3); + f->max_rate = 66000000; + } fspi_writel(f, reg, f->iobase + FSPI_MCR0); } @@ -719,6 +726,12 @@ static void nxp_fspi_dll_calibration(struct nxp_fspi *f) 0, POLL_TOUT, true); if (ret) dev_warn(f->dev, "DLL lock failed, please fix it!\n"); + + /* + * For ERR050272, DLL lock status bit is not accurate, + * wait for 4us more as a workaround. + */ + udelay(4); } /* @@ -780,11 +793,17 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, uint64_t size_kb; /* - * Return, if previously selected target device is same as current - * requested target device. Also the DTR or STR mode do not change. + * Return when following condition all meet, + * 1, if previously selected target device is same as current + * requested target device. + * 2, the DTR or STR mode do not change. + * 3, previous operation max rate equals current one. + * + * For other case, need to re-config. */ if ((f->selected == spi_get_chipselect(spi, 0)) && - (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr)) + (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr) && + (f->pre_op_rate == op->max_freq)) return; /* Reset FLSHxxCR0 registers */ @@ -802,6 +821,7 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, dev_dbg(f->dev, "Target device [CS:%x] selected\n", spi_get_chipselect(spi, 0)); nxp_fspi_select_rx_sample_clk_source(f, op_is_dtr); + rate = min(f->max_rate, op->max_freq); if (op_is_dtr) { f->flags |= FSPI_DTR_MODE; @@ -832,6 +852,8 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, else nxp_fspi_dll_override(f); + f->pre_op_rate = op->max_freq; + f->selected = spi_get_chipselect(spi, 0); } diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c index 9eba5c0a60f2..b3c2b03b1153 100644 --- a/drivers/spi/spi-rockchip-sfc.c +++ b/drivers/spi/spi-rockchip-sfc.c @@ -704,7 +704,12 @@ static int rockchip_sfc_probe(struct platform_device *pdev) ret = -ENOMEM; goto err_dma; } - sfc->dma_buffer = virt_to_phys(sfc->buffer); + sfc->dma_buffer = dma_map_single(dev, sfc->buffer, + sfc->max_iosize, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, sfc->dma_buffer)) { + ret = -ENOMEM; + goto err_dma_map; + } } ret = devm_spi_register_controller(dev, host); @@ -715,6 +720,9 @@ static int rockchip_sfc_probe(struct platform_device *pdev) return 0; err_register: + dma_unmap_single(dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); +err_dma_map: free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); err_dma: pm_runtime_get_sync(dev); @@ -736,6 +744,8 @@ static void rockchip_sfc_remove(struct platform_device *pdev) struct spi_controller *host = sfc->host; spi_unregister_controller(host); + dma_unmap_single(&pdev->dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); clk_disable_unprepare(sfc->clk); diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index d59cc8a18484..c86dc56f38b4 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -300,7 +300,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; - stalled = 10; + stalled = 32; while (rx_words) { if (rx_words == n_words && !(stalled--) && !(sr & XSPI_SR_TX_EMPTY_MASK) && diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 2e0647a06890..e25df9990f82 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2851,6 +2851,18 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); + /* + * This gets re-tried in spi_probe() for -EPROBE_DEFER handling in case + * the GPIO controller does not have a driver yet. This needs to be done + * here too, because this call sets the GPIO direction and/or bias. + * Setting these needs to be done even if there is no driver, in which + * case spi_probe() will never get called. + * TODO: ideally the setup of the GPIO should be handled in a generic + * manner in the ACPI/gpiolib core code. + */ + if (spi->irq < 0) + spi->irq = acpi_dev_gpio_irq_get(adev, 0); + acpi_device_set_enumerated(adev); adev->power.flags.ignore_parent = true; diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 94bbb3b6576d..01a5bb43cd2d 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -182,10 +182,12 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, return retval; #endif - retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); - *bytes_written += num_bytes; - if (retval < 0) - return retval; + if (fifotransferlength > 0) { + retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); + *bytes_written += num_bytes; + if (retval < 0) + return retval; + } write_byte(tms_priv, tms_priv->imr0_bits & ~HR_BOIE, IMR0); for (i = 1; i < fifotransferlength;) { @@ -217,7 +219,7 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, break; } write_byte(tms_priv, tms_priv->imr0_bits, IMR0); - if (retval) + if (retval < 0) return retval; if (send_eoi) { diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index 164dcfc3c9ef..f7bfb4a8e553 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -1517,6 +1517,11 @@ void fmh_gpib_detach(struct gpib_board *board) resource_size(e_priv->gpib_iomem_res)); } fmh_gpib_generic_detach(board); + + if (board->dev) { + put_device(board->dev); + board->dev = NULL; + } } static int fmh_gpib_pci_attach_impl(struct gpib_board *board, diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 4dec87d12687..1f8412de9fa3 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -327,7 +327,10 @@ static void ni_usb_soft_update_status(struct gpib_board *board, unsigned int ni_ board->status &= ~clear_mask; board->status &= ~ni_usb_ibsta_mask; board->status |= ni_usb_ibsta & ni_usb_ibsta_mask; - // FIXME should generate events on DTAS and DCAS + if (ni_usb_ibsta & DCAS) + push_gpib_event(board, EVENT_DEV_CLR); + if (ni_usb_ibsta & DTAS) + push_gpib_event(board, EVENT_DEV_TRG); spin_lock_irqsave(&board->spinlock, flags); /* remove set status bits from monitored set why ?***/ @@ -694,8 +697,12 @@ static int ni_usb_read(struct gpib_board *board, u8 *buffer, size_t length, */ break; case NIUSB_ATN_STATE_ERROR: - retval = -EIO; - dev_err(&usb_dev->dev, "read when ATN set\n"); + if (status.ibsta & DCAS) { + retval = -EINTR; + } else { + retval = -EIO; + dev_dbg(&usb_dev->dev, "read when ATN set stat: 0x%06x\n", status.ibsta); + } break; case NIUSB_ADDRESSING_ERROR: retval = -EIO; diff --git a/drivers/tee/qcomtee/Kconfig b/drivers/tee/qcomtee/Kconfig index 927686abceb1..9f19dee08db4 100644 --- a/drivers/tee/qcomtee/Kconfig +++ b/drivers/tee/qcomtee/Kconfig @@ -2,6 +2,7 @@ # Qualcomm Trusted Execution Environment Configuration config QCOMTEE tristate "Qualcomm TEE Support" + depends on ARCH_QCOM || COMPILE_TEST depends on !CPU_BIG_ENDIAN select QCOM_SCM select QCOM_TZMEM_MODE_SHMBRIDGE diff --git a/drivers/tee/qcomtee/call.c b/drivers/tee/qcomtee/call.c index cc17a48d0ab7..ac134452cc9c 100644 --- a/drivers/tee/qcomtee/call.c +++ b/drivers/tee/qcomtee/call.c @@ -308,7 +308,7 @@ out_failed: } /* Release any IO and OO objects not processed. */ - for (; u[i].type && i < num_params; i++) { + for (; i < num_params && u[i].type; i++) { if (u[i].type == QCOMTEE_ARG_TYPE_OO || u[i].type == QCOMTEE_ARG_TYPE_IO) qcomtee_object_put(u[i].o); diff --git a/drivers/tee/qcomtee/core.c b/drivers/tee/qcomtee/core.c index 783acc59cfa9..b6715ada7700 100644 --- a/drivers/tee/qcomtee/core.c +++ b/drivers/tee/qcomtee/core.c @@ -424,7 +424,7 @@ static int qcomtee_prepare_msg(struct qcomtee_object_invoke_ctx *oic, if (!(u[i].flags & QCOMTEE_ARG_FLAGS_UADDR)) memcpy(msgptr, u[i].b.addr, u[i].b.size); else if (copy_from_user(msgptr, u[i].b.uaddr, u[i].b.size)) - return -EINVAL; + return -EFAULT; offset += qcomtee_msg_offset_align(u[i].b.size); ib++; diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a53ba04d9770..710ae4d40aec 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -635,7 +635,9 @@ static int dw8250_probe(struct platform_device *pdev) if (IS_ERR(data->rst)) return PTR_ERR(data->rst); - reset_control_deassert(data->rst); + err = reset_control_deassert(data->rst); + if (err) + return dev_err_probe(dev, err, "failed to deassert resets\n"); err = devm_add_action_or_reset(dev, dw8250_reset_control_assert, data->rst); if (err) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 04a0cbab02c2..b9cc0b786ca6 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -40,6 +40,8 @@ #define PCI_DEVICE_ID_ACCESSIO_COM_4SM 0x10db #define PCI_DEVICE_ID_ACCESSIO_COM_8SM 0x10ea +#define PCI_DEVICE_ID_ADVANTECH_XR17V352 0x0018 + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -1622,6 +1624,12 @@ static const struct exar8250_board pbn_fastcom35x_8 = { .exit = pci_xr17v35x_exit, }; +static const struct exar8250_board pbn_adv_XR17V352 = { + .num_ports = 2, + .setup = pci_xr17v35x_setup, + .exit = pci_xr17v35x_exit, +}; + static const struct exar8250_board pbn_exar_XR17V4358 = { .num_ports = 12, .setup = pci_xr17v35x_setup, @@ -1696,6 +1704,9 @@ static const struct pci_device_id exar_pci_tbl[] = { USR_DEVICE(XR17C152, 2980, pbn_exar_XR17C15x), USR_DEVICE(XR17C152, 2981, pbn_exar_XR17C15x), + /* ADVANTECH devices */ + EXAR_DEVICE(ADVANTECH, XR17V352, pbn_adv_XR17V352), + /* Exar Corp. XR17C15[248] Dual/Quad/Octal UART */ EXAR_DEVICE(EXAR, XR17C152, pbn_exar_XR17C15x), EXAR_DEVICE(EXAR, XR17C154, pbn_exar_XR17C15x), diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index b44de2ed7413..5875a7b9b4b1 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -435,6 +435,7 @@ static int __maybe_unused mtk8250_runtime_suspend(struct device *dev) while (serial_in(up, MTK_UART_DEBUG0)); + clk_disable_unprepare(data->uart_clk); clk_disable_unprepare(data->bus_clk); return 0; @@ -445,6 +446,7 @@ static int __maybe_unused mtk8250_runtime_resume(struct device *dev) struct mtk8250_data *data = dev_get_drvdata(dev); clk_prepare_enable(data->bus_clk); + clk_prepare_enable(data->uart_clk); return 0; } @@ -475,13 +477,13 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p, int dmacnt; #endif - data->uart_clk = devm_clk_get(&pdev->dev, "baud"); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, "baud"); if (IS_ERR(data->uart_clk)) { /* * For compatibility with older device trees try unnamed * clk when no baud clk can be found. */ - data->uart_clk = devm_clk_get(&pdev->dev, NULL); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(data->uart_clk)) { dev_warn(&pdev->dev, "Can't get uart clock\n"); return PTR_ERR(data->uart_clk); diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 1a2c4c14f6aa..c7435595dce1 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -588,13 +588,6 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) div /= prescaler; } - /* Enable enhanced features */ - sc16is7xx_efr_lock(port); - sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, - SC16IS7XX_EFR_ENABLE_BIT, - SC16IS7XX_EFR_ENABLE_BIT); - sc16is7xx_efr_unlock(port); - /* If bit MCR_CLKSEL is set, the divide by 4 prescaler is activated. */ sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 538b2f991609..62bb62b82cbe 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1014,16 +1014,18 @@ static int sci_handle_fifo_overrun(struct uart_port *port) struct sci_port *s = to_sci_port(port); const struct plat_sci_reg *reg; int copied = 0; - u16 status; + u32 status; - reg = sci_getreg(port, s->params->overrun_reg); - if (!reg->size) - return 0; + if (s->type != SCI_PORT_RSCI) { + reg = sci_getreg(port, s->params->overrun_reg); + if (!reg->size) + return 0; + } - status = sci_serial_in(port, s->params->overrun_reg); + status = s->ops->read_reg(port, s->params->overrun_reg); if (status & s->params->overrun_mask) { status &= ~s->params->overrun_mask; - sci_serial_out(port, s->params->overrun_reg, status); + s->ops->write_reg(port, s->params->overrun_reg, status); port->icount.overrun++; diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index c040afc6668e..0086816b27cd 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1949,7 +1949,7 @@ static umode_t ufs_sysfs_hid_is_visible(struct kobject *kobj, return hba->dev_info.hid_sup ? attr->mode : 0; } -const struct attribute_group ufs_sysfs_hid_group = { +static const struct attribute_group ufs_sysfs_hid_group = { .name = "hid", .attrs = ufs_sysfs_hid, .is_visible = ufs_sysfs_hid_is_visible, diff --git a/drivers/ufs/core/ufs-sysfs.h b/drivers/ufs/core/ufs-sysfs.h index 6efb82a082fd..8d94af3b8077 100644 --- a/drivers/ufs/core/ufs-sysfs.h +++ b/drivers/ufs/core/ufs-sysfs.h @@ -14,6 +14,5 @@ void ufs_sysfs_remove_nodes(struct device *dev); extern const struct attribute_group ufs_sysfs_unit_descriptor_group; extern const struct attribute_group ufs_sysfs_lun_attributes_group; -extern const struct attribute_group ufs_sysfs_hid_group; #endif diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8339fec975b9..d6a060a72461 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4282,8 +4282,8 @@ int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel, get, UIC_GET_ATTR_ID(attr_sel), UFS_UIC_COMMAND_RETRIES - retries); - if (mib_val && !ret) - *mib_val = uic_cmd.argument3; + if (mib_val) + *mib_val = ret == 0 ? uic_cmd.argument3 : 0; if (peer && (hba->quirks & UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE) && pwr_mode_change) @@ -4999,7 +4999,7 @@ EXPORT_SYMBOL_GPL(ufshcd_hba_enable); static int ufshcd_disable_tx_lcc(struct ufs_hba *hba, bool peer) { - int tx_lanes = 0, i, err = 0; + int tx_lanes, i, err = 0; if (!peer) ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDTXDATALANES), @@ -5066,7 +5066,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba) * If UFS device isn't active then we will have to issue link startup * 2 times to make sure the device state move to active. */ - if (!ufshcd_is_ufs_dev_active(hba)) + if (!(hba->quirks & UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE) && + !ufshcd_is_ufs_dev_active(hba)) link_startup_again = true; link_startup: @@ -5131,12 +5132,8 @@ link_startup: ufshcd_readl(hba, REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER); ret = ufshcd_make_hba_operational(hba); out: - if (ret) { + if (ret) dev_err(hba->dev, "link startup failed %d\n", ret); - ufshcd_print_host_state(hba); - ufshcd_print_pwr_info(hba); - ufshcd_print_evt_hist(hba); - } return ret; } @@ -6673,6 +6670,20 @@ static void ufshcd_err_handler(struct work_struct *work) hba->saved_uic_err, hba->force_reset, ufshcd_is_link_broken(hba) ? "; link is broken" : ""); + /* + * Use ufshcd_rpm_get_noresume() here to safely perform link recovery + * even if an error occurs during runtime suspend or runtime resume. + * This avoids potential deadlocks that could happen if we tried to + * resume the device while a PM operation is already in progress. + */ + ufshcd_rpm_get_noresume(hba); + if (hba->pm_op_in_progress) { + ufshcd_link_recovery(hba); + ufshcd_rpm_put(hba); + return; + } + ufshcd_rpm_put(hba); + down(&hba->host_sem); spin_lock_irqsave(hba->host->host_lock, flags); if (ufshcd_err_handling_should_stop(hba)) { @@ -6684,14 +6695,6 @@ static void ufshcd_err_handler(struct work_struct *work) } spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_rpm_get_noresume(hba); - if (hba->pm_op_in_progress) { - ufshcd_link_recovery(hba); - ufshcd_rpm_put(hba); - return; - } - ufshcd_rpm_put(hba); - ufshcd_err_handling_prepare(hba); spin_lock_irqsave(hba->host->host_lock, flags); @@ -8497,8 +8500,6 @@ static int ufs_get_device_desc(struct ufs_hba *hba) DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP) & UFS_DEV_HID_SUPPORT; - sysfs_update_group(&hba->dev->kobj, &ufs_sysfs_hid_group); - model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, @@ -10655,7 +10656,7 @@ remove_scsi_host: * @mmio_base: base register address * @irq: Interrupt line of device * - * Return: 0 on success, non-zero value on failure. + * Return: 0 on success; < 0 on failure. */ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) { @@ -10885,8 +10886,8 @@ initialized: if (err) goto out_disable; - async_schedule(ufshcd_async_scan, hba); ufs_sysfs_add_nodes(hba->dev); + async_schedule(ufshcd_async_scan, hba); device_enable_async_suspend(dev); ufshcd_pm_qos_init(hba); @@ -10896,7 +10897,7 @@ out_disable: hba->is_irq_enabled = false; ufshcd_hba_exit(hba); out_error: - return err; + return err > 0 ? -EIO : err; } EXPORT_SYMBOL_GPL(ufshcd_init); diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 3e83dc51d538..eba0e6617483 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -740,8 +740,21 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, /* reset the connected UFS device during power down */ - if (ufs_qcom_is_link_off(hba) && host->device_reset) + if (ufs_qcom_is_link_off(hba) && host->device_reset) { ufs_qcom_device_reset_ctrl(hba, true); + /* + * After sending the SSU command, asserting the rst_n + * line causes the device firmware to wake up and + * execute its reset routine. + * + * During this process, the device may draw current + * beyond the permissible limit for low-power mode (LPM). + * A 10ms delay, based on experimental observations, + * allows the UFS device to complete its hardware reset + * before transitioning the power rail to LPM. + */ + usleep_range(10000, 11000); + } return ufs_qcom_ice_suspend(host); } diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b87e03777395..5f65dfad1a71 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -15,6 +15,7 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/pm_qos.h> +#include <linux/suspend.h> #include <linux/debugfs.h> #include <linux/uuid.h> #include <linux/acpi.h> @@ -31,6 +32,7 @@ struct intel_host { u32 dsm_fns; u32 active_ltr; u32 idle_ltr; + int saved_spm_lvl; struct dentry *debugfs_root; struct gpio_desc *reset_gpio; }; @@ -347,6 +349,7 @@ static int ufs_intel_common_init(struct ufs_hba *hba) host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; + host->saved_spm_lvl = -1; ufshcd_set_variant(hba, host); intel_dsm_init(host, hba->dev); if (INTEL_DSM_SUPPORTED(host, RESET)) { @@ -425,7 +428,8 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba) static int ufs_intel_adl_init(struct ufs_hba *hba) { hba->nop_out_timeout = 200; - hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8 | + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE; hba->caps |= UFSHCD_CAP_WB_EN; return ufs_intel_common_init(hba); } @@ -538,6 +542,66 @@ static int ufshcd_pci_restore(struct device *dev) return ufshcd_system_resume(dev); } + +static int ufs_intel_suspend_prepare(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct intel_host *host = ufshcd_get_variant(hba); + int err; + + /* + * Only s2idle (S0ix) retains link state. Force power-off + * (UFS_PM_LVL_5) for any other case. + */ + if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && hba->spm_lvl < UFS_PM_LVL_5) { + host->saved_spm_lvl = hba->spm_lvl; + hba->spm_lvl = UFS_PM_LVL_5; + } + + err = ufshcd_suspend_prepare(dev); + + if (err < 0 && host->saved_spm_lvl != -1) { + hba->spm_lvl = host->saved_spm_lvl; + host->saved_spm_lvl = -1; + } + + return err; +} + +static void ufs_intel_resume_complete(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct intel_host *host = ufshcd_get_variant(hba); + + ufshcd_resume_complete(dev); + + if (host->saved_spm_lvl != -1) { + hba->spm_lvl = host->saved_spm_lvl; + host->saved_spm_lvl = -1; + } +} + +static int ufshcd_pci_suspend_prepare(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (!strcmp(hba->vops->name, "intel-pci")) + return ufs_intel_suspend_prepare(dev); + + return ufshcd_suspend_prepare(dev); +} + +static void ufshcd_pci_resume_complete(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (!strcmp(hba->vops->name, "intel-pci")) { + ufs_intel_resume_complete(dev); + return; + } + + ufshcd_resume_complete(dev); +} #endif /** @@ -611,8 +675,8 @@ static const struct dev_pm_ops ufshcd_pci_pm_ops = { .thaw = ufshcd_system_resume, .poweroff = ufshcd_system_suspend, .restore = ufshcd_pci_restore, - .prepare = ufshcd_suspend_prepare, - .complete = ufshcd_resume_complete, + .prepare = ufshcd_pci_suspend_prepare, + .complete = ufshcd_pci_resume_complete, #endif }; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f5bc53875330..47f589c4104a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -467,6 +467,8 @@ static const struct usb_device_id usb_quirk_list[] = { /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, + { USB_DEVICE(0x12d1, 0x15c1), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, { USB_DEVICE(0x12d1, 0x15c3), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, diff --git a/drivers/usb/dwc3/dwc3-generic-plat.c b/drivers/usb/dwc3/dwc3-generic-plat.c index d96b20570002..f8ad79c08c4e 100644 --- a/drivers/usb/dwc3/dwc3-generic-plat.c +++ b/drivers/usb/dwc3/dwc3-generic-plat.c @@ -85,11 +85,8 @@ static int dwc3_generic_probe(struct platform_device *pdev) static void dwc3_generic_remove(struct platform_device *pdev) { struct dwc3 *dwc = platform_get_drvdata(pdev); - struct dwc3_generic *dwc3g = to_dwc3_generic(dwc); dwc3_core_remove(dwc); - - clk_bulk_disable_unprepare(dwc3g->num_clocks, dwc3g->clks); } static int dwc3_generic_suspend(struct device *dev) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 20165e1582d9..b71680c58de6 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -667,8 +667,6 @@ static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, return ERR_PTR(-EINVAL); if (!usb_raw_io_flags_valid(io->flags)) return ERR_PTR(-EINVAL); - if (io->length > PAGE_SIZE) - return ERR_PTR(-EINVAL); if (get_from_user) data = memdup_user(ptr + sizeof(*io), io->length); else { diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 63edf2d8f245..ecda964e018a 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -892,7 +892,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) dev_info(dbc->dev, "DbC configured\n"); portsc = readl(&dbc->regs->portsc); writel(portsc, &dbc->regs->portsc); - return EVT_GSER; + ret = EVT_GSER; + break; } return EVT_DONE; @@ -954,7 +955,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); - ret = EVT_XFER_DONE; + if (ret != EVT_GSER) + ret = EVT_XFER_DONE; break; default: break; @@ -1390,8 +1392,15 @@ int xhci_dbc_suspend(struct xhci_hcd *xhci) if (!dbc) return 0; - if (dbc->state == DS_CONFIGURED) + switch (dbc->state) { + case DS_ENABLED: + case DS_CONNECTED: + case DS_CONFIGURED: dbc->resume_required = 1; + break; + default: + break; + } xhci_dbc_stop(dbc); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c8ab519f497..f67a4d956204 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -582,6 +582,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + xhci->allow_single_roothub = 1; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_acpi_rtd3_enable(pdev); @@ -637,7 +639,6 @@ int xhci_pci_common_probe(struct pci_dev *dev, const struct pci_device_id *id) xhci = hcd_to_xhci(hcd); xhci->reset = reset; - xhci->allow_single_roothub = 1; if (!xhci_has_one_roothub(xhci)) { xhci->shared_hcd = usb_create_shared_hcd(&xhci_pci_hc_driver, &dev->dev, pci_name(dev), hcd); diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 09ac6f1c985f..0b56b773dbdf 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -182,6 +182,7 @@ config USB_LJCA config USB_USBIO tristate "Intel USBIO Bridge support" depends on USB && ACPI + depends on X86 || COMPILE_TEST select AUXILIARY_BUS help This adds support for Intel USBIO drivers. diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 62e984d20e59..5de856f65f0d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 +#define QUECTEL_PRODUCT_RG255C 0x0316 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -617,6 +618,7 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +#define UNISOC_PRODUCT_UIS7720 0x4064 /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 @@ -1270,6 +1272,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x40) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -1398,10 +1403,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a2, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a3, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a7, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a8, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */ @@ -2466,6 +2475,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, UNISOC_PRODUCT_UIS7720, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b2a568a5bc9b..cc78770509db 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -7876,9 +7876,9 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) port->partner_desc.identity = &port->partner_ident; - port->role_sw = usb_role_switch_get(port->dev); + port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); if (!port->role_sw) - port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); + port->role_sw = usb_role_switch_get(port->dev); if (IS_ERR(port->role_sw)) { err = PTR_ERR(port->role_sw); goto out_destroy_wq; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 82034efb74fc..a7936bd1aabe 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -573,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) vcq->mcq.set_ci_db = vcq->db.db; vcq->mcq.arm_db = vcq->db.db + 1; vcq->mcq.cqe_sz = 64; + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); if (err) @@ -612,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) if (err) goto err_vec; - vcq->mcq.comp = mlx5_vdpa_cq_comp; - vcq->cqe = num_ent; - vcq->mcq.set_ci_db = vcq->db.db; - vcq->mcq.arm_db = vcq->db.db + 1; mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); kfree(in); return 0; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 916cad80941c..5167bec14e36 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -38,6 +38,7 @@ #include <linux/workqueue.h> #include <linux/notifier.h> #include <linux/mm_inline.h> +#include <linux/overflow.h> #include "vfio.h" #define DRIVER_VERSION "0.2" @@ -167,12 +168,14 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, { struct rb_node *node = iommu->dma_list.rb_node; + WARN_ON(!size); + while (node) { struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); - if (start + size <= dma->iova) + if (start + size - 1 < dma->iova) node = node->rb_left; - else if (start >= dma->iova + dma->size) + else if (start > dma->iova + dma->size - 1) node = node->rb_right; else return dma; @@ -182,16 +185,19 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, } static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, - dma_addr_t start, u64 size) + dma_addr_t start, + dma_addr_t end) { struct rb_node *res = NULL; struct rb_node *node = iommu->dma_list.rb_node; struct vfio_dma *dma_res = NULL; + WARN_ON(end < start); + while (node) { struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); - if (start < dma->iova + dma->size) { + if (start <= dma->iova + dma->size - 1) { res = node; dma_res = dma; if (start >= dma->iova) @@ -201,7 +207,7 @@ static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, node = node->rb_right; } } - if (res && size && dma_res->iova >= start + size) + if (res && dma_res->iova > end) res = NULL; return res; } @@ -211,11 +217,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new) struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; struct vfio_dma *dma; + WARN_ON(new->size != 0); + while (*link) { parent = *link; dma = rb_entry(parent, struct vfio_dma, node); - if (new->iova + new->size <= dma->iova) + if (new->iova <= dma->iova) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -895,14 +903,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, unsigned long remote_vaddr; struct vfio_dma *dma; bool do_accounting; + dma_addr_t iova_end; + size_t iova_size; - if (!iommu || !pages) + if (!iommu || !pages || npage <= 0) return -EINVAL; /* Supported for v2 version only */ if (!iommu->v2) return -EACCES; + if (check_mul_overflow(npage, PAGE_SIZE, &iova_size) || + check_add_overflow(user_iova, iova_size - 1, &iova_end)) + return -EOVERFLOW; + mutex_lock(&iommu->lock); if (WARN_ONCE(iommu->vaddr_invalid_count, @@ -1008,12 +1022,21 @@ static void vfio_iommu_type1_unpin_pages(void *iommu_data, { struct vfio_iommu *iommu = iommu_data; bool do_accounting; + dma_addr_t iova_end; + size_t iova_size; int i; /* Supported for v2 version only */ if (WARN_ON(!iommu->v2)) return; + if (WARN_ON(npage <= 0)) + return; + + if (WARN_ON(check_mul_overflow(npage, PAGE_SIZE, &iova_size) || + check_add_overflow(user_iova, iova_size - 1, &iova_end))) + return; + mutex_lock(&iommu->lock); do_accounting = list_empty(&iommu->domain_list); @@ -1067,7 +1090,7 @@ static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, #define VFIO_IOMMU_TLB_SYNC_MAX 512 static size_t unmap_unpin_fast(struct vfio_domain *domain, - struct vfio_dma *dma, dma_addr_t *iova, + struct vfio_dma *dma, dma_addr_t iova, size_t len, phys_addr_t phys, long *unlocked, struct list_head *unmapped_list, int *unmapped_cnt, @@ -1077,18 +1100,17 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { - unmapped = iommu_unmap_fast(domain->domain, *iova, len, + unmapped = iommu_unmap_fast(domain->domain, iova, len, iotlb_gather); if (!unmapped) { kfree(entry); } else { - entry->iova = *iova; + entry->iova = iova; entry->phys = phys; entry->len = unmapped; list_add_tail(&entry->list, unmapped_list); - *iova += unmapped; (*unmapped_cnt)++; } } @@ -1107,18 +1129,17 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, } static size_t unmap_unpin_slow(struct vfio_domain *domain, - struct vfio_dma *dma, dma_addr_t *iova, + struct vfio_dma *dma, dma_addr_t iova, size_t len, phys_addr_t phys, long *unlocked) { - size_t unmapped = iommu_unmap(domain->domain, *iova, len); + size_t unmapped = iommu_unmap(domain->domain, iova, len); if (unmapped) { - *unlocked += vfio_unpin_pages_remote(dma, *iova, + *unlocked += vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, unmapped >> PAGE_SHIFT, false); - *iova += unmapped; cond_resched(); } return unmapped; @@ -1127,12 +1148,12 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain, static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, bool do_accounting) { - dma_addr_t iova = dma->iova, end = dma->iova + dma->size; struct vfio_domain *domain, *d; LIST_HEAD(unmapped_region_list); struct iommu_iotlb_gather iotlb_gather; int unmapped_region_cnt = 0; long unlocked = 0; + size_t pos = 0; if (!dma->size) return 0; @@ -1156,13 +1177,14 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, } iommu_iotlb_gather_init(&iotlb_gather); - while (iova < end) { + while (pos < dma->size) { size_t unmapped, len; phys_addr_t phys, next; + dma_addr_t iova = dma->iova + pos; phys = iommu_iova_to_phys(domain->domain, iova); if (WARN_ON(!phys)) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } @@ -1171,7 +1193,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, * may require hardware cache flushing, try to find the * largest contiguous physical memory chunk to unmap. */ - for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) { + for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) { next = iommu_iova_to_phys(domain->domain, iova + len); if (next != phys + len) break; @@ -1181,16 +1203,18 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, * First, try to use fast unmap/unpin. In case of failure, * switch to slow unmap/unpin path. */ - unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys, + unmapped = unmap_unpin_fast(domain, dma, iova, len, phys, &unlocked, &unmapped_region_list, &unmapped_region_cnt, &iotlb_gather); if (!unmapped) { - unmapped = unmap_unpin_slow(domain, dma, &iova, len, + unmapped = unmap_unpin_slow(domain, dma, iova, len, phys, &unlocked); if (WARN_ON(!unmapped)) break; } + + pos += unmapped; } dma->iommu_mapped = false; @@ -1282,7 +1306,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, } static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, - dma_addr_t iova, size_t size, size_t pgsize) + dma_addr_t iova, dma_addr_t iova_end, size_t pgsize) { struct vfio_dma *dma; struct rb_node *n; @@ -1299,8 +1323,8 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma && dma->iova != iova) return -EINVAL; - dma = vfio_find_dma(iommu, iova + size - 1, 0); - if (dma && dma->iova + dma->size != iova + size) + dma = vfio_find_dma(iommu, iova_end, 1); + if (dma && dma->iova + dma->size - 1 != iova_end) return -EINVAL; for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { @@ -1309,7 +1333,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma->iova < iova) continue; - if (dma->iova > iova + size - 1) + if (dma->iova > iova_end) break; ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize); @@ -1374,7 +1398,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, int ret = -EINVAL, retries = 0; unsigned long pgshift; dma_addr_t iova = unmap->iova; - u64 size = unmap->size; + dma_addr_t iova_end; + size_t size = unmap->size; bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR; struct rb_node *n, *first_n; @@ -1387,6 +1412,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, goto unlock; } + if (iova != unmap->iova || size != unmap->size) { + ret = -EOVERFLOW; + goto unlock; + } + pgshift = __ffs(iommu->pgsize_bitmap); pgsize = (size_t)1 << pgshift; @@ -1396,10 +1426,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap_all) { if (iova || size) goto unlock; - size = U64_MAX; - } else if (!size || size & (pgsize - 1) || - iova + size - 1 < iova || size > SIZE_MAX) { - goto unlock; + iova_end = ~(dma_addr_t)0; + } else { + if (!size || size & (pgsize - 1)) + goto unlock; + + if (check_add_overflow(iova, size - 1, &iova_end)) { + ret = -EOVERFLOW; + goto unlock; + } } /* When dirty tracking is enabled, allow only min supported pgsize */ @@ -1446,17 +1481,17 @@ again: if (dma && dma->iova != iova) goto unlock; - dma = vfio_find_dma(iommu, iova + size - 1, 0); - if (dma && dma->iova + dma->size != iova + size) + dma = vfio_find_dma(iommu, iova_end, 1); + if (dma && dma->iova + dma->size - 1 != iova_end) goto unlock; } ret = 0; - n = first_n = vfio_find_dma_first_node(iommu, iova, size); + n = first_n = vfio_find_dma_first_node(iommu, iova, iova_end); while (n) { dma = rb_entry(n, struct vfio_dma, node); - if (dma->iova >= iova + size) + if (dma->iova > iova_end) break; if (!iommu->v2 && iova > dma->iova) @@ -1648,7 +1683,9 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, { bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR; dma_addr_t iova = map->iova; + dma_addr_t iova_end; unsigned long vaddr = map->vaddr; + unsigned long vaddr_end; size_t size = map->size; int ret = 0, prot = 0; size_t pgsize; @@ -1656,8 +1693,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, /* Verify that none of our __u64 fields overflow */ if (map->size != size || map->vaddr != vaddr || map->iova != iova) + return -EOVERFLOW; + + if (!size) return -EINVAL; + if (check_add_overflow(iova, size - 1, &iova_end) || + check_add_overflow(vaddr, size - 1, &vaddr_end)) + return -EOVERFLOW; + /* READ/WRITE from device perspective */ if (map->flags & VFIO_DMA_MAP_FLAG_WRITE) prot |= IOMMU_WRITE; @@ -1673,13 +1717,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, WARN_ON((pgsize - 1) & PAGE_MASK); - if (!size || (size | iova | vaddr) & (pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } - - /* Don't allow IOVA or virtual address wrap */ - if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) { + if ((size | iova | vaddr) & (pgsize - 1)) { ret = -EINVAL; goto out_unlock; } @@ -1710,7 +1748,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } - if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova_end)) { ret = -EINVAL; goto out_unlock; } @@ -1783,12 +1821,12 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, for (; n; n = rb_next(n)) { struct vfio_dma *dma; - dma_addr_t iova; + size_t pos = 0; dma = rb_entry(n, struct vfio_dma, node); - iova = dma->iova; - while (iova < dma->iova + dma->size) { + while (pos < dma->size) { + dma_addr_t iova = dma->iova + pos; phys_addr_t phys; size_t size; @@ -1804,14 +1842,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, phys = iommu_iova_to_phys(d->domain, iova); if (WARN_ON(!phys)) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } size = PAGE_SIZE; p = phys + size; i = iova + size; - while (i < dma->iova + dma->size && + while (pos + size < dma->size && p == iommu_iova_to_phys(d->domain, i)) { size += PAGE_SIZE; p += PAGE_SIZE; @@ -1819,9 +1857,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } } else { unsigned long pfn; - unsigned long vaddr = dma->vaddr + - (iova - dma->iova); - size_t n = dma->iova + dma->size - iova; + unsigned long vaddr = dma->vaddr + pos; + size_t n = dma->size - pos; long npage; npage = vfio_pin_pages_remote(dma, vaddr, @@ -1852,7 +1889,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, goto unwind; } - iova += size; + pos += size; } } @@ -1869,29 +1906,29 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, unwind: for (; n; n = rb_prev(n)) { struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); - dma_addr_t iova; + size_t pos = 0; if (dma->iommu_mapped) { iommu_unmap(domain->domain, dma->iova, dma->size); continue; } - iova = dma->iova; - while (iova < dma->iova + dma->size) { + while (pos < dma->size) { + dma_addr_t iova = dma->iova + pos; phys_addr_t phys, p; size_t size; dma_addr_t i; phys = iommu_iova_to_phys(domain->domain, iova); if (!phys) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } size = PAGE_SIZE; p = phys + size; i = iova + size; - while (i < dma->iova + dma->size && + while (pos + size < dma->size && p == iommu_iova_to_phys(domain->domain, i)) { size += PAGE_SIZE; p += PAGE_SIZE; @@ -2977,7 +3014,8 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, struct vfio_iommu_type1_dirty_bitmap_get range; unsigned long pgshift; size_t data_size = dirty.argsz - minsz; - size_t iommu_pgsize; + size_t size, iommu_pgsize; + dma_addr_t iova, iova_end; if (!data_size || data_size < sizeof(range)) return -EINVAL; @@ -2986,14 +3024,24 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, sizeof(range))) return -EFAULT; - if (range.iova + range.size < range.iova) + iova = range.iova; + size = range.size; + + if (iova != range.iova || size != range.size) + return -EOVERFLOW; + + if (!size) return -EINVAL; + + if (check_add_overflow(iova, size - 1, &iova_end)) + return -EOVERFLOW; + if (!access_ok((void __user *)range.bitmap.data, range.bitmap.size)) return -EINVAL; pgshift = __ffs(range.bitmap.pgsize); - ret = verify_bitmap_size(range.size >> pgshift, + ret = verify_bitmap_size(size >> pgshift, range.bitmap.size); if (ret) return ret; @@ -3007,19 +3055,18 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, ret = -EINVAL; goto out_unlock; } - if (range.iova & (iommu_pgsize - 1)) { + if (iova & (iommu_pgsize - 1)) { ret = -EINVAL; goto out_unlock; } - if (!range.size || range.size & (iommu_pgsize - 1)) { + if (size & (iommu_pgsize - 1)) { ret = -EINVAL; goto out_unlock; } if (iommu->dirty_page_tracking) ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, range.iova, - range.size, + iommu, iova, iova_end, range.bitmap.pgsize); else ret = -EINVAL; diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 210fd3ac18a4..56ef1d88e003 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -2614,8 +2614,12 @@ static int aty_init(struct fb_info *info) pr_cont("\n"); } #endif - if (par->pll_ops->init_pll) - par->pll_ops->init_pll(info, &par->pll); + if (par->pll_ops->init_pll) { + ret = par->pll_ops->init_pll(info, &par->pll); + if (ret) + return ret; + } + if (par->pll_ops->resume_pll) par->pll_ops->resume_pll(info, &par->pll); diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index a9ec7f488522..dc5ad3fcc7be 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -79,12 +79,16 @@ static inline void bit_putcs_aligned(struct vc_data *vc, struct fb_info *info, struct fb_image *image, u8 *buf, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; + unsigned int charcnt = vc->vc_font.charcount; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { - src = vc->vc_font.data + (scr_readw(s++)& - charmask)*cellsize; + u16 ch = scr_readw(s++) & charmask; + + if (ch >= charcnt) + ch = 0; + src = vc->vc_font.data + (unsigned int)ch * cellsize; if (attr) { update_attr(buf, src, attr, vc); @@ -112,14 +116,18 @@ static inline void bit_putcs_unaligned(struct vc_data *vc, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; + unsigned int charcnt = vc->vc_font.charcount; u32 shift_low = 0, mod = vc->vc_font.width % 8; u32 shift_high = 8; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { - src = vc->vc_font.data + (scr_readw(s++)& - charmask)*cellsize; + u16 ch = scr_readw(s++) & charmask; + + if (ch >= charcnt) + ch = 0; + src = vc->vc_font.data + (unsigned int)ch * cellsize; if (attr) { update_attr(buf, src, attr, vc); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 96cc9b389246..9bd3c3814b5c 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2810,6 +2810,25 @@ int fbcon_mode_deleted(struct fb_info *info, return found; } +static void fbcon_delete_mode(struct fb_videomode *m) +{ + struct fbcon_display *p; + + for (int i = first_fb_vc; i <= last_fb_vc; i++) { + p = &fb_display[i]; + if (p->mode == m) + p->mode = NULL; + } +} + +void fbcon_delete_modelist(struct list_head *head) +{ + struct fb_modelist *modelist; + + list_for_each_entry(modelist, head, list) + fbcon_delete_mode(&modelist->mode); +} + #ifdef CONFIG_VT_HW_CONSOLE_BINDING static void fbcon_unbind(void) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 53f1719b1ae1..eff757ebbed1 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -544,6 +544,7 @@ static void do_unregister_framebuffer(struct fb_info *fb_info) fb_info->pixmap.addr = NULL; } + fbcon_delete_modelist(&fb_info->modelist); fb_destroy_modelist(&fb_info->modelist); registered_fb[fb_info->node] = NULL; num_registered_fb--; diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index cbdb1caf61bd..0b8d23c12b77 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -192,7 +192,7 @@ static unsigned long pvr2fb_map; #ifdef CONFIG_PVR2_DMA static unsigned int shdma = PVR2_CASCADE_CHAN; -static unsigned int pvr2dma = ONCHIP_NR_DMA_CHANNELS; +static unsigned int pvr2dma = CONFIG_NR_ONCHIP_DMA_CHANNELS; #endif static struct fb_videomode pvr2_modedb[] = { diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c index 91d070ef6989..6ff059ee1694 100644 --- a/drivers/video/fbdev/valkyriefb.c +++ b/drivers/video/fbdev/valkyriefb.c @@ -329,11 +329,13 @@ static int __init valkyriefb_init(void) if (of_address_to_resource(dp, 0, &r)) { printk(KERN_ERR "can't find address for valkyrie\n"); + of_node_put(dp); return 0; } frame_buffer_phys = r.start; cmap_regs_phys = r.start + 0x304000; + of_node_put(dp); } #endif /* ppc (!CONFIG_MAC) */ diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f3248a3e5402..c1acbc98465d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -66,7 +66,6 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct p9_fid *fid; struct inode *inode; struct v9fs_inode *v9inode; - unsigned int cached; if (flags & LOOKUP_RCU) return -ECHILD; @@ -76,11 +75,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_valid; v9inode = V9FS_I(inode); - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - - cached = v9ses->cache & (CACHE_META | CACHE_LOOSE); - - if (!cached || v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { + if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { int retval; struct v9fs_session_info *v9ses; @@ -114,6 +109,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) p9_debug(P9_DEBUG_VFS, "refresh inode: dentry = %pd (%p), got error %pe\n", dentry, dentry, ERR_PTR(retval)); + if (retval < 0) return retval; } } @@ -150,8 +146,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_revalidate = v9fs_lookup_revalidate, - .d_weak_revalidate = __v9fs_lookup_revalidate, .d_release = v9fs_dentry_release, .d_unalias_trylock = v9fs_dentry_unalias_trylock, .d_unalias_unlock = v9fs_dentry_unalias_unlock, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 69f378a83775..d0c77ec31b1d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1339,14 +1339,8 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) * Don't update inode if the file type is different */ umode = p9mode2unixmode(v9ses, st, &rdev); - if (inode_wrong_type(inode, umode)) { - /* - * Do this as a way of letting the caller know the inode should not - * be reused - */ - v9fs_invalidate_inode_attr(inode); + if (inode_wrong_type(inode, umode)) goto out; - } /* * We don't want to refresh inode->i_size, diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0b404e8484d2..be297e335468 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -897,14 +897,8 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) /* * Don't update inode if the file type is different */ - if (inode_wrong_type(inode, st->st_mode)) { - /* - * Do this as a way of letting the caller know the inode should not - * be reused - */ - v9fs_invalidate_inode_attr(inode); + if (inode_wrong_type(inode, st->st_mode)) goto out; - } /* * We don't want to refresh inode->i_size, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 41e37f7f67cc..3df7b9d7fbe8 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -2110,9 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) for (int i = 0; i < count; i++) { __btrfs_kill_delayed_node(delayed_nodes[i]); + btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i], &delayed_node_trackers[i]); - btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); } } } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0d949edc0caf..b09d4ec8c77d 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -219,6 +219,13 @@ static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) return; + /* + * Only print if there are leaked references. The caller is + * holding one reference, so if refs == 1 there is no leak. + */ + if (refcount_read(&node->refs) == 1) + return; + ref_tracker_dir_print(&node->ref_dir.dir, BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 755ec6dfd51c..23273d0e6f22 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2228,6 +2228,14 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, wbc_account_cgroup_owner(wbc, folio, range_len); folio_unlock(folio); } + /* + * If the fs is already in error status, do not submit any writeback + * but immediately finish it. + */ + if (unlikely(BTRFS_FS_ERROR(fs_info))) { + btrfs_bio_end_io(bbio, errno_to_blk_status(BTRFS_FS_ERROR(fs_info))); + return; + } btrfs_submit_bbio(bbio, 0); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7efd1f8a1912..fa82def46e39 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2854,12 +2854,22 @@ static int btrfs_fallocate_update_isize(struct inode *inode, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + u64 range_start; + u64 range_end; int ret; int ret2; if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) return 0; + range_start = round_down(i_size_read(inode), root->fs_info->sectorsize); + range_end = round_up(end, root->fs_info->sectorsize); + + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), range_start, + range_end - range_start); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b1b3a0553ee..6282911e536f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -177,8 +177,10 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, return ret; } ret = paths_from_inode(inum, ipath); - if (ret < 0) + if (ret < 0) { + btrfs_put_root(local_root); goto err; + } /* * We deliberately ignore the bit ipath might have been too small to @@ -6873,7 +6875,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inode_inc_iversion(inode); inode_set_ctime_current(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1175b8192cd7..31ad8580322a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1539,8 +1539,10 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst ASSERT(prealloc); /* Check the level of src and dst first */ - if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) + if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) { + kfree(prealloc); return -EINVAL; + } mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index de4cb0f3fbd0..e9224145d754 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -982,7 +982,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) extent_root = btrfs_extent_root(fs_info, 0); /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */ - if (IS_ERR(extent_root)) { + if (!extent_root) { btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling"); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); return 0; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 651b11884f82..ba20d9286a34 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2203,6 +2203,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, &length, &bioc, NULL, NULL); if (ret < 0) { + bio_put(bio); btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); goto out; @@ -2212,6 +2213,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, btrfs_put_bioc(bioc); if (!rbio) { ret = -ENOMEM; + bio_put(bio); btrfs_bio_counter_dec(fs_info); goto out; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6144e66661f5..96a030d28e09 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4102,6 +4102,48 @@ out: return ret; } +static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) +{ + const struct recorded_ref *data = k; + const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); + + if (data->dir > ref->dir) + return 1; + if (data->dir < ref->dir) + return -1; + if (data->dir_gen > ref->dir_gen) + return 1; + if (data->dir_gen < ref->dir_gen) + return -1; + return 0; +} + +static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent) +{ + const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node); + + return rbtree_check_dir_ref_comp(entry, parent) < 0; +} + +static int record_check_dir_ref_in_tree(struct rb_root *root, + struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *tmp_ref; + int ret; + + if (rb_find(ref, root, rbtree_check_dir_ref_comp)) + return 0; + + ret = dup_ref(ref, list); + if (ret < 0) + return ret; + + tmp_ref = list_last_entry(list, struct recorded_ref, list); + rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less); + tmp_ref->root = root; + return 0; +} + static int rename_current_inode(struct send_ctx *sctx, struct fs_path *current_path, struct fs_path *new_path) @@ -4129,11 +4171,11 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct recorded_ref *cur; struct recorded_ref *cur2; LIST_HEAD(check_dirs); + struct rb_root rbtree_check_dirs = RB_ROOT; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; u64 ow_mode; - u64 last_dir_ino_rm = 0; bool did_overwrite = false; bool is_orphan = false; bool can_rename = true; @@ -4437,7 +4479,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) goto out; } } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4465,7 +4507,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4475,7 +4517,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) * We have a moved dir. Add the old parent to check_dirs */ cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list); - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -4509,7 +4551,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (is_current_inode_path(sctx, cur->full_path)) fs_path_reset(&sctx->cur_inode_path); } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4552,8 +4594,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - } else if (ret == inode_state_did_delete && - cur->dir != last_dir_ino_rm) { + } else if (ret == inode_state_did_delete) { ret = can_rmdir(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; @@ -4565,7 +4606,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = send_rmdir(sctx, valid_path); if (ret < 0) goto out; - last_dir_ino_rm = cur->dir; } } } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index aadc02374b2a..430e7419349c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2068,7 +2068,13 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { - btrfs_free_fs_info(fs_info); + /* + * Dont call btrfs_free_fs_info() to free it as it's still + * initialized partially. + */ + kfree(fs_info->super_copy); + kfree(fs_info->super_for_commit); + kvfree(fs_info); return -ENOMEM; } btrfs_init_fs_info(fs_info); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 621e0df097e3..30f3c3b849c1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7122,7 +7122,7 @@ log_extents: * a power failure unless the log was synced as part of an fsync * against any other unrelated inode. */ - if (inode_only != LOG_INODE_EXISTS) + if (!ctx->logging_new_name && inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); @@ -7910,6 +7910,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + /* The inode has a new name (ref/extref), so make sure we log it. */ + set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + btrfs_init_log_ctx(&ctx, inode); ctx.logging_new_name = true; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 0ea0df18a8e4..d1db7fa1fe58 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1317,6 +1317,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, if (!btrfs_dev_is_sequential(device, info->physical)) { up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; + info->capacity = device->zone_info->zone_size; return 0; } @@ -1522,6 +1523,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1529,28 +1532,26 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, map->num_stripes); - div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > i) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == i) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if (test_bit(0, active) != test_bit(i, active)) { @@ -1574,6 +1575,8 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1581,6 +1584,14 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes / map->sub_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; @@ -1594,26 +1605,12 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, - map->num_stripes / map->sub_stripes); - div_u64_rem(stripe_nr, - (map->num_stripes / map->sub_stripes), - &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > (i / map->sub_stripes)) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == (i / map->sub_stripes)) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if ((i % map->sub_stripes) == 0) { @@ -1683,8 +1680,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); if (num_conventional > 0) { - /* Zone capacity is always zone size in emulation */ - cache->zone_capacity = cache->length; ret = calculate_alloc_pointer(cache, &last_alloc, new); if (ret) { btrfs_err(fs_info, @@ -1693,6 +1688,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } else if (map->num_stripes == num_conventional) { cache->alloc_offset = last_alloc; + cache->zone_capacity = cache->length; set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); goto out; } diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 5dee7c498bc8..ed6e926226b5 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -333,8 +333,7 @@ static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh, inode = mapping->host; *inode_ret = inode; - *lblk_num_ret = ((u64)folio->index << (PAGE_SHIFT - inode->i_blkbits)) + - (bh_offset(bh) >> inode->i_blkbits); + *lblk_num_ret = (folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits; return true; } diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c index b4bfe14229f9..e38d93bb2104 100644 --- a/fs/erofs/decompressor_zstd.c +++ b/fs/erofs/decompressor_zstd.c @@ -172,7 +172,6 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, dctx.bounce = strm->bounce; do { - dctx.avail_out = out_buf.size - out_buf.pos; dctx.inbuf_sz = in_buf.size; dctx.inbuf_pos = in_buf.pos; err = z_erofs_stream_switch_bufs(&dctx, &out_buf.dst, @@ -188,14 +187,18 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, in_buf.pos = dctx.inbuf_pos; zerr = zstd_decompress_stream(stream, &out_buf, &in_buf); - if (zstd_is_error(zerr) || (!zerr && rq->outputsize)) { + dctx.avail_out = out_buf.size - out_buf.pos; + if (zstd_is_error(zerr) || + ((rq->outputsize + dctx.avail_out) && (!zerr || (zerr > 0 && + !(rq->inputsize + in_buf.size - in_buf.pos))))) { erofs_err(sb, "failed to decompress in[%u] out[%u]: %s", rq->inputsize, rq->outputsize, - zerr ? zstd_get_error_name(zerr) : "unexpected end of stream"); + zstd_is_error(zerr) ? zstd_get_error_name(zerr) : + "unexpected end of stream"); err = -EFSCORRUPTED; break; } - } while (rq->outputsize || out_buf.pos < out_buf.size); + } while (rq->outputsize + dctx.avail_out); if (dctx.kout) kunmap_local(dctx.kout); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index e5581dbeb4c2..c8d8e129eb4b 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -55,10 +55,6 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); - if (m->clusterofs >= 1 << vi->z_lclusterbits) { - DBG_BUGON(1); - return -EFSCORRUPTED; - } m->pblk = le32_to_cpu(di->di_u.blkaddr); } return 0; @@ -240,21 +236,29 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { + struct erofs_inode *vi = EROFS_I(m->inode); + int err; + + if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) { + err = z_erofs_load_compact_lcluster(m, lcn, lookahead); + } else { + DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL); + err = z_erofs_load_full_lcluster(m, lcn); + } + if (err) + return err; + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", - m->type, lcn, EROFS_I(m->inode)->nid); + m->type, lcn, EROFS_I(m->inode)->nid); DBG_BUGON(1); return -EOPNOTSUPP; + } else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && + m->clusterofs >= (1 << vi->z_lclusterbits)) { + DBG_BUGON(1); + return -EFSCORRUPTED; } - - switch (EROFS_I(m->inode)->datalayout) { - case EROFS_INODE_COMPRESSED_FULL: - return z_erofs_load_full_lcluster(m, lcn); - case EROFS_INODE_COMPRESSED_COMPACT: - return z_erofs_load_compact_lcluster(m, lcn, lookahead); - default: - return -EINVAL; - } + return 0; } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, @@ -268,20 +272,19 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned long lcn = m->lcn - lookback_distance; int err; + if (!lookback_distance) + break; + err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; - if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; - if (!lookback_distance) - break; continue; - } else { - m->headtype = m->type; - m->map->m_la = (lcn << lclusterbits) | m->clusterofs; - return 0; } + m->headtype = m->type; + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; + return 0; } erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); @@ -431,13 +434,6 @@ static int z_erofs_map_blocks_fo(struct inode *inode, end = inode->i_size; } else { if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { - /* m.lcn should be >= 1 if endoff < m.clusterofs */ - if (!m.lcn) { - erofs_err(sb, "invalid logical cluster 0 at nid %llu", - vi->nid); - err = -EFSCORRUPTED; - goto unmap_out; - } end = (m.lcn << lclusterbits) | m.clusterofs; map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; @@ -596,7 +592,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; - } else if (map->m_plen) { + } else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; @@ -715,6 +711,7 @@ static int z_erofs_map_sanity_check(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); + u64 pend; if (!(map->m_flags & EROFS_MAP_ENCODED)) return 0; @@ -732,6 +729,10 @@ static int z_erofs_map_sanity_check(struct inode *inode, if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) return -EOPNOTSUPP; + /* Filesystems beyond 48-bit physical block addresses are invalid */ + if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) || + (pend >> sbi->blkszbits) >= BIT_ULL(48))) + return -EFSCORRUPTED; return 0; } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 329697c89d09..38210fb6901c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -29,7 +29,6 @@ enum exfat_error_mode { enum { NLS_NAME_NO_LOSSY = 0, /* no lossy */ NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */ - NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */ }; #define EXFAT_HASH_BITS 8 diff --git a/fs/exfat/file.c b/fs/exfat/file.c index f246cf439588..adc37b4d7fc2 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -509,8 +509,8 @@ static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long ar static int exfat_ioctl_set_volume_label(struct super_block *sb, unsigned long arg) { - int ret = 0, lossy; - char label[FSLABEL_MAX]; + int ret = 0, lossy, label_len; + char label[FSLABEL_MAX] = {0}; struct exfat_uni_name uniname; if (!capable(CAP_SYS_ADMIN)) @@ -520,8 +520,9 @@ static int exfat_ioctl_set_volume_label(struct super_block *sb, return -EFAULT; memset(&uniname, 0, sizeof(uniname)); + label_len = strnlen(label, FSLABEL_MAX - 1); if (label[0]) { - ret = exfat_nls_to_utf16(sb, label, FSLABEL_MAX, + ret = exfat_nls_to_utf16(sb, label, label_len, &uniname, &lossy); if (ret < 0) return ret; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 7eb9c67fd35f..745dce29ddb5 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -442,7 +442,7 @@ static int __exfat_resolve_path(struct inode *inode, const unsigned char *path, return namelen; /* return error value */ if ((lossy && !lookup) || !namelen) - return (lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL; + return -EINVAL; return 0; } @@ -642,10 +642,14 @@ static int exfat_find(struct inode *dir, const struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); - info->size = le64_to_cpu(ep2->dentry.stream.valid_size); info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (info->valid_size < 0) { + exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size); + return -EIO; + } + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { exfat_fs_error(sb, "data size is invalid(%lld)", info->size); return -EIO; diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 8243d94ceaf4..57db08a5271c 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -616,9 +616,6 @@ static int exfat_nls_to_ucs2(struct super_block *sb, unilen++; } - if (p_cstring[i] != '\0') - lossy |= NLS_NAME_OVERLEN; - *uniname = '\0'; p_uniname->name_len = unilen; p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9c94ed8c3ab0..f42548ee9083 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -478,14 +478,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end, if (!hugetlb_vma_trylock_write(vma)) continue; - /* - * Skip VMAs without shareable locks. Per the design in commit - * 40549ba8f8e0, these will be handled by remove_inode_hugepages() - * called after this function with proper locking. - */ - if (!__vma_shareable_lock(vma)) - goto skip; - v_start = vma_offset_start(vma, start); v_end = vma_offset_end(vma, end); @@ -496,7 +488,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end, * vmas. Therefore, lock is not held when calling * unmap_hugepage_range for private vmas. */ -skip: hugetlb_vma_unlock_write(vma); } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e3dcc157a83..54699299d5b1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -338,6 +338,14 @@ again: /* Match the xprt security policy */ if (clp->cl_xprtsec.policy != data->xprtsec.policy) continue; + if (clp->cl_xprtsec.policy == RPC_XPRTSEC_TLS_X509) { + if (clp->cl_xprtsec.cert_serial != + data->xprtsec.cert_serial) + continue; + if (clp->cl_xprtsec.privkey_serial != + data->xprtsec.privkey_serial) + continue; + } refcount_inc(&clp->cl_count); return clp; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 46d9c65d50f8..ea9f6ca8f30f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2268,11 +2268,12 @@ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (open_flags & O_CREAT) { - file->f_mode |= FMODE_CREATED; error = nfs_do_create(dir, dentry, mode, open_flags); - if (error) + if (!error) { + file->f_mode |= FMODE_CREATED; + return finish_open(file, dentry, NULL); + } else if (error != -EEXIST || open_flags & O_EXCL) return error; - return finish_open(file, dentry, NULL); } if (d_in_lookup(dentry)) { /* The only flags nfs_lookup considers are diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df01d2876b68..9056f05a67dc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -270,19 +270,31 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror) mirror->layout = NULL; } -static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) +static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count, + gfp_t gfp_flags) { struct nfs4_ff_layout_mirror *mirror; - u32 dss_id; mirror = kzalloc(sizeof(*mirror), gfp_flags); - if (mirror != NULL) { - spin_lock_init(&mirror->lock); - refcount_set(&mirror->ref, 1); - INIT_LIST_HEAD(&mirror->mirrors); - for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) - nfs_localio_file_init(&mirror->dss[dss_id].nfl); + if (mirror == NULL) + return NULL; + + spin_lock_init(&mirror->lock); + refcount_set(&mirror->ref, 1); + INIT_LIST_HEAD(&mirror->mirrors); + + mirror->dss_count = dss_count; + mirror->dss = + kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), + gfp_flags); + if (mirror->dss == NULL) { + kfree(mirror); + return NULL; } + + for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++) + nfs_localio_file_init(&mirror->dss[dss_id].nfl); + return mirror; } @@ -507,17 +519,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, if (dss_count > 1 && stripe_unit == 0) goto out_err_free; - fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); + fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags); if (fls->mirror_array[i] == NULL) { rc = -ENOMEM; goto out_err_free; } - fls->mirror_array[i]->dss_count = dss_count; - fls->mirror_array[i]->dss = - kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), - gfp_flags); - for (dss_id = 0; dss_id < dss_count; dss_id++) { dss_info = &fls->mirror_array[i]->dss[dss_id]; dss_info->mirror = fls->mirror_array[i]; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18b57c7c2f97..13ad70fc00d8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -718,6 +718,8 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct nfs_fattr *fattr; loff_t oldsize = i_size_read(inode); int error = 0; + kuid_t task_uid = current_fsuid(); + kuid_t owner_uid = inode->i_uid; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); @@ -739,9 +741,11 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { spin_lock(&inode->i_lock); if (attr->ia_valid & ATTR_MTIME_SET) { - nfs_set_timestamps_to_ts(inode, attr); - attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| + if (uid_eq(task_uid, owner_uid)) { + nfs_set_timestamps_to_ts(inode, attr); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| ATTR_ATIME|ATTR_ATIME_SET); + } } else { nfs_update_timestamps(inode, attr->ia_valid); attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME); @@ -751,10 +755,12 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid & ATTR_ATIME && !(attr->ia_valid & ATTR_MTIME)) { if (attr->ia_valid & ATTR_ATIME_SET) { - spin_lock(&inode->i_lock); - nfs_set_timestamps_to_ts(inode, attr); - spin_unlock(&inode->i_lock); - attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + if (uid_eq(task_uid, owner_uid)) { + spin_lock(&inode->i_lock); + nfs_set_timestamps_to_ts(inode, attr); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + } } else { nfs_update_delegated_atime(inode); attr->ia_valid &= ~ATTR_ATIME; diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 2c0455e91571..656976b4f42c 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -42,10 +42,9 @@ struct nfs_local_kiocb { /* Begin mostly DIO-specific members */ size_t end_len; short int end_iter_index; - short int n_iters; + atomic_t n_iters; bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; - loff_t offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned; - struct iov_iter iters[NFSLOCAL_MAX_IOS]; + struct iov_iter iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned; /* End mostly DIO-specific members */ }; @@ -314,7 +313,9 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, init_sync_kiocb(&iocb->kiocb, file); iocb->hdr = hdr; + iocb->kiocb.ki_pos = hdr->args.offset; iocb->kiocb.ki_flags &= ~IOCB_APPEND; + iocb->kiocb.ki_complete = NULL; iocb->aio_complete_work = NULL; iocb->end_iter_index = -1; @@ -388,13 +389,24 @@ static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i, return true; } +static void +nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec, + unsigned int nvecs, unsigned long total, + size_t start, size_t len) +{ + iov_iter_bvec(iter, rw, bvec, nvecs, total); + if (start) + iov_iter_advance(iter, start); + iov_iter_truncate(iter, len); +} + /* * Setup as many as 3 iov_iter based on extents described by @local_dio. * Returns the number of iov_iter that were setup. */ static int nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, - unsigned int nvecs, size_t len, + unsigned int nvecs, unsigned long total, struct nfs_local_dio *local_dio) { int n_iters = 0; @@ -402,39 +414,17 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, /* Setup misaligned start? */ if (local_dio->start_len) { - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - iters[n_iters].count = local_dio->start_len; - iocb->offset[n_iters] = iocb->hdr->args.offset; - iocb->iter_is_dio_aligned[n_iters] = false; + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, + nvecs, total, 0, local_dio->start_len); ++n_iters; } - /* Setup misaligned end? - * If so, the end is purposely setup to be issued using buffered IO - * before the middle (which will use DIO, if DIO-aligned, with AIO). - * This creates problems if/when the end results in a partial write. - * So must save index and length of end to handle this corner case. - */ - if (local_dio->end_len) { - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - iocb->offset[n_iters] = local_dio->end_offset; - iov_iter_advance(&iters[n_iters], - local_dio->start_len + local_dio->middle_len); - iocb->iter_is_dio_aligned[n_iters] = false; - /* Save index and length of end */ - iocb->end_iter_index = n_iters; - iocb->end_len = local_dio->end_len; - ++n_iters; - } - - /* Setup DIO-aligned middle to be issued last, to allow for - * DIO with AIO completion (see nfs_local_call_{read,write}). + /* + * Setup DIO-aligned middle, if there is no misaligned end (below) + * then AIO completion is used, see nfs_local_call_{read,write} */ - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - if (local_dio->start_len) - iov_iter_advance(&iters[n_iters], local_dio->start_len); - iters[n_iters].count -= local_dio->end_len; - iocb->offset[n_iters] = local_dio->middle_offset; + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs, + total, local_dio->start_len, local_dio->middle_len); iocb->iter_is_dio_aligned[n_iters] = nfs_iov_iter_aligned_bvec(&iters[n_iters], @@ -442,12 +432,22 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) { trace_nfs_local_dio_misaligned(iocb->hdr->inode, - iocb->hdr->args.offset, len, local_dio); + local_dio->start_len, local_dio->middle_len, local_dio); return 0; /* no DIO-aligned IO possible */ } + iocb->end_iter_index = n_iters; ++n_iters; - iocb->n_iters = n_iters; + /* Setup misaligned end? */ + if (local_dio->end_len) { + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, + nvecs, total, local_dio->start_len + + local_dio->middle_len, local_dio->end_len); + iocb->end_iter_index = n_iters; + ++n_iters; + } + + atomic_set(&iocb->n_iters, n_iters); return n_iters; } @@ -473,18 +473,26 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) } len = hdr->args.count - total; + /* + * For each iocb, iocb->n_iters is always at least 1 and we always + * end io after first nfs_local_pgio_done call unless misaligned DIO. + */ + atomic_set(&iocb->n_iters, 1); + if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { struct nfs_local_dio local_dio; if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) && - nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) + nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) { + /* Ensure DIO WRITE's IO on stable storage upon completion */ + if (rw == ITER_SOURCE) + iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; return; /* is DIO-aligned */ + } } /* Use buffered IO */ - iocb->offset[0] = hdr->args.offset; iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); - iocb->n_iters = 1; } static void @@ -504,9 +512,11 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr, hdr->task.tk_start = ktime_get(); } -static void -nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) +static bool +nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force) { + struct nfs_pgio_header *hdr = iocb->hdr; + /* Must handle partial completions */ if (status >= 0) { hdr->res.count += status; @@ -517,6 +527,12 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); hdr->task.tk_status = status; } + + if (force) + return true; + + BUG_ON(atomic_read(&iocb->n_iters) <= 0); + return atomic_dec_and_test(&iocb->n_iters); } static void @@ -547,11 +563,11 @@ static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) queue_work(nfsiod_workqueue, &iocb->work); } -static void -nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) +static void nfs_local_read_done(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; struct file *filp = iocb->kiocb.ki_filp; + long status = hdr->task.tk_status; if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ @@ -564,20 +580,27 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) */ hdr->res.replen = 0; - if (hdr->res.count != hdr->args.count || - hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) + /* nfs_readpage_result() handles short read */ + + if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) hdr->res.eof = true; dprintk("%s: read %ld bytes eof %d.\n", __func__, status > 0 ? status : 0, hdr->res.eof); } +static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb) +{ + nfs_local_read_done(iocb); + nfs_local_pgio_release(iocb); +} + static void nfs_local_read_aio_complete_work(struct work_struct *work) { struct nfs_local_kiocb *iocb = container_of(work, struct nfs_local_kiocb, work); - nfs_local_pgio_release(iocb); + nfs_local_read_iocb_done(iocb); } static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) @@ -585,8 +608,10 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); - nfs_local_pgio_done(iocb->hdr, ret); - nfs_local_read_done(iocb, ret); + /* AIO completion of DIO read should always be last to complete */ + if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) + return; + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ } @@ -596,32 +621,36 @@ static void nfs_local_call_read(struct work_struct *work) container_of(work, struct nfs_local_kiocb, work); struct file *filp = iocb->kiocb.ki_filp; const struct cred *save_cred; + bool force_done = false; ssize_t status; + int n_iters; save_cred = override_creds(filp->f_cred); - for (int i = 0; i < iocb->n_iters ; i++) { + n_iters = atomic_read(&iocb->n_iters); + for (int i = 0; i < n_iters ; i++) { if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; - iocb->kiocb.ki_complete = nfs_local_read_aio_complete; - iocb->aio_complete_work = nfs_local_read_aio_complete_work; - } + /* Only use AIO completion if DIO-aligned segment is last */ + if (i == iocb->end_iter_index) { + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } + } else + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - nfs_local_pgio_done(iocb->hdr, status); - if (iocb->hdr->task.tk_status) + if (unlikely(status >= 0 && status < iocb->iters[i].count)) + force_done = true; /* Partial read */ + if (nfs_local_pgio_done(iocb, status, force_done)) { + nfs_local_read_iocb_done(iocb); break; + } } } revert_creds(save_cred); - - if (status != -EIOCBQUEUED) { - nfs_local_read_done(iocb, status); - nfs_local_pgio_release(iocb); - } } static int @@ -736,11 +765,10 @@ static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb) fattr->du.nfs3.used = stat.blocks << 9; } -static void -nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) +static void nfs_local_write_done(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; - struct inode *inode = hdr->inode; + long status = hdr->task.tk_status; dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); @@ -759,10 +787,17 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); status = -ENOSPC; /* record -ENOSPC in terms of nfs_local_pgio_done */ - nfs_local_pgio_done(hdr, status); + (void) nfs_local_pgio_done(iocb, status, true); } if (hdr->task.tk_status < 0) - nfs_reset_boot_verifier(inode); + nfs_reset_boot_verifier(hdr->inode); +} + +static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb) +{ + nfs_local_write_done(iocb); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); } static void nfs_local_write_aio_complete_work(struct work_struct *work) @@ -770,8 +805,7 @@ static void nfs_local_write_aio_complete_work(struct work_struct *work) struct nfs_local_kiocb *iocb = container_of(work, struct nfs_local_kiocb, work); - nfs_local_vfs_getattr(iocb); - nfs_local_pgio_release(iocb); + nfs_local_write_iocb_done(iocb); } static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) @@ -779,8 +813,10 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); - nfs_local_pgio_done(iocb->hdr, ret); - nfs_local_write_done(iocb, ret); + /* AIO completion of DIO write should always be last to complete */ + if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) + return; + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ } @@ -791,63 +827,40 @@ static void nfs_local_call_write(struct work_struct *work) struct file *filp = iocb->kiocb.ki_filp; unsigned long old_flags = current->flags; const struct cred *save_cred; + bool force_done = false; ssize_t status; + int n_iters; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; save_cred = override_creds(filp->f_cred); file_start_write(filp); - for (int i = 0; i < iocb->n_iters ; i++) { + n_iters = atomic_read(&iocb->n_iters); + for (int i = 0; i < n_iters ; i++) { if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; - iocb->kiocb.ki_complete = nfs_local_write_aio_complete; - iocb->aio_complete_work = nfs_local_write_aio_complete_work; - } -retry: - iocb->kiocb.ki_pos = iocb->offset[i]; + /* Only use AIO completion if DIO-aligned segment is last */ + if (i == iocb->end_iter_index) { + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; + iocb->aio_complete_work = nfs_local_write_aio_complete_work; + } + } else + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; + status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - if (unlikely(status >= 0 && status < iocb->iters[i].count)) { - /* partial write */ - if (i == iocb->end_iter_index) { - /* Must not account partial end, otherwise, due - * to end being issued before middle: the partial - * write accounting in nfs_local_write_done() - * would incorrectly advance hdr->args.offset - */ - status = 0; - } else { - /* Partial write at start or buffered middle, - * exit early. - */ - nfs_local_pgio_done(iocb->hdr, status); - break; - } - } else if (unlikely(status == -ENOTBLK && - (iocb->kiocb.ki_flags & IOCB_DIRECT))) { - /* VFS will return -ENOTBLK if DIO WRITE fails to - * invalidate the page cache. Retry using buffered IO. - */ - iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - iocb->kiocb.ki_complete = NULL; - iocb->aio_complete_work = NULL; - goto retry; - } - nfs_local_pgio_done(iocb->hdr, status); - if (iocb->hdr->task.tk_status) + if (unlikely(status >= 0 && status < iocb->iters[i].count)) + force_done = true; /* Partial write */ + if (nfs_local_pgio_done(iocb, status, force_done)) { + nfs_local_write_iocb_done(iocb); break; + } } } file_end_write(filp); revert_creds(save_cred); current->flags = old_flags; - - if (status != -EIOCBQUEUED) { - nfs_local_write_done(iocb, status); - nfs_local_vfs_getattr(iocb); - nfs_local_pgio_release(iocb); - } } static int diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 0d7310c1ee0c..5d97c1d38bb6 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -2,6 +2,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/sunrpc/addr.h> +#include <net/handshake.h> #include "internal.h" #include "nfs3_fs.h" #include "netns.h" @@ -98,7 +99,11 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, - .xprtsec = mds_clp->cl_xprtsec, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + .cert_serial = TLS_NO_CERT, + .privkey_serial = TLS_NO_PRIVKEY, + }, .connect_timeout = connect_timeout, .reconnect_timeout = connect_timeout, }; @@ -111,9 +116,14 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + cl_init.xprtsec = mds_clp->cl_xprtsec; + else + ds_proto = XPRT_TRANSPORT_TCP; + fallthrough; case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) cl_init.nconnect = mds_clp->cl_nconnect; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6fddf43d729c..3a4baed993c9 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -11,6 +11,7 @@ #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <net/handshake.h> #include "internal.h" #include "callback.h" #include "delegation.h" @@ -222,6 +223,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; + clp->cl_last_renewal = jiffies; #if IS_ENABLED(CONFIG_NFS_V4_1) init_waitqueue_head(&clp->cl_lock_waitq); #endif @@ -982,7 +984,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, - .xprtsec = mds_srv->nfs_client->cl_xprtsec, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + .cert_serial = TLS_NO_CERT, + .privkey_serial = TLS_NO_PRIVKEY, + }, }; char buf[INET6_ADDRSTRLEN + 1]; @@ -991,9 +997,14 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_TCP_TLS: + if (mds_srv->nfs_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + cl_init.xprtsec = mds_srv->nfs_client->cl_xprtsec; + else + ds_proto = XPRT_TRANSPORT_TCP; + fallthrough; case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) { cl_init.nconnect = mds_clp->cl_nconnect; cl_init.max_connect = NFS_MAX_TRANSPORTS; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f58098417142..93c6ce04332b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3636,6 +3636,7 @@ struct nfs4_closedata { } lr; struct nfs_fattr fattr; unsigned long timestamp; + unsigned short retrans; }; static void nfs4_free_closedata(void *data) @@ -3664,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) .state = state, .inode = calldata->inode, .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -3711,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) default: task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) goto out_restart; } @@ -4712,16 +4715,19 @@ static int _nfs4_proc_lookupp(struct inode *inode, }; unsigned short task_flags = 0; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + if (server->flags & NFS_MOUNT_SOFTREVAL) task_flags |= RPC_TASK_TIMEOUT; + if (server->caps & NFS_CAP_MOVEABLE) + task_flags |= RPC_TASK_MOVEABLE; args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, task_flags); + status = nfs4_do_call_sync(clnt, server, &msg, &args.seq_args, + &res.seq_res, task_flags); dprintk("NFS reply lookupp: %d\n", status); return status; } @@ -5593,9 +5599,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -5709,10 +5717,12 @@ static int nfs4_write_done_cb(struct rpc_task *task, .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, NFS_SERVER(inode), task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -6726,6 +6736,7 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + unsigned short retrans; struct { struct nfs4_layoutreturn_args arg; struct nfs4_layoutreturn_res res; @@ -6746,6 +6757,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) .inode = data->inode, .stateid = &data->stateid, .task_is_privileged = data->args.seq_args.sa_privileged, + .retrans = data->retrans, }; if (!nfs4_sequence_done(task, &data->res.seq_res)) @@ -6817,6 +6829,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) task->tk_status = nfs4_async_handle_exception(task, data->res.server, task->tk_status, &exception); + data->retrans = exception.retrans; if (exception.retry) goto out_restart; } @@ -7093,6 +7106,7 @@ struct nfs4_unlockdata { struct file_lock fl; struct nfs_server *server; unsigned long timestamp; + unsigned short retrans; }; static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, @@ -7147,6 +7161,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) struct nfs4_exception exception = { .inode = calldata->lsp->ls_state->inode, .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -7180,6 +7195,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) task->tk_status = nfs4_async_handle_exception(task, calldata->server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) rpc_restart_call_prepare(task); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7b32afb29782..9976cc16b689 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -809,8 +809,11 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, unsigned int retrans) { struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs_client *mds_clp = mds_srv->nfs_client; + enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy; struct nfs4_pnfs_ds_addr *da; unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10; + int ds_proto; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -834,27 +837,28 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, .xprtsec = clp->cl_xprtsec, }; - if (da->da_transport != clp->cl_proto && - clp->cl_proto != XPRT_TRANSPORT_TCP_TLS) - continue; - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS) + if (xprt_args.ident == XPRT_TRANSPORT_TCP && + clp->cl_proto == XPRT_TRANSPORT_TCP_TLS) xprt_args.ident = XPRT_TRANSPORT_TCP_TLS; - if (da->da_addr.ss_family != clp->cl_addr.ss_family) + if (xprt_args.ident != clp->cl_proto) + continue; + if (xprt_args.dstaddr->sa_family != + clp->cl_addr.ss_family) continue; /* Add this address as an alias */ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, - rpc_clnt_test_and_add_xprt, NULL); + rpc_clnt_test_and_add_xprt, NULL); continue; } - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS) - da->da_transport = XPRT_TRANSPORT_TCP_TLS; - clp = get_v3_ds_connect(mds_srv, - &da->da_addr, - da->da_addrlen, da->da_transport, - timeo, retrans); + + ds_proto = da->da_transport; + if (ds_proto == XPRT_TRANSPORT_TCP && + xprtsec_policy != RPC_XPRTSEC_NONE) + ds_proto = XPRT_TRANSPORT_TCP_TLS; + + clp = get_v3_ds_connect(mds_srv, &da->da_addr, da->da_addrlen, + ds_proto, timeo, retrans); if (IS_ERR(clp)) continue; clp->cl_rpcclient->cl_softerr = 0; @@ -880,7 +884,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, u32 minor_version) { struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs_client *mds_clp = mds_srv->nfs_client; + enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy; struct nfs4_pnfs_ds_addr *da; + int ds_proto; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -908,12 +915,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, .data = &xprtdata, }; - if (da->da_transport != clp->cl_proto && - clp->cl_proto != XPRT_TRANSPORT_TCP_TLS) - continue; - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == - XPRT_TRANSPORT_TCP_TLS) { + if (xprt_args.ident == XPRT_TRANSPORT_TCP && + clp->cl_proto == XPRT_TRANSPORT_TCP_TLS) { struct sockaddr *addr = (struct sockaddr *)&da->da_addr; struct sockaddr_in *sin = @@ -944,7 +947,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, xprt_args.ident = XPRT_TRANSPORT_TCP_TLS; xprt_args.servername = servername; } - if (da->da_addr.ss_family != clp->cl_addr.ss_family) + if (xprt_args.ident != clp->cl_proto) + continue; + if (xprt_args.dstaddr->sa_family != + clp->cl_addr.ss_family) continue; /** @@ -958,15 +964,14 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, if (xprtdata.cred) put_cred(xprtdata.cred); } else { - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == - XPRT_TRANSPORT_TCP_TLS) - da->da_transport = XPRT_TRANSPORT_TCP_TLS; - clp = nfs4_set_ds_client(mds_srv, - &da->da_addr, - da->da_addrlen, - da->da_transport, timeo, - retrans, minor_version); + ds_proto = da->da_transport; + if (ds_proto == XPRT_TRANSPORT_TCP && + xprtsec_policy != RPC_XPRTSEC_NONE) + ds_proto = XPRT_TRANSPORT_TCP_TLS; + + clp = nfs4_set_ds_client(mds_srv, &da->da_addr, + da->da_addrlen, ds_proto, + timeo, retrans, minor_version); if (IS_ERR(clp)) continue; @@ -977,7 +982,6 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, clp = ERR_PTR(-EIO); continue; } - } } diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 545148d42dcc..ea6e6168092b 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -189,6 +189,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, return p; kobject_put(&p->kobject); + kobject_put(&p->nfs_net_kobj); } return NULL; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0fb6905736d5..336c510f3750 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1535,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task, /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE + | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); } return 0; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e466cf52d7d7..7f7e6bb23a90 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -988,10 +988,11 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { - if (u->read.rd_nf) + if (u->read.rd_nf) { + trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, + u->read.rd_offset, u->read.rd_length); nfsd_file_put(u->read.rd_nf); - trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, - u->read.rd_offset, u->read.rd_length); + } } static __be32 @@ -2892,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; + if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { + /* If there are still more operations to process, + * stop here and report NFS4ERR_RESOURCE. */ + if (cstate->minorversion == 0 && + args->client_opcnt > resp->opcnt) { + op->status = nfserr_resource; + goto encode_op; + } + } + /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2972,7 +2983,7 @@ encode_op: status = op->status; } - trace_nfsd_compound_status(args->opcnt, resp->opcnt, + trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81fa7cc6c77b..8a6960500217 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1542,7 +1542,8 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); - WARN_ON(!list_empty(&stid->sc_cp_list)); + if (!list_empty(&stid->sc_cp_list)) + nfs4_free_cpntf_statelist(stid->sc_client->net, stid); kmem_cache_free(stateid_slab, stid); } @@ -3486,7 +3487,20 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; - dprintk("--> %s slot %p\n", __func__, slot); + /* + * RFC 5661 Section 2.10.6.1.2: + * + * Any time SEQUENCE ... returns an error ... [t]he replier MUST NOT + * modify the reply cache entry for the slot whenever an error is + * returned from SEQUENCE ... + * + * Because nfsd4_store_cache_entry is called only by + * nfsd4_sequence_done(), nfsd4_store_cache_entry() is called only + * when a SEQUENCE operation was part of the COMPOUND. + * nfs41_check_op_ordering() ensures SEQUENCE is the first op. + */ + if (resp->opcnt == 1 && resp->cstate.status != nfs_ok) + return; slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; @@ -3902,6 +3916,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->headerpadsz = 0; ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); @@ -4348,6 +4363,36 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, return true; } +/* + * Note that the response is constructed here both for the case + * of a new SEQUENCE request and for a replayed SEQUENCE request. + * We do not cache SEQUENCE responses as SEQUENCE is idempotent. + */ +static void nfsd4_construct_sequence_response(struct nfsd4_session *session, + struct nfsd4_sequence *seq) +{ + struct nfs4_client *clp = session->se_client; + + seq->maxslots_response = max(session->se_target_maxslots, + seq->maxslots); + seq->target_maxslots = session->se_target_maxslots; + + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; + } + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; + if (atomic_read(&clp->cl_admin_revoked)) + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -4397,6 +4442,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("%s: slotid %d\n", __func__, seq->slotid); trace_nfsd_slot_seqid_sequence(clp, seq, slot); + + nfsd4_construct_sequence_response(session, seq); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; @@ -4494,23 +4542,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } out: - seq->maxslots = max(session->se_target_maxslots, seq->maxslots); - seq->target_maxslots = session->se_target_maxslots; - - switch (clp->cl_cb_state) { - case NFSD4_CB_DOWN: - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; - break; - case NFSD4_CB_FAULT: - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; - break; - default: - seq->status_flags = 0; - } - if (!list_empty(&clp->cl_revoked)) - seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; - if (atomic_read(&clp->cl_admin_revoked)) - seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; trace_nfsd_seq4_status(rqstp, seq); out_no_session: if (conn) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c0a3c6a7c8bb..67bb9c0b9fcb 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) + if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) return false; + argp->opcnt = min_t(u32, argp->client_opcnt, + NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); @@ -2628,10 +2630,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, __be32 *p; __be32 pathlen; int pathlen_offset; - int strlen, count=0; char *str, *end, *next; - - dprintk("nfsd4_encode_components(%s)\n", components); + int count = 0; pathlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); @@ -2658,9 +2658,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, for (; *end && (*end != sep); end++) /* find sep or end of string */; - strlen = end - str; - if (strlen) { - if (xdr_stream_encode_opaque(xdr, str, strlen) < 0) + if (end > str) { + if (xdr_stream_encode_opaque(xdr, str, end - str) < 0) return nfserr_resource; count++; } else @@ -2939,6 +2938,12 @@ struct nfsd4_fattr_args { typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args); +static __be32 nfsd4_encode_fattr4__inval(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfserr_inval; +} + static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { @@ -3560,6 +3565,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, + [FATTR4_TIME_DELEG_ACCESS] = nfsd4_encode_fattr4__inval, + [FATTR4_TIME_DELEG_MODIFY] = nfsd4_encode_fattr4__inval, [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments, }; @@ -5066,7 +5073,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; /* Note slotid's are numbered from zero: */ /* sr_highest_slotid */ - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots_response - 1); if (nfserr != nfs_ok) return nfserr; /* sr_target_highest_slotid */ @@ -5918,8 +5925,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) */ warn_on_nonidempotent_op(op); xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT); - } - if (so) { + } else if (so) { int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index ea87b42894dd..b752433c3c2c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -57,6 +57,9 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 200 + struct nfsd_genl_rqstp { struct sockaddr rq_daddr; struct sockaddr rq_saddr; @@ -455,6 +458,7 @@ enum { #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ FATTR4_WORD2_MODE_UMASK | \ + FATTR4_WORD2_CLONE_BLKSIZE | \ NFSD4_2_SECURITY_ATTRS | \ FATTR4_WORD2_XATTR_SUPPORT | \ FATTR4_WORD2_TIME_DELEG_ACCESS | \ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3eb724ec9566..ed85dd43da18 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -269,9 +269,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, dentry); } - fhp->fh_dentry = dentry; - fhp->fh_export = exp; - switch (fhp->fh_maxsize) { case NFS4_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) @@ -293,6 +290,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, goto out; } + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + return 0; out: exp_put(exp); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index d4b48602b2b0..1ce8e12ae335 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -574,8 +574,9 @@ struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ u32 slotid; /* request/response */ - u32 maxslots; /* request/response */ + u32 maxslots; /* request */ u32 cachethis; /* request */ + u32 maxslots_response; /* response */ u32 target_maxslots; /* response */ u32 status_flags; /* response */ }; @@ -903,6 +904,7 @@ struct nfsd4_compoundargs { char * tag; u32 taglen; u32 minorversion; + u32 client_opcnt; u32 opcnt; bool splice_ok; struct nfsd4_op *ops; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f15ca6fc400d..deee16bc9d4e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2768,7 +2768,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (sci->sc_task) { wake_up(&sci->sc_wait_daemon); - kthread_stop(sci->sc_task); + if (kthread_stop(sci->sc_task)) { + spin_lock(&sci->sc_state_lock); + sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); + } } spin_lock(&sci->sc_state_lock); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 1161eabf11ee..9cc7eb863643 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -17,6 +17,7 @@ #include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" +#include "../internal.h" #if defined(CONFIG_PROC_FS) @@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f->handle_bytes >> 2; + if (!super_trylock_shared(inode->i_sb)) + return; + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); + up_read(&inode->i_sb->s_umount); + if ((ret == FILEID_INVALID) || (ret < 0)) return; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 86f2631e6360..10923bf7c8b8 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -867,6 +867,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, mlog_errno(ret); goto out; } + /* + * Invalidate extent cache after moving/defragging to prevent + * stale cached data with outdated extent flags. + */ + ocfs2_extent_map_trunc(inode, cpos); context->clusters_moved += alloc_size; next: diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 176281112273..501889856461 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -698,6 +698,12 @@ void pde_put(struct proc_dir_entry *pde) } } +static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent) +{ + rb_erase(&pde->subdir_node, &parent->subdir); + RB_CLEAR_NODE(&pde->subdir_node); +} + /* * Remove a /proc entry and free it if it's not currently in use. */ @@ -720,7 +726,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) WARN(1, "removing permanent /proc entry '%s'", de->name); de = NULL; } else { - rb_erase(&de->subdir_node, &parent->subdir); + pde_erase(de, parent); if (S_ISDIR(de->mode)) parent->nlink--; } @@ -764,7 +770,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) root->parent->name, root->name); return -EINVAL; } - rb_erase(&root->subdir_node, &parent->subdir); + pde_erase(root, parent); de = root; while (1) { @@ -776,7 +782,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) next->parent->name, next->name); return -EINVAL; } - rb_erase(&next->subdir_node, &de->subdir); + pde_erase(next, de); de = next; continue; } diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 4076336fbba6..572a9925bd6c 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1782,15 +1782,13 @@ int resctrl_mon_resource_init(void) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; if (r->mon.mbm_cntr_assignable) { - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); - mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; - mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & - (READS_TO_LOCAL_MEM | - READS_TO_LOCAL_S_MEM | - NON_TEMP_WRITE_TO_LOCAL_MEM); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index a4c02199fef4..17bd368574e9 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -5,17 +5,16 @@ config CIFS select NLS select NLS_UCS2_UTILS select CRYPTO - select CRYPTO_MD5 - select CRYPTO_SHA256 - select CRYPTO_SHA512 select CRYPTO_CMAC - select CRYPTO_HMAC select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_GCM select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_MD5 + select CRYPTO_LIB_SHA256 + select CRYPTO_LIB_SHA512 select KEYS select DNS_RESOLVER select ASN1 diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index b8ac7b7faf61..018055fd2cdb 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -388,11 +388,11 @@ out: * lease. Release one here, and the second below. */ cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } else { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -438,12 +438,14 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, static void smb2_close_cached_fid(struct kref *ref) +__releases(&cfid->cfids->cfid_list_lock) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); int rc; - spin_lock(&cfid->cfids->cfid_list_lock); + lockdep_assert_held(&cfid->cfids->cfid_list_lock); + if (cfid->on_list) { list_del(&cfid->entry); cfid->on_list = false; @@ -478,7 +480,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->has_lease) { cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfid->cfids->cfid_list_lock); close_cached_dir(cfid); @@ -487,7 +489,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, void close_cached_dir(struct cached_fid *cfid) { - kref_put(&cfid->refcount, smb2_close_cached_fid); + kref_put_lock(&cfid->refcount, smb2_close_cached_fid, &cfid->cfids->cfid_list_lock); } /* @@ -596,7 +598,7 @@ cached_dir_offload_close(struct work_struct *work) WARN_ON(cfid->on_list); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); } @@ -762,7 +764,7 @@ static void cfids_laundromat_worker(struct work_struct *work) * Drop the ref-count from above, either the lease-ref (if there * was one) or the extra one acquired. */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 63b3b1290bed..ce2ebc213a1d 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -339,7 +339,6 @@ int sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, struct cifs_fattr *fattr, uint sidtype) { - int rc = 0; struct key *sidkey; char *sidstr; const struct cred *saved_cred; @@ -446,12 +445,12 @@ out_revert_creds: * fails then we just fall back to using the ctx->linux_uid/linux_gid. */ got_valid_id: - rc = 0; if (sidtype == SIDOWNER) fattr->cf_uid = fuid; else fattr->cf_gid = fgid; - return rc; + + return 0; } int diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 7b7c8c38fdd0..801824825ecf 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -24,14 +24,43 @@ #include <linux/iov_iter.h> #include <crypto/aead.h> #include <crypto/arc4.h> +#include <crypto/md5.h> +#include <crypto/sha2.h> -static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, - void *priv, void *priv2) +static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, + const u8 *data, size_t len) { - struct shash_desc *shash = priv; + if (ctx->md5) { + md5_update(ctx->md5, data, len); + return 0; + } + if (ctx->hmac) { + hmac_sha256_update(ctx->hmac, data, len); + return 0; + } + return crypto_shash_update(ctx->shash, data, len); +} + +static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out) +{ + if (ctx->md5) { + md5_final(ctx->md5, out); + return 0; + } + if (ctx->hmac) { + hmac_sha256_final(ctx->hmac, out); + return 0; + } + return crypto_shash_final(ctx->shash, out); +} + +static size_t cifs_sig_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) +{ + struct cifs_calc_sig_ctx *ctx = priv; int ret, *pret = priv2; - ret = crypto_shash_update(shash, iter_base, len); + ret = cifs_sig_update(ctx, iter_base, len); if (ret < 0) { *pret = ret; return len; @@ -42,21 +71,20 @@ static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, /* * Pass the data from an iterator into a hash. */ -static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize, - struct shash_desc *shash) +static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize, + struct cifs_calc_sig_ctx *ctx) { struct iov_iter tmp_iter = *iter; int err = -EIO; - if (iterate_and_advance_kernel(&tmp_iter, maxsize, shash, &err, - cifs_shash_step) != maxsize) + if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err, + cifs_sig_step) != maxsize) return err; return 0; } -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash) +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx) { int i; ssize_t rc; @@ -82,8 +110,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, return -EIO; } - rc = crypto_shash_update(shash, - iov[i].iov_base, iov[i].iov_len); + rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len); if (rc) { cifs_dbg(VFS, "%s: Could not update with payload\n", __func__); @@ -91,11 +118,11 @@ int __cifs_calc_signature(struct smb_rqst *rqst, } } - rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash); + rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx); if (rc < 0) return rc; - rc = crypto_shash_final(shash, signature); + rc = cifs_sig_final(ctx, signature); if (rc) cifs_dbg(VFS, "%s: Could not generate hash\n", __func__); @@ -112,29 +139,22 @@ int __cifs_calc_signature(struct smb_rqst *rqst, static int cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature) { - int rc; + struct md5_ctx ctx; if (!rqst->rq_iov || !signature || !server) return -EINVAL; - - rc = cifs_alloc_hash("md5", &server->secmech.md5); - if (rc) - return -1; - - rc = crypto_shash_init(server->secmech.md5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init md5\n", __func__); - return rc; + if (fips_enabled) { + cifs_dbg(VFS, + "MD5 signature support is disabled due to FIPS\n"); + return -EOPNOTSUPP; } - rc = crypto_shash_update(server->secmech.md5, - server->session_key.response, server->session_key.len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response\n", __func__); - return rc; - } + md5_init(&ctx); + md5_update(&ctx, server->session_key.response, server->session_key.len); - return __cifs_calc_signature(rqst, server, signature, server->secmech.md5); + return __cifs_calc_signature( + rqst, server, signature, + &(struct cifs_calc_sig_ctx){ .md5 = &ctx }); } /* must be called with server->srv_mutex held */ @@ -405,11 +425,11 @@ static __le64 find_timestamp(struct cifs_ses *ses) } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, - const struct nls_table *nls_cp, struct shash_desc *hmacmd5) + const struct nls_table *nls_cp) { - int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; + struct hmac_md5_ctx hmac_ctx; __le16 *user; wchar_t *domain; wchar_t *server; @@ -417,17 +437,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash, nls_cp); - rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } + hmac_md5_init_usingrawkey(&hmac_ctx, nt_hash, CIFS_NTHASH_SIZE); /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; @@ -442,12 +452,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, *(u16 *)user = 0; } - rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)user, 2 * len); kfree(user); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc); - return rc; - } /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { @@ -459,12 +465,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)domain, 2 * len); kfree(domain); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc); - return rc; - } } else { /* We use ses->ip_addr if no domain name available */ len = strlen(ses->ip_addr); @@ -474,25 +476,16 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return -ENOMEM; len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)server, 2 * len); kfree(server); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc); - return rc; - } } - rc = crypto_shash_final(hmacmd5, ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + hmac_md5_final(&hmac_ctx, ntlmv2_hash); + return 0; } -static int -CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5) +static void CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { - int rc; struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); unsigned int hash_len; @@ -501,35 +494,15 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + offsetof(struct ntlmv2_resp, challenge.key[0])); - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } - if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); - rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - return rc; - } - - /* Note that the MD5 digest over writes anon.challenge_key.key */ - rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + /* Note that the HMAC-MD5 value overwrites ntlmv2->challenge.key */ + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->challenge.key, hash_len, + ntlmv2->ntlmv2_hash); } /* @@ -586,7 +559,6 @@ out: int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { - struct shash_desc *hmacmd5 = NULL; unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; @@ -657,51 +629,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); + if (fips_enabled) { + cifs_dbg(VFS, "NTLMv2 support is disabled due to FIPS\n"); + rc = -EOPNOTSUPP; goto unlock; } /* calculate ntlmv2_hash */ - rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5); + rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); if (rc) { cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc); goto unlock; } /* calculate first part of the client response (CR1) */ - rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc); - goto unlock; - } + CalcNTLMv2_response(ses, ntlmv2_hash); /* now calculate the session key for NTLMv2 */ - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_final(hmacmd5, ses->auth_key.response); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ses->auth_key.response); + rc = 0; unlock: cifs_server_unlock(ses->server); - cifs_free_hash(&hmacmd5); setup_ntlmv2_rsp_ret: kfree_sensitive(tiblob); @@ -743,9 +693,6 @@ void cifs_crypto_secmech_release(struct TCP_Server_Info *server) { cifs_free_hash(&server->secmech.aes_cmac); - cifs_free_hash(&server->secmech.hmacsha256); - cifs_free_hash(&server->secmech.md5); - cifs_free_hash(&server->secmech.sha512); if (server->secmech.enc) { crypto_free_aead(server->secmech.enc); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 05b1fa76e8cc..185ac41bd7e9 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -173,7 +173,7 @@ module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); module_param(enable_gcm_256, bool, 0644); -MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0"); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/1"); module_param(require_gcm_256, bool, 0644); MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0"); @@ -2139,13 +2139,9 @@ MODULE_DESCRIPTION "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); MODULE_SOFTDEP("ecb"); -MODULE_SOFTDEP("hmac"); -MODULE_SOFTDEP("md5"); MODULE_SOFTDEP("nls"); MODULE_SOFTDEP("aes"); MODULE_SOFTDEP("cmac"); -MODULE_SOFTDEP("sha256"); -MODULE_SOFTDEP("sha512"); MODULE_SOFTDEP("aead2"); MODULE_SOFTDEP("ccm"); MODULE_SOFTDEP("gcm"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8f6f567d7474..203e2aaa3c25 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -24,6 +24,7 @@ #include "cifsacl.h" #include <crypto/internal/hash.h> #include <uapi/linux/cifs/cifs_mount.h> +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" #include <linux/filelock.h> @@ -221,9 +222,6 @@ struct session_key { /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { - struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ - struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ - struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ @@ -536,8 +534,6 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); - int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, - bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, @@ -702,12 +698,6 @@ get_rfc1002_length(void *buf) return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } -static inline void -inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} - struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; @@ -740,7 +730,7 @@ struct TCP_Server_Info { bool nosharesock; bool tcp_nodelay; bool terminate; - unsigned int credits; /* send no more requests at once */ + int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ @@ -1021,8 +1011,6 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) -#define CIFS_DEFAULT_IOSIZE (1024 * 1024) - /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to * those values when posix extensions aren't in force. In actuality here, we @@ -2148,30 +2136,20 @@ extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; /* Operations for different SMB versions */ -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ -#define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; -#define SMBDEFAULT_VERSION_STRING "default" extern struct smb_version_values smbdefault_values; -#define SMB3ANY_VERSION_STRING "3" extern struct smb_version_values smb3any_values; -#define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; -#define SMB302_VERSION_STRING "3.02" -#define ALT_SMB302_VERSION_STRING "3.0.2" /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; -#define SMB311_VERSION_STRING "3.1.1" -#define ALT_SMB311_VERSION_STRING "3.11" extern struct smb_version_operations smb311_operations; extern struct smb_version_values smb311_values; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index e8fba98690ce..3528c365a452 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -9,6 +9,7 @@ #define _CIFSPROTO_H #include <linux/nls.h> #include <linux/ctype.h> +#include "cifsglob.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" @@ -615,6 +616,8 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, extern struct TCP_Server_Info * cifs_find_tcp_session(struct smb3_fs_context *ctx); +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal); + void __cifs_put_smb_ses(struct cifs_ses *ses); extern struct cifs_ses * @@ -632,9 +635,13 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash); +struct cifs_calc_sig_ctx { + struct md5_ctx *md5; + struct hmac_sha256_ctx *hmac; + struct shash_desc *shash; +}; +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 2881efcbe09a..7da194f29fef 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1311,6 +1311,8 @@ cifs_readv_callback(struct mid_q_entry *mid) .rreq_debug_id = rdata->rreq->debug_id, .rreq_debug_index = rdata->subreq.debug_index, }; + unsigned int rreq_debug_id = rdata->rreq->debug_id; + unsigned int subreq_debug_index = rdata->subreq.debug_index; cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", __func__, mid->mid, mid->mid_state, rdata->result, @@ -1374,6 +1376,9 @@ do_retry: __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); } + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, + server->credits, server->in_flight, + 0, cifs_trace_rw_credits_read_response_clear); rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; @@ -1381,6 +1386,9 @@ do_retry: netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, + server->credits, server->in_flight, + credits.value, cifs_trace_rw_credits_read_response_add); } /* cifs_async_readv - send an async write, and set up mid to handle result */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index dd12f3eb61dc..55cb4b0cbd48 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -310,6 +310,8 @@ cifs_abort_connection(struct TCP_Server_Info *server) server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; + } else if (cifs_rdma_enabled(server)) { + smbd_destroy(server); } server->sequence_number = 0; server->session_estab = false; @@ -338,12 +340,6 @@ cifs_abort_connection(struct TCP_Server_Info *server) mid_execute_callback(mid); release_mid(mid); } - - if (cifs_rdma_enabled(server)) { - cifs_server_lock(server); - smbd_destroy(server); - cifs_server_unlock(server); - } } static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets) @@ -2015,39 +2011,31 @@ static int match_session(struct cifs_ses *ses, /** * cifs_setup_ipc - helper to setup the IPC tcon for the session * @ses: smb session to issue the request on - * @ctx: the superblock configuration context to use for building the - * new tree connection for the IPC (interprocess communication RPC) + * @seal: if encryption is requested * * A new IPC connection is made and stored in the session * tcon_ipc. The IPC tcon has the same lifetime as the session. */ -static int -cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal) { int rc = 0, xid; struct cifs_tcon *tcon; char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; - bool seal = false; struct TCP_Server_Info *server = ses->server; /* * If the mount request that resulted in the creation of the * session requires encryption, force IPC to be encrypted too. */ - if (ctx->seal) { - if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) - seal = true; - else { - cifs_server_dbg(VFS, - "IPC: server doesn't support encryption\n"); - return -EOPNOTSUPP; - } + if (seal && !(server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) { + cifs_server_dbg(VFS, "IPC: server doesn't support encryption\n"); + return ERR_PTR(-EOPNOTSUPP); } /* no need to setup directory caching on IPC share, so pass in false */ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc); if (tcon == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock(&server->srv_lock); scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname); @@ -2057,13 +2045,13 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->ses = ses; tcon->ipc = true; tcon->seal = seal; - rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls); + rc = server->ops->tree_connect(xid, ses, unc, tcon, ses->local_nls); free_xid(xid); if (rc) { - cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); + cifs_server_dbg(VFS | ONCE, "failed to connect to IPC (rc=%d)\n", rc); tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail); - goto out; + return ERR_PTR(rc); } cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid); @@ -2071,9 +2059,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) spin_lock(&tcon->tc_lock); tcon->status = TID_GOOD; spin_unlock(&tcon->tc_lock); - ses->tcon_ipc = tcon; -out: - return rc; + return tcon; } static struct cifs_ses * @@ -2347,6 +2333,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; + struct cifs_tcon *ipc; struct cifs_ses *ses; unsigned int xid; int retries = 0; @@ -2525,7 +2512,12 @@ retry_new_session: list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - cifs_setup_ipc(ses, ctx); + ipc = cifs_setup_ipc(ses, ctx->seal); + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + ses->tcon_ipc = !IS_ERR(ipc) ? ipc : NULL; + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); free_xid(xid); diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 4dada26d56b5..f2ad0ccd08a7 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1120,24 +1120,63 @@ static bool target_share_equal(struct cifs_tcon *tcon, const char *s1) return match; } -static bool is_ses_good(struct cifs_ses *ses) +static bool is_ses_good(struct cifs_tcon *tcon, struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; - struct cifs_tcon *tcon = ses->tcon_ipc; + struct cifs_tcon *ipc = NULL; bool ret; + spin_lock(&cifs_tcp_ses_lock); spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); + ret = !cifs_chan_needs_reconnect(ses, server) && - ses->ses_status == SES_GOOD && - !tcon->need_reconnect; + ses->ses_status == SES_GOOD; + spin_unlock(&ses->chan_lock); + + if (!ret) + goto out; + + if (likely(ses->tcon_ipc)) { + if (ses->tcon_ipc->need_reconnect) { + ret = false; + goto out; + } + } else { + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + + ipc = cifs_setup_ipc(ses, tcon->seal); + + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + if (!IS_ERR(ipc)) { + if (!ses->tcon_ipc) { + ses->tcon_ipc = ipc; + ipc = NULL; + } + } else { + ret = false; + ipc = NULL; + } + } + +out: spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + if (ipc && server->ops->tree_disconnect) { + unsigned int xid = get_xid(); + + (void)server->ops->tree_disconnect(xid, ipc); + _free_xid(xid); + } + tconInfoFree(ipc, netfs_trace_tcon_ref_free_ipc); return ret; } /* Refresh dfs referral of @ses */ -static void refresh_ses_referral(struct cifs_ses *ses) +static void refresh_ses_referral(struct cifs_tcon *tcon, struct cifs_ses *ses) { struct cache_entry *ce; unsigned int xid; @@ -1153,7 +1192,7 @@ static void refresh_ses_referral(struct cifs_ses *ses) } ses = CIFS_DFS_ROOT_SES(ses); - if (!is_ses_good(ses)) { + if (!is_ses_good(tcon, ses)) { cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; @@ -1241,7 +1280,7 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) up_read(&htable_rw_lock); ses = CIFS_DFS_ROOT_SES(ses); - if (!is_ses_good(ses)) { + if (!is_ses_good(tcon, ses)) { cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; @@ -1309,7 +1348,7 @@ void dfs_cache_refresh(struct work_struct *work) tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); list_for_each_entry(ses, &tcon->dfs_ses_list, dlist) - refresh_ses_referral(ses); + refresh_ses_referral(tcon, ses); refresh_tcon_referral(tcon, false); queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index e60927b2a7c8..0f894d09157b 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1435,12 +1435,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "Unknown error parsing devname\n"); goto cifs_parse_mount_err; } + kfree(ctx->source); ctx->source = smb3_fs_context_fullpath(ctx, '/'); if (IS_ERR(ctx->source)) { ctx->source = NULL; cifs_errorf(fc, "OOM when copying UNC string\n"); goto cifs_parse_mount_err; } + kfree(fc->source); fc->source = kstrdup(ctx->source, GFP_KERNEL); if (fc->source == NULL) { cifs_errorf(fc, "OOM when copying UNC string\n"); @@ -1468,7 +1470,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, break; } - if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) > + if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) == CIFS_MAX_USERNAME_LEN) { pr_warn("username too long\n"); goto cifs_parse_mount_err; diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 239dd84a336f..cac355364e43 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2431,8 +2431,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, tcon = tlink_tcon(tlink); server = tcon->ses->server; - if (!server->ops->rename) - return -ENOSYS; + if (!server->ops->rename) { + rc = -ENOSYS; + goto do_rename_exit; + } /* try path-based rename first */ rc = server->ops->rename(xid, tcon, from_dentry, @@ -2482,11 +2484,8 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ do_rename_exit: - if (rc == 0) { + if (rc == 0) d_move(from_dentry, to_dentry); - /* Force a new lookup */ - d_drop(from_dentry); - } cifs_put_tlink(tlink); return rc; } diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index fe80e711cd75..70f3c0c67eeb 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -5,6 +5,7 @@ * Author(s): Steve French (sfrench@us.ibm.com) * */ +#include <crypto/md5.h> #include <linux/fs.h> #include <linux/stat.h> #include <linux/slab.h> @@ -37,23 +38,6 @@ #define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash static int -symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) -{ - int rc; - struct shash_desc *md5 = NULL; - - rc = cifs_alloc_hash("md5", &md5); - if (rc) - return rc; - - rc = crypto_shash_digest(md5, link_str, link_len, md5_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); - cifs_free_hash(&md5); - return rc; -} - -static int parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, char **_link_str) { @@ -77,11 +61,7 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -EINVAL; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(md5_str2, sizeof(md5_str2), CIFS_MF_SYMLINK_MD5_FORMAT, @@ -103,7 +83,6 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, static int format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) { - int rc; unsigned int link_len; unsigned int ofs; u8 md5_hash[16]; @@ -116,11 +95,7 @@ format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -ENAMETOOLONG; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(buf, buf_len, CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index dda6dece802a..e10123d8cd7d 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, char *data_end; struct dfs_referral_level_3 *ref; + if (rsp_size < sizeof(*rsp)) { + cifs_dbg(VFS | ONCE, + "%s: header is malformed (size is %u, must be %zu)\n", + __func__, rsp_size, sizeof(*rsp)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); if (*num_of_nodes < 1) { @@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, goto parse_DFS_referrals_exit; } + if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) { + cifs_dbg(VFS | ONCE, + "%s: malformed buffer (size is %u, must be at least %zu)\n", + __func__, rsp_size, + sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + ref = (struct dfs_referral_level_3 *) &(rsp->referrals); if (ref->VersionNumber != cpu_to_le16(3)) { cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 0a8c2fcc9ded..ef3b498b0a02 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -584,7 +584,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, * to sign packets before we generate the channel signing key * (we sign with the session key) */ - rc = smb311_crypto_shash_allocate(chan->server); + rc = smb3_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); mutex_unlock(&ses->session_mutex); diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 09e3fc81d7cb..69cb81fa0d3a 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1294,6 +1294,8 @@ static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); if (smb2_to_name == NULL) { rc = -ENOMEM; + if (cfile) + cifsFileInfo_put(cfile); goto smb2_rename_path; } in_iov.iov_base = smb2_to_name; diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 89d933b4a8bc..96bfe4c63ccf 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -7,6 +7,7 @@ * Pavel Shilovsky (pshilovsky@samba.org) 2012 * */ +#include <crypto/sha2.h> #include <linux/ctype.h> #include "cifsglob.h" #include "cifsproto.h" @@ -888,13 +889,13 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * @iov: array containing the SMB request we will send to the server * @nvec: number of array entries for the iov */ -int +void smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec) { - int i, rc; + int i; struct smb2_hdr *hdr; - struct shash_desc *sha512 = NULL; + struct sha512_ctx sha_ctx; hdr = (struct smb2_hdr *)iov[0].iov_base; /* neg prot are always taken */ @@ -907,52 +908,22 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, * and we can test it. Preauth requires 3.1.1 for now. */ if (server->dialect != SMB311_PROT_ID) - return 0; + return; if (hdr->Command != SMB2_SESSION_SETUP) - return 0; + return; /* skip last sess setup response */ if ((hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) && (hdr->Status == NT_STATUS_OK || (hdr->Status != cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)))) - return 0; + return; ok: - rc = smb311_crypto_shash_allocate(server); - if (rc) - return rc; - - sha512 = server->secmech.sha512; - rc = crypto_shash_init(sha512); - if (rc) { - cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__); - return rc; - } - - rc = crypto_shash_update(sha512, ses->preauth_sha_hash, - SMB2_PREAUTH_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__); - return rc; - } - - for (i = 0; i < nvec; i++) { - rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", - __func__); - return rc; - } - } - - rc = crypto_shash_final(sha512, ses->preauth_sha_hash); - if (rc) { - cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n", - __func__); - return rc; - } - - return 0; + sha512_init(&sha_ctx); + sha512_update(&sha_ctx, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); + for (i = 0; i < nvec; i++) + sha512_update(&sha_ctx, iov[i].iov_base, iov[i].iov_len); + sha512_final(&sha_ctx, ses->preauth_sha_hash); } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7c392cf5940b..1e39f2165e42 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2799,11 +2799,12 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; int rc; __le16 *utf16_path; - struct cached_fid *cfid = NULL; + struct cached_fid *cfid; int retries = 0, cur_sleep = 1; replay_again: /* reinitialize for possible replay */ + cfid = NULL; flags = CIFS_CP_CREATE_CLOSE_OP; oplock = SMB2_OPLOCK_LEVEL_NONE; server = cifs_pick_channel(ses); @@ -3212,8 +3213,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return ERR_PTR(rc); + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3245,6 +3245,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); @@ -3285,8 +3286,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return rc; + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3307,6 +3307,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); return rc; @@ -5446,7 +5447,6 @@ struct smb_version_operations smb20_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb2_is_read_op, .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5550,7 +5550,6 @@ struct smb_version_operations smb21_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb21_is_read_op, .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5660,7 +5659,6 @@ struct smb_version_operations smb30_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb30signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, @@ -5777,7 +5775,6 @@ struct smb_version_operations smb311_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb311signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index b0739a2661bf..8b4a4573e9c3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4054,9 +4054,12 @@ replay_again: smb_rsp = (struct smb2_change_notify_rsp *)rsp_iov.iov_base; - smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), - le32_to_cpu(smb_rsp->OutputBufferLength), &rsp_iov, + rc = smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), + le32_to_cpu(smb_rsp->OutputBufferLength), + &rsp_iov, sizeof(struct file_notify_information)); + if (rc) + goto cnotify_exit; *out_data = kmemdup((char *)smb_rsp + le16_to_cpu(smb_rsp->OutputBufferOffset), le32_to_cpu(smb_rsp->OutputBufferLength), GFP_KERNEL); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index b3f1398c9f79..5241daaae543 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -39,12 +39,6 @@ extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); -extern int smb2_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); -extern int smb3_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, @@ -295,10 +289,10 @@ extern int smb2_validate_and_copy_iov(unsigned int offset, extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); -extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); -extern int smb311_update_preauth_hash(struct cifs_ses *ses, - struct TCP_Server_Info *server, - struct kvec *iov, int nvec); +extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server); +extern void smb311_update_preauth_hash(struct cifs_ses *ses, + struct TCP_Server_Info *server, + struct kvec *iov, int nvec); extern int smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, const char *path, u32 desired_access, diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 33f33013b392..6a9b80385b86 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -19,6 +19,7 @@ #include <linux/mempool.h> #include <linux/highmem.h> #include <crypto/aead.h> +#include <crypto/sha2.h> #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -26,53 +27,14 @@ #include "../common/smb2status.h" #include "smb2glob.h" -static int +int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; - int rc; - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - goto err; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - return 0; -err: - cifs_free_hash(&p->hmacsha256); - return rc; + return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); } -int -smb311_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - struct cifs_secmech *p = &server->secmech; - int rc = 0; - - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - return rc; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - rc = cifs_alloc_hash("sha512", &p->sha512); - if (rc) - goto err; - - return 0; - -err: - cifs_free_hash(&p->aes_cmac); - cifs_free_hash(&p->hmacsha256); - return rc; -} - - static int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { @@ -247,16 +209,15 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) return tcon; } -int +static int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; - unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; - struct shash_desc *shash = NULL; + struct hmac_sha256_ctx hmac_ctx; struct smb_rqst drqst; __u64 sid = le64_to_cpu(shdr->SessionId); u8 key[SMB2_NTLMV2_SESSKEY_SIZE]; @@ -271,30 +232,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - if (allocate_crypto) { - rc = cifs_alloc_hash("hmac(sha256)", &shash); - if (rc) { - cifs_server_dbg(VFS, - "%s: sha256 alloc failed\n", __func__); - goto out; - } - } else { - shash = server->secmech.hmacsha256; - } - - rc = crypto_shash_setkey(shash->tfm, key, sizeof(key)); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with response\n", - __func__); - goto out; - } - - rc = crypto_shash_init(shash); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sha256", __func__); - goto out; - } + hmac_sha256_init_usingrawkey(&hmac_ctx, key, sizeof(key)); /* * For SMB2+, __cifs_calc_signature() expects to sign only the actual @@ -305,25 +243,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, */ drqst = *rqst; if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { - rc = crypto_shash_update(shash, iov[0].iov_base, - iov[0].iov_len); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with payload\n", - __func__); - goto out; - } + hmac_sha256_update(&hmac_ctx, iov[0].iov_base, iov[0].iov_len); drqst.rq_iov++; drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb2_signature, + &(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx }); if (!rc) - memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb2_signature, SMB2_SIGNATURE_SIZE); -out: - if (allocate_crypto) - cifs_free_hash(&shash); return rc; } @@ -336,8 +266,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, __u8 L256[4] = {0, 0, 1, 0}; int rc = 0; unsigned char prfhash[SMB2_HMACSHA256_SIZE]; - unsigned char *hashptr = prfhash; struct TCP_Server_Info *server = ses->server; + struct hmac_sha256_ctx hmac_ctx; memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); memset(key, 0x0, key_size); @@ -345,67 +275,26 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, rc = smb3_crypto_shash_allocate(server); if (rc) { cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_init(server->secmech.hmacsha256); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, i, 4); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with n\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__); - goto smb3signkey_ret; + return rc; } - rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__); - goto smb3signkey_ret; - } + hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); + hmac_sha256_update(&hmac_ctx, i, 4); + hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len); + hmac_sha256_update(&hmac_ctx, &zero, 1); + hmac_sha256_update(&hmac_ctx, context.iov_base, context.iov_len); if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { - rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4); + hmac_sha256_update(&hmac_ctx, L256, 4); } else { - rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4); - } - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__); - goto smb3signkey_ret; + hmac_sha256_update(&hmac_ctx, L128, 4); } + hmac_sha256_final(&hmac_ctx, prfhash); - rc = crypto_shash_final(server->secmech.hmacsha256, hashptr); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); - goto smb3signkey_ret; - } - - memcpy(key, hashptr, key_size); - -smb3signkey_ret: - return rc; + memcpy(key, prfhash, key_size); + return 0; } struct derivation { @@ -576,19 +465,21 @@ generate_smb311signingkey(struct cifs_ses *ses, return generate_smb3signingkey(ses, server, &triplet); } -int +static int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; - unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; struct shash_desc *shash = NULL; struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; + if (server->vals->protocol_id <= SMB21_PROT_ID) + return smb2_calc_signature(rqst, server, allocate_crypto); + rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (unlikely(rc)) { cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__); @@ -643,9 +534,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb3_signature, + &(struct cifs_calc_sig_ctx){ .shash = shash }); if (!rc) - memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb3_signature, SMB2_SIGNATURE_SIZE); out: if (allocate_crypto) @@ -657,7 +550,6 @@ out: static int smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; struct smb2_hdr *shdr; struct smb2_sess_setup_req *ssr; bool is_binding; @@ -684,9 +576,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) return 0; } - rc = server->ops->calc_signature(rqst, server, false); - - return rc; + return smb3_calc_signature(rqst, server, false); } int @@ -722,7 +612,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE); - rc = server->ops->calc_signature(rqst, server, true); + rc = smb3_calc_signature(rqst, server, true); if (rc) return rc; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 316f398c70f4..c6c428c2e08d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -172,6 +172,7 @@ static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.dec_wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); @@ -289,6 +290,9 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + case SMBDIRECT_SOCKET_CONNECTED: sc->status = SMBDIRECT_SOCKET_ERROR; break; @@ -495,6 +499,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbdirect_send_io *request = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); struct smbdirect_socket *sc = request->socket; + int lcredits = 0; log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", request, ib_wc_status_msg(wc->status)); @@ -504,22 +509,24 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); + mempool_free(request, sc->send_io.mem.pool); + lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { if (wc->status != IB_WC_WR_FLUSH_ERR) log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->opcode); - mempool_free(request, sc->send_io.mem.pool); smbd_disconnect_rdma_connection(sc); return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); wake_up(&sc->send_io.pending.dec_wait_queue); - - mempool_free(request, sc->send_io.mem.pool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -567,6 +574,7 @@ static bool process_negotiation_response( log_rdma_event(ERR, "error: credits_granted==0\n"); return false; } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { @@ -1114,6 +1122,24 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, struct smbdirect_data_transfer *packet; int new_credits = 0; +wait_lcredit: + /* Wait for local send credits */ + rc = wait_event_interruptible(sc->send_io.lcredits.wait_queue, + atomic_read(&sc->send_io.lcredits.count) > 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (rc) + goto err_wait_lcredit; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_lcredit; + } + if (unlikely(atomic_dec_return(&sc->send_io.lcredits.count) < 0)) { + atomic_inc(&sc->send_io.lcredits.count); + goto wait_lcredit; + } + wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(sc->send_io.credits.wait_queue, @@ -1132,23 +1158,6 @@ wait_credit: goto wait_credit; } -wait_send_queue: - wait_event(sc->send_io.pending.dec_wait_queue, - atomic_read(&sc->send_io.pending.count) < sp->send_credit_target || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); - rc = -EAGAIN; - goto err_wait_send_queue; - } - - if (unlikely(atomic_inc_return(&sc->send_io.pending.count) > - sp->send_credit_target)) { - atomic_dec(&sc->send_io.pending.count); - goto wait_send_queue; - } - request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) { rc = -ENOMEM; @@ -1229,10 +1238,21 @@ wait_send_queue: le32_to_cpu(packet->data_length), le32_to_cpu(packet->remaining_data_length)); + /* + * Now that we got a local and a remote credit + * we add us as pending + */ + atomic_inc(&sc->send_io.pending.count); + rc = smbd_post_send(sc, request); if (!rc) return 0; + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); + + wake_up(&sc->send_io.pending.dec_wait_queue); + err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) @@ -1246,14 +1266,14 @@ err_dma: atomic_sub(new_credits, &sc->recv_io.credits.count); err_alloc: - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); - -err_wait_send_queue: - /* roll back send credits and pending */ atomic_inc(&sc->send_io.credits.count); + wake_up(&sc->send_io.credits.wait_queue); err_wait_credit: + atomic_inc(&sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + +err_wait_lcredit: return rc; } @@ -1575,12 +1595,12 @@ void smbd_destroy(struct TCP_Server_Info *server) disable_work_sync(&sc->disconnect_work); log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smbd_disconnect_rdma_work(&sc->disconnect_work); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { log_rdma_event(INFO, "wait for transport being disconnected\n"); - wait_event_interruptible( - sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + log_rdma_event(INFO, "waited for transport being disconnected\n"); } /* @@ -1624,19 +1644,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); destroy_receive_buffers(sc); - /* - * For performance reasons, memory registration and deregistration - * are not locked by srv_mutex. It is possible some processes are - * blocked on transport srv_mutex while holding memory registration. - * Release the transport srv_mutex to allow them to hit the failure - * path when sending data, and then release memory registrations. - */ log_rdma_event(INFO, "freeing mr list\n"); - while (atomic_read(&sc->mr_io.used.count)) { - cifs_server_unlock(server); - msleep(1000); - cifs_server_lock(server); - } destroy_mr_list(sc); ib_free_cq(sc->ib.send_cq); @@ -1779,6 +1787,7 @@ static struct smbd_connection *_smbd_get_connection( struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; @@ -1850,6 +1859,25 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } + sp->responder_resources = + min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); + log_rdma_mr(INFO, "responder_resources=%d\n", + sp->responder_resources); + + /* + * We use allocate sp->responder_resources * 2 MRs + * and each MR needs WRs for REG and INV, so + * we use '* 4'. + * + * +1 for ib_drain_qp() + */ + memset(&qp_cap, 0, sizeof(qp_cap)); + qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { rc = PTR_ERR(sc->ib.pd); @@ -1860,7 +1888,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target, IB_POLL_SOFTIRQ); + qp_cap.max_send_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; goto alloc_cq_failed; @@ -1868,7 +1896,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, IB_POLL_SOFTIRQ); + qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; goto alloc_cq_failed; @@ -1877,11 +1905,7 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = sc; - qp_attr.cap.max_send_wr = sp->send_credit_target; - qp_attr.cap.max_recv_wr = sp->recv_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - qp_attr.cap.max_inline_data = 0; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -1895,12 +1919,6 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.qp = sc->rdma.cm_id->qp; - sp->responder_resources = - min_t(u8, sp->responder_resources, - sc->ib.dev->attrs.max_qp_rd_atom); - log_rdma_mr(INFO, "responder_resources=%d\n", - sp->responder_resources); - memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = sp->initiator_depth; conn_param.responder_resources = sp->responder_resources; @@ -2352,18 +2370,84 @@ static void smbd_mr_recovery_work(struct work_struct *work) } } +static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) +{ + struct smbdirect_socket *sc = mr->socket; + + lockdep_assert_held(&mr->mutex); + + if (mr->state == SMBDIRECT_MR_DISABLED) + return; + + if (mr->mr) + ib_dereg_mr(mr->mr); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + kfree(mr->sgt.sgl); + + mr->mr = NULL; + mr->sgt.sgl = NULL; + mr->sgt.nents = 0; + + mr->state = SMBDIRECT_MR_DISABLED; +} + +static void smbd_mr_free_locked(struct kref *kref) +{ + struct smbdirect_mr_io *mr = + container_of(kref, struct smbdirect_mr_io, kref); + + lockdep_assert_held(&mr->mutex); + + /* + * smbd_mr_disable_locked() should already be called! + */ + if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) + smbd_mr_disable_locked(mr); + + mutex_unlock(&mr->mutex); + mutex_destroy(&mr->mutex); + kfree(mr); +} + static void destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; + LIST_HEAD(all_list); + unsigned long flags; disable_work_sync(&sc->mr_io.recovery_work); - list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) { - if (mr->state == SMBDIRECT_MR_INVALIDATED) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, - mr->sgt.nents, mr->dir); - ib_dereg_mr(mr->mr); - kfree(mr->sgt.sgl); - kfree(mr); + + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_splice_tail_init(&sc->mr_io.all.list, &all_list); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + + list_for_each_entry_safe(mr, tmp, &all_list, list) { + mutex_lock(&mr->mutex); + + smbd_mr_disable_locked(mr); + list_del(&mr->list); + mr->socket = NULL; + + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + * + * If the mr is still registered it will + * be dangling (detached from the connection + * waiting for smbd_deregister_mr() to be + * called in order to free the memory. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } } @@ -2377,10 +2461,9 @@ static void destroy_mr_list(struct smbdirect_socket *sc) static int allocate_mr_list(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - int i; - struct smbdirect_mr_io *smbdirect_mr, *tmp; - - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + struct smbdirect_mr_io *mr; + int ret; + u32 i; if (sp->responder_resources == 0) { log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); @@ -2389,42 +2472,52 @@ static int allocate_mr_list(struct smbdirect_socket *sc) /* Allocate more MRs (2x) than hardware responder_resources */ for (i = 0; i < sp->responder_resources * 2; i++) { - smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); - if (!smbdirect_mr) - goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(smbdirect_mr->mr)) { + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = -ENOMEM; + goto kzalloc_mr_failed; + } + + kref_init(&mr->kref); + mutex_init(&mr->mutex); + + mr->mr = ib_alloc_mr(sc->ib.pd, + sc->mr_io.type, + sp->max_frmr_depth); + if (IS_ERR(mr->mr)) { + ret = PTR_ERR(mr->mr); log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", sc->mr_io.type, sp->max_frmr_depth); - goto out; + goto ib_alloc_mr_failed; } - smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!smbdirect_mr->sgt.sgl) { + + mr->sgt.sgl = kcalloc(sp->max_frmr_depth, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!mr->sgt.sgl) { + ret = -ENOMEM; log_rdma_mr(ERR, "failed to allocate sgl\n"); - ib_dereg_mr(smbdirect_mr->mr); - goto out; + goto kcalloc_sgl_failed; } - smbdirect_mr->state = SMBDIRECT_MR_READY; - smbdirect_mr->socket = sc; + mr->state = SMBDIRECT_MR_READY; + mr->socket = sc; - list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list); + list_add_tail(&mr->list, &sc->mr_io.all.list); atomic_inc(&sc->mr_io.ready.count); } + + INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + return 0; -out: - kfree(smbdirect_mr); -cleanup_entries: - list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) { - list_del(&smbdirect_mr->list); - ib_dereg_mr(smbdirect_mr->mr); - kfree(smbdirect_mr->sgt.sgl); - kfree(smbdirect_mr); - } - return -ENOMEM; +kcalloc_sgl_failed: + ib_dereg_mr(mr->mr); +ib_alloc_mr_failed: + mutex_destroy(&mr->mutex); + kfree(mr); +kzalloc_mr_failed: + destroy_mr_list(sc); + return ret; } /* @@ -2458,6 +2551,7 @@ again: list_for_each_entry(ret, &sc->mr_io.all.list, list) { if (ret->state == SMBDIRECT_MR_READY) { ret->state = SMBDIRECT_MR_REGISTERED; + kref_get(&ret->kref); spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); atomic_dec(&sc->mr_io.ready.count); atomic_inc(&sc->mr_io.used.count); @@ -2504,9 +2598,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *smbdirect_mr; + struct smbdirect_mr_io *mr; int rc, num_pages; - enum dma_data_direction dir; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -2517,49 +2610,47 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return NULL; } - smbdirect_mr = get_mr(sc); - if (!smbdirect_mr) { + mr = get_mr(sc); + if (!mr) { log_rdma_mr(ERR, "get_mr returning NULL\n"); return NULL; } - dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - smbdirect_mr->dir = dir; - smbdirect_mr->need_invalidate = need_invalidate; - smbdirect_mr->sgt.nents = 0; - smbdirect_mr->sgt.orig_nents = 0; + mutex_lock(&mr->mutex); + + mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + mr->need_invalidate = need_invalidate; + mr->sgt.nents = 0; + mr->sgt.orig_nents = 0; log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", num_pages, iov_iter_count(iter), sp->max_frmr_depth); - smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth); + smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); - rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, dir); + rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", - num_pages, dir, rc); + num_pages, mr->dir, rc); goto dma_map_error; } - rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); - if (rc != smbdirect_mr->sgt.nents) { + rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); + if (rc != mr->sgt.nents) { log_rdma_mr(ERR, - "ib_map_mr_sg failed rc = %d nents = %x\n", - rc, smbdirect_mr->sgt.nents); + "ib_map_mr_sg failed rc = %d nents = %x\n", + rc, mr->sgt.nents); goto map_mr_error; } - ib_update_fast_reg_key(smbdirect_mr->mr, - ib_inc_rkey(smbdirect_mr->mr->rkey)); - reg_wr = &smbdirect_mr->wr; + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); + reg_wr = &mr->wr; reg_wr->wr.opcode = IB_WR_REG_MR; - smbdirect_mr->cqe.done = register_mr_done; - reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = register_mr_done; + reg_wr->wr.wr_cqe = &mr->cqe; reg_wr->wr.num_sge = 0; reg_wr->wr.send_flags = IB_SEND_SIGNALED; - reg_wr->mr = smbdirect_mr->mr; - reg_wr->key = smbdirect_mr->mr->rkey; + reg_wr->mr = mr->mr; + reg_wr->key = mr->mr->rkey; reg_wr->access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; @@ -2570,24 +2661,51 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(sc->ib.qp, ®_wr->wr, NULL); - if (!rc) - return smbdirect_mr; + if (!rc) { + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we keep that and let + * the caller use smbd_deregister_mr() + * to remove it again. + */ + mutex_unlock(&mr->mutex); + return mr; + } log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, smbdirect_mr->dir); + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); dma_map_error: - smbdirect_mr->state = SMBDIRECT_MR_ERROR; + mr->sgt.nents = 0; + mr->state = SMBDIRECT_MR_ERROR; if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); smbd_disconnect_rdma_connection(sc); + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we need to remove it again + * on error. + * + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); + return NULL; } @@ -2612,44 +2730,55 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) +void smbd_deregister_mr(struct smbdirect_mr_io *mr) { - struct ib_send_wr *wr; - struct smbdirect_socket *sc = smbdirect_mr->socket; - int rc = 0; + struct smbdirect_socket *sc = mr->socket; + + mutex_lock(&mr->mutex); + if (mr->state == SMBDIRECT_MR_DISABLED) + goto put_kref; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbd_mr_disable_locked(mr); + goto put_kref; + } + + if (mr->need_invalidate) { + struct ib_send_wr *wr = &mr->inv_wr; + int rc; - if (smbdirect_mr->need_invalidate) { /* Need to finish local invalidation before returning */ - wr = &smbdirect_mr->inv_wr; wr->opcode = IB_WR_LOCAL_INV; - smbdirect_mr->cqe.done = local_inv_done; - wr->wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = local_inv_done; + wr->wr_cqe = &mr->cqe; wr->num_sge = 0; - wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey; + wr->ex.invalidate_rkey = mr->mr->rkey; wr->send_flags = IB_SEND_SIGNALED; - init_completion(&smbdirect_mr->invalidate_done); + init_completion(&mr->invalidate_done); rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); + smbd_mr_disable_locked(mr); smbd_disconnect_rdma_connection(sc); goto done; } - wait_for_completion(&smbdirect_mr->invalidate_done); - smbdirect_mr->need_invalidate = false; + wait_for_completion(&mr->invalidate_done); + mr->need_invalidate = false; } else /* * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED * and defer to mr_recovery_work to recover the MR for next use */ - smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; + mr->state = SMBDIRECT_MR_INVALIDATED; - if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) { - ib_dma_unmap_sg( - sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, - smbdirect_mr->dir); - smbdirect_mr->state = SMBDIRECT_MR_READY; + if (mr->sgt.nents) { + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->sgt.nents = 0; + } + + if (mr->state == SMBDIRECT_MR_INVALIDATED) { + mr->state = SMBDIRECT_MR_READY; if (atomic_inc_return(&sc->mr_io.ready.count) == 1) wake_up(&sc->mr_io.ready.wait_queue); } else @@ -2663,7 +2792,23 @@ done: if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); - return rc; +put_kref: + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock + * and keep the mr in SMBDIRECT_MR_READY or + * SMBDIRECT_MR_ERROR state. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } static bool smb_set_sge(struct smb_extract_to_rdma *rdma, diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index d67ac5ddaff4..577d37dbeb8a 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -60,7 +60,7 @@ int smbd_send(struct TCP_Server_Info *server, struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); -int smbd_deregister_mr(struct smbdirect_mr_io *mr); +void smbd_deregister_mr(struct smbdirect_mr_io *mr); #else #define cifs_rdma_enabled(server) 0 diff --git a/fs/smb/client/trace.c b/fs/smb/client/trace.c index 465483787193..16b0e719731f 100644 --- a/fs/smb/client/trace.c +++ b/fs/smb/client/trace.c @@ -4,5 +4,6 @@ * * Author(s): Steve French <stfrench@microsoft.com> */ +#include "cifsglob.h" #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 051cd9dbba13..915cedde5d66 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -830,7 +830,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!server || server->terminate) continue; - if (CIFS_CHAN_NEEDS_RECONNECT(ses, i)) + if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur)) continue; /* diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index b88fa04f5792..029910d56c22 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -178,7 +178,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler, memcpy(pacl, value, size); if (pTcon->ses->server->ops->set_acl) { int aclflags = 0; - rc = 0; switch (handler->flags) { case XATTR_CIFS_NTSD_FULL: diff --git a/fs/smb/common/cifsglob.h b/fs/smb/common/cifsglob.h new file mode 100644 index 000000000000..00fd215e3eb5 --- /dev/null +++ b/fs/smb/common/cifsglob.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) + * + */ +#ifndef _COMMON_CIFS_GLOB_H +#define _COMMON_CIFS_GLOB_H + +static inline void inc_rfc1001_len(void *buf, int count) +{ + be32_add_cpu((__be32 *)buf, count); +} + +#define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING "2.0" +#define SMB21_VERSION_STRING "2.1" +#define SMBDEFAULT_VERSION_STRING "default" +#define SMB3ANY_VERSION_STRING "3" +#define SMB30_VERSION_STRING "3.0" +#define SMB302_VERSION_STRING "3.02" +#define ALT_SMB302_VERSION_STRING "3.0.2" +#define SMB311_VERSION_STRING "3.1.1" +#define ALT_SMB311_VERSION_STRING "3.11" + +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +#endif /* _COMMON_CIFS_GLOB_H */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index db22a1d0546b..ee5a90d691c8 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -142,7 +142,15 @@ struct smbdirect_socket { } mem; /* - * The credit state for the send side + * The local credit state for ib_post_send() + */ + struct { + atomic_t count; + wait_queue_head_t wait_queue; + } lcredits; + + /* + * The remote credit state for the send side */ struct { atomic_t count; @@ -337,6 +345,9 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); disable_delayed_work_sync(&sc->idle.timer_work); + atomic_set(&sc->send_io.lcredits.count, 0); + init_waitqueue_head(&sc->send_io.lcredits.wait_queue); + atomic_set(&sc->send_io.credits.count, 0); init_waitqueue_head(&sc->send_io.credits.wait_queue); @@ -437,13 +448,22 @@ enum smbdirect_mr_state { SMBDIRECT_MR_READY, SMBDIRECT_MR_REGISTERED, SMBDIRECT_MR_INVALIDATED, - SMBDIRECT_MR_ERROR + SMBDIRECT_MR_ERROR, + SMBDIRECT_MR_DISABLED }; struct smbdirect_mr_io { struct smbdirect_socket *socket; struct ib_cqe cqe; + /* + * We can have up to two references: + * 1. by the connection + * 2. by the registration + */ + struct kref kref; + struct mutex mutex; + struct list_head list; enum smbdirect_mr_state state; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index d742ba754348..863716207a0d 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -10,6 +10,7 @@ #include "glob.h" #include "nterr.h" +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" @@ -26,16 +27,8 @@ #define SMB311_PROT 6 #define BAD_PROT 0xFFFF -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" -#define SMB21_VERSION_STRING "2.1" -#define SMB30_VERSION_STRING "3.0" -#define SMB302_VERSION_STRING "3.02" -#define SMB311_VERSION_STRING "3.1.1" - #define SMB_ECHO_INTERVAL (60 * HZ) -#define CIFS_DEFAULT_IOSIZE (64 * 1024) #define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */ #define MAX_STREAM_PROT_LEN 0x00FFFFFF @@ -464,9 +457,4 @@ static inline unsigned int get_rfc1002_len(void *buf) { return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } - -static inline void inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} #endif /* __SMB_COMMON_H__ */ diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 46f87fd1ce1c..2c08cccfa680 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -263,10 +263,16 @@ static void ipc_msg_handle_free(int handle) static int handle_response(int type, void *payload, size_t sz) { - unsigned int handle = *(unsigned int *)payload; + unsigned int handle; struct ipc_msg_table_entry *entry; int ret = 0; + /* Prevent 4-byte read beyond declared payload size */ + if (sz < sizeof(unsigned int)) + return -EINVAL; + + handle = *(unsigned int *)payload; + ipc_update_last_active(); down_read(&ipc_msg_table_lock); hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) { diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a201c5871a77..e2be9a496154 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -219,6 +219,7 @@ static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); wake_up_all(&sc->recv_io.reassembly.wait_queue); @@ -333,6 +334,9 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + case SMBDIRECT_SOCKET_CONNECTED: sc->status = SMBDIRECT_SOCKET_ERROR; break; @@ -417,9 +421,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sc->ib.dev = sc->rdma.cm_id->device; - INIT_WORK(&sc->recv_io.posted.refill_work, - smb_direct_post_recv_credits); - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); conn = ksmbd_conn_alloc(); @@ -450,11 +451,10 @@ static void free_transport(struct smb_direct_transport *t) struct smbdirect_recv_io *recvmsg; disable_work_sync(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smb_direct_disconnect_rdma_work(&sc->disconnect_work); - wait_event_interruptible(sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); - } + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); /* * Wake up all waiters in all wait queues @@ -469,9 +469,11 @@ static void free_transport(struct smb_direct_transport *t) disable_delayed_work_sync(&sc->idle.timer_work); disable_work_sync(&sc->idle.immediate_work); + if (sc->rdma.cm_id) + rdma_lock_handler(sc->rdma.cm_id); + if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); - ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); sc->ib.qp = NULL; rdma_destroy_qp(sc->rdma.cm_id); } @@ -498,8 +500,10 @@ static void free_transport(struct smb_direct_transport *t) ib_free_cq(sc->ib.recv_cq); if (sc->ib.pd) ib_dealloc_pd(sc->ib.pd); - if (sc->rdma.cm_id) + if (sc->rdma.cm_id) { + rdma_unlock_handler(sc->rdma.cm_id); rdma_destroy_id(sc->rdma.cm_id); + } smb_direct_destroy_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); @@ -524,6 +528,12 @@ static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, { int i; + /* + * The list needs to be empty! + * The caller should take care of it. + */ + WARN_ON_ONCE(!list_empty(&msg->sibling_list)); + if (msg->num_sge > 0) { ib_dma_unmap_single(sc->ib.dev, msg->sge[0].addr, msg->sge[0].length, @@ -909,9 +919,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work) static void send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbdirect_send_io *sendmsg, *sibling; + struct smbdirect_send_io *sendmsg, *sibling, *next; struct smbdirect_socket *sc; - struct list_head *pos, *prev, *end; + int lcredits = 0; sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); sc = sendmsg->socket; @@ -920,27 +930,31 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) ib_wc_status_msg(wc->status), wc->status, wc->opcode); + /* + * Free possible siblings and then the main send_io + */ + list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); + lcredits += 1; + } + /* Note this frees wc->wr_cqe, but not wc */ + smb_direct_free_sendmsg(sc, sendmsg); + lcredits += 1; + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { pr_err("Send error. status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); smb_direct_disconnect_rdma_connection(sc); + return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); - - /* iterate and free the list of messages in reverse. the list's head - * is invalid. - */ - for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next; - prev != end; pos = prev, prev = prev->prev) { - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); - } - - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); } static int manage_credits_prior_sending(struct smbdirect_socket *sc) @@ -988,8 +1002,6 @@ static int smb_direct_post_send(struct smbdirect_socket *sc, ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); smb_direct_disconnect_rdma_connection(sc); } return ret; @@ -1032,19 +1044,29 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, last->wr.send_flags = IB_SEND_SIGNALED; last->wr.wr_cqe = &last->cqe; + /* + * Remove last from send_ctx->msg_list + * and splice the rest of send_ctx->msg_list + * to last->sibling_list. + * + * send_ctx->msg_list is a valid empty list + * at the end. + */ + list_del_init(&last->sibling_list); + list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list); + send_ctx->wr_cnt = 0; + ret = smb_direct_post_send(sc, &first->wr); - if (!ret) { - smb_direct_send_ctx_init(send_ctx, - send_ctx->need_invalidate_rkey, - send_ctx->remote_key); - } else { - atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count); - wake_up(&sc->send_io.credits.wait_queue); - list_for_each_entry_safe(first, last, &send_ctx->msg_list, - sibling_list) { - smb_direct_free_sendmsg(sc, first); + if (ret) { + struct smbdirect_send_io *sibling, *next; + + list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); } + smb_direct_free_sendmsg(sc, last); } + return ret; } @@ -1070,6 +1092,23 @@ static int wait_for_credits(struct smbdirect_socket *sc, } while (true); } +static int wait_for_send_lcredit(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx) +{ + if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { + int ret; + + ret = smb_direct_flush_send_list(sc, send_ctx, false); + if (ret) + return ret; + } + + return wait_for_credits(sc, + &sc->send_io.lcredits.wait_queue, + &sc->send_io.lcredits.count, + 1); +} + static int wait_for_send_credits(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx) { @@ -1257,9 +1296,13 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, int data_length; struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; + ret = wait_for_send_lcredit(sc, send_ctx); + if (ret) + goto lcredit_failed; + ret = wait_for_send_credits(sc, send_ctx); if (ret) - return ret; + goto credit_failed; data_length = 0; for (i = 0; i < niov; i++) @@ -1267,10 +1310,8 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, ret = smb_direct_create_header(sc, data_length, remaining_data_length, &msg); - if (ret) { - atomic_inc(&sc->send_io.credits.count); - return ret; - } + if (ret) + goto header_failed; for (i = 0; i < niov; i++) { struct ib_sge *sge; @@ -1308,7 +1349,11 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, return 0; err: smb_direct_free_sendmsg(sc, msg); +header_failed: atomic_inc(&sc->send_io.credits.count); +credit_failed: + atomic_inc(&sc->send_io.lcredits.count); +lcredit_failed: return ret; } @@ -1687,10 +1732,10 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, } case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: { - ib_drain_qp(sc->ib.qp); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; smb_direct_disconnect_rdma_work(&sc->disconnect_work); + if (sc->ib.qp) + ib_drain_qp(sc->ib.qp); break; } case RDMA_CM_EVENT_CONNECT_ERROR: { @@ -1841,6 +1886,7 @@ static int smb_direct_accept_client(struct smbdirect_socket *sc) static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) { struct smbdirect_recv_io *recvmsg; + bool recv_posted = false; int ret; WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); @@ -1857,6 +1903,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) pr_err("Can't post recv: %d\n", ret); goto out_err; } + recv_posted = true; ret = smb_direct_accept_client(sc); if (ret) { @@ -1864,27 +1911,24 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) goto out_err; } - smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); return 0; out_err: - put_recvmsg(sc, recvmsg); + /* + * If the recv was never posted, return it to the free list. + * If it was posted, leave it alone so disconnect teardown can + * drain the QP and complete it (flush) and the completion path + * will unmap it exactly once. + */ + if (!recv_posted) + put_recvmsg(sc, recvmsg); return ret; } -static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) -{ - return min_t(unsigned int, - sc->ib.dev->attrs.max_fast_reg_page_list_len, - 256); -} - -static int smb_direct_init_params(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static int smb_direct_init_params(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_device *device = sc->ib.dev; - int max_send_sges, max_rw_wrs, max_send_wrs; - unsigned int max_sge_per_wr, wrs_per_credit; + int max_send_sges; + unsigned int maxpages; /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, * SMB2 response could be mapped. @@ -1895,67 +1939,20 @@ static int smb_direct_init_params(struct smbdirect_socket *sc, return -EINVAL; } - /* Calculate the number of work requests for RDMA R/W. - * The maximum number of pages which can be registered - * with one Memory region can be transferred with one - * R/W credit. And at least 4 work requests for each credit - * are needed for MR registration, RDMA R/W, local & remote - * MR invalidation. - */ - sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); - sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, - (sc->rw_io.credits.num_pages - 1) * - PAGE_SIZE); - - max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, - device->attrs.max_sge_rd); - max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, - max_send_sges); - wrs_per_credit = max_t(unsigned int, 4, - DIV_ROUND_UP(sc->rw_io.credits.num_pages, - max_sge_per_wr) + 1); - max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; - - max_send_wrs = sp->send_credit_target + max_rw_wrs; - if (max_send_wrs > device->attrs.max_cqe || - max_send_wrs > device->attrs.max_qp_wr) { - pr_err("consider lowering send_credit_target = %d\n", - sp->send_credit_target); - pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); - if (sp->recv_credit_max > device->attrs.max_cqe || - sp->recv_credit_max > device->attrs.max_qp_wr) { - pr_err("consider lowering receive_credit_max = %d\n", - sp->recv_credit_max); - pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("warning: device max_send_sge = %d too small\n", - device->attrs.max_send_sge); - return -EINVAL; - } - if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { - pr_err("warning: device max_recv_sge = %d too small\n", - device->attrs.max_recv_sge); - return -EINVAL; - } + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, + sc->rdma.cm_id->port_num, + maxpages); + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); + /* add one extra in order to handle unaligned pages */ + sc->rw_io.credits.max += 1; sc->recv_io.credits.target = 1; atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); - cap->max_send_wr = max_send_wrs; - cap->max_recv_wr = sp->recv_credit_max; - cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - cap->max_inline_data = 0; - cap->max_rdma_ctxs = sc->rw_io.credits.max; return 0; } @@ -2029,13 +2026,129 @@ err: return -ENOMEM; } -static int smb_direct_create_qpair(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) +{ + /* + * This could be split out of rdma_rw_init_qp() + * and be a helper function next to rdma_rw_mr_factor() + * + * We can't check unlikely(rdma_rw_force_mr) here, + * but that is most likely 0 anyway. + */ + u32 factor; + + WARN_ON_ONCE(attr->port_num == 0); + + /* + * Each context needs at least one RDMA READ or WRITE WR. + * + * For some hardware we might need more, eventually we should ask the + * HCA driver for a multiplier here. + */ + factor = 1; + + /* + * If the device needs MRs to perform RDMA READ or WRITE operations, + * we'll need two additional MRs for the registrations and the + * invalidation. + */ + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) + factor += 2; /* inv + reg */ + + return factor * attr->cap.max_rdma_ctxs; +} + +static int smb_direct_create_qpair(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; - int pages_per_rw; + u32 max_send_wr; + u32 rdma_send_wr; + + /* + * Note that {rdma,ib}_create_qp() will call + * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0. + * It will adjust cap->max_send_wr to the required + * number of additional WRs for the RDMA RW operations. + * It will cap cap->max_send_wr to the device limit. + * + * +1 for ib_drain_qp + */ + qp_cap.max_send_wr = sp->send_credit_target + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + qp_cap.max_inline_data = 0; + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; + + /* + * Find out the number of max_send_wr + * after rdma_rw_init_qp() adjusted it. + * + * We only do it on a temporary variable, + * as rdma_create_qp() will trigger + * rdma_rw_init_qp() again. + */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.cap = qp_cap; + qp_attr.port_num = sc->rdma.cm_id->port_num; + rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; + + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_send_wr %d\n", + qp_cap.max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d\n", + sp->send_credit_target); + return -EINVAL; + } + + if (qp_cap.max_rdma_ctxs && + (max_send_wr >= sc->ib.dev->attrs.max_cqe || + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", + sp->send_credit_target, qp_cap.max_rdma_ctxs); + return -EINVAL; + } + + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_recv_wr %d\n", + qp_cap.max_recv_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering receive_credit_max = %d\n", + sp->recv_credit_max); + return -EINVAL; + } + + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); + return -EINVAL; + } sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { @@ -2046,8 +2159,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target + - cap->max_rdma_ctxs, + max_send_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.send_cq)) { pr_err("Can't create RDMA send CQ\n"); @@ -2057,7 +2169,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, + qp_cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); @@ -2066,10 +2178,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, goto err; } + /* + * We reset completely here! + * As the above use was just temporary + * to calc max_send_wr and rdma_send_wr. + * + * rdma_create_qp() will trigger rdma_rw_init_qp() + * again if max_rdma_ctxs is not 0. + */ memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smb_direct_qpair_handler; qp_attr.qp_context = sc; - qp_attr.cap = *cap; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -2085,18 +2205,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, sc->ib.qp = sc->rdma.cm_id->qp; sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; - if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { - ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, - sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, - sc->rw_io.credits.num_pages, 0); - if (ret) { - pr_err("failed to init mr pool count %zu pages %zu\n", - sc->rw_io.credits.max, sc->rw_io.credits.num_pages); - goto err; - } - } - return 0; err: if (sc->ib.qp) { @@ -2154,8 +2262,8 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return -ECONNABORTED; ret = smb_direct_check_recvmsg(recvmsg); - if (ret == -ECONNABORTED) - goto out; + if (ret) + goto put; req = (struct smbdirect_negotiate_req *)recvmsg->packet; sp->max_recv_size = min_t(int, sp->max_recv_size, @@ -2170,23 +2278,46 @@ static int smb_direct_prepare(struct ksmbd_transport *t) sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); - ret = smb_direct_send_negotiate_response(sc, ret); -out: +put: spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.queue_length--; list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); put_recvmsg(sc, recvmsg); + if (ret == -ECONNABORTED) + return ret; + + if (ret) + goto respond; + + /* + * We negotiated with success, so we need to refill the recv queue. + * We do that with sc->idle.immediate_work still being disabled + * via smbdirect_socket_init(), so that queue_work(sc->workqueue, + * &sc->idle.immediate_work) in smb_direct_post_recv_credits() + * is a no-op. + * + * The message that grants the credits to the client is + * the negotiate response. + */ + INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits); + smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); + if (unlikely(sc->first_error)) + return sc->first_error; + INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + +respond: + ret = smb_direct_send_negotiate_response(sc, ret); + return ret; } static int smb_direct_connect(struct smbdirect_socket *sc) { - struct ib_qp_cap qp_cap; int ret; - ret = smb_direct_init_params(sc, &qp_cap); + ret = smb_direct_init_params(sc); if (ret) { pr_err("Can't configure RDMA parameters\n"); return ret; @@ -2198,7 +2329,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } - ret = smb_direct_create_qpair(sc, &qp_cap); + ret = smb_direct_create_qpair(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret; @@ -2487,7 +2618,7 @@ void ksmbd_rdma_destroy(void) } } -bool ksmbd_rdma_capable_netdev(struct net_device *netdev) +static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev) { struct smb_direct_device *smb_dev; int i; @@ -2529,6 +2660,28 @@ out: return rdma_capable; } +bool ksmbd_rdma_capable_netdev(struct net_device *netdev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (ksmbd_find_rdma_capable_netdev(netdev)) + return true; + + /* check if netdev is bridge or VLAN */ + if (netif_is_bridge_master(netdev) || + netdev->priv_flags & IFF_802_1Q_VLAN) + netdev_for_each_lower_dev(netdev, lower_dev, iter) + if (ksmbd_find_rdma_capable_netdev(lower_dev)) + return true; + + /* check if netdev is IPoIB safely without layer violation */ + if (netdev->type == ARPHRD_INFINIBAND) + return true; + + return false; +} + static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 7a1e3dcc2cde..d2e391c29464 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -290,8 +290,11 @@ static int ksmbd_kthread_fn(void *p) } } up_read(&conn_list_lock); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + /* Per-IP limit hit: release the just-accepted socket. */ + sock_release(client_sk); continue; + } skip_max_ip_conns_limit: if (server_conf.max_connections && diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2d78e94072a0..e142bac4f9f8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -498,17 +498,26 @@ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, } EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); -static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, +static int sysfs_group_attrs_change_owner(struct kobject *kobj, + struct kernfs_node *grp_kn, const struct attribute_group *grp, struct iattr *newattrs) { struct kernfs_node *kn; - int error; + int error, i; + umode_t mode; if (grp->attrs) { struct attribute *const *attr; - for (attr = grp->attrs; *attr; attr++) { + for (i = 0, attr = grp->attrs; *attr; i++, attr++) { + if (grp->is_visible) { + mode = grp->is_visible(kobj, *attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*attr)->name); if (!kn) return -ENOENT; @@ -523,7 +532,14 @@ static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, if (grp->bin_attrs) { const struct bin_attribute *const *bin_attr; - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); if (!kn) return -ENOENT; @@ -573,7 +589,7 @@ int sysfs_group_change_owner(struct kobject *kobj, error = kernfs_setattr(grp_kn, &newattrs); if (!error) - error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs); + error = sysfs_group_attrs_change_owner(kobj, grp_kn, grp, &newattrs); kernfs_put(grp_kn); diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 8930d5254e1d..b99da294e9a3 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -119,6 +119,15 @@ config XFS_RT See the xfs man page in section 5 for additional information. + This option is mandatory to support zoned block devices. For these + devices, the realtime subvolume must be backed by a zoned block + device and a regular block device used as the main device (for + metadata). If the zoned block device is a host-managed SMR hard-disk + containing conventional zones at the beginning of its address space, + XFS will use the disk conventional zones as the main device and the + remaining sequential write required zones as the backing storage for + the realtime subvolume. + If unsure, say N. config XFS_DRAIN_INTENTS @@ -156,7 +165,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select DEBUG_FS + depends on DEBUG_FS help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index d36a6ae0abe5..d4fcf591e63d 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -50,6 +50,12 @@ struct xfs_rtgroup { uint8_t *rtg_rsum_cache; struct xfs_open_zone *rtg_open_zone; }; + + /* + * Count of outstanding GC operations for zoned XFS. Any RTG with a + * non-zero rtg_gccount will not be picked as new GC victim. + */ + atomic_t rtg_gccount; }; /* diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 26721fab5cab..091c79e432e5 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -376,6 +376,36 @@ out_incomplete: return error; } +static uint +xchk_nlinks_ilock_dir( + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + /* + * We're going to scan the directory entries, so we must be ready to + * pull the data fork mappings into memory if they aren't already. + */ + if (xfs_need_iread_extents(&ip->i_df)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * We're going to scan the parent pointers, so we must be ready to + * pull the attr fork mappings into memory if they aren't already. + */ + if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) && + xfs_need_iread_extents(&ip->i_af)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * Take the IOLOCK so that other threads cannot start a directory + * update while we're scanning. + */ + lock_mode |= XFS_IOLOCK_SHARED; + xfs_ilock(ip, lock_mode); + return lock_mode; +} + /* Walk a directory to bump the observed link counts of the children. */ STATIC int xchk_nlinks_collect_dir( @@ -394,8 +424,7 @@ xchk_nlinks_collect_dir( return 0; /* Prevent anyone from changing this directory while we walk it. */ - xfs_ilock(dp, XFS_IOLOCK_SHARED); - lock_mode = xfs_ilock_data_map_shared(dp); + lock_mode = xchk_nlinks_ilock_dir(dp); /* * The dotdot entry of an unlinked directory still points to the last @@ -452,7 +481,6 @@ out_abort: xchk_iscan_abort(&xnc->collect_iscan); out_unlock: xfs_iunlock(dp, lock_mode); - xfs_iunlock(dp, XFS_IOLOCK_SHARED); return error; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 773d959965dc..47edf3041631 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1751,7 +1751,7 @@ xfs_init_buftarg( const char *descr) { /* The maximum size of the buftarg is only known once the sb is read. */ - btp->bt_nr_sectors = (xfs_daddr_t)-1; + btp->bt_nr_sectors = XFS_BUF_DADDR_MAX; /* Set up device logical sector size mask */ btp->bt_logical_sectorsize = logical_sectorsize; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8fa7bdf59c91..e25cd2a160f3 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -22,6 +22,7 @@ extern struct kmem_cache *xfs_buf_cache; */ struct xfs_buf; +#define XFS_BUF_DADDR_MAX ((xfs_daddr_t) S64_MAX) #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) #define XBF_READ (1u << 0) /* buffer intended for reading from device */ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index ee49f20875af..6917de832191 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -726,8 +726,10 @@ xfs_trim_rtgroup_extents( break; } - if (!tr.queued) + if (!tr.queued) { + kfree(tr.extents); break; + } /* * We hand the extent list to the discard function here so the diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d3f6e3e42a11..490e12cb99be 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1091,6 +1091,29 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = { }; #endif /* CONFIG_XFS_RT */ +#ifdef DEBUG +static void +xfs_check_atomic_cow_conversion( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + const struct xfs_bmbt_irec *cmap) +{ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec cmap2 = { }; + + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap2)) + xfs_trim_extent(&cmap2, offset_fsb, count_fsb); + + ASSERT(cmap2.br_startoff == cmap->br_startoff); + ASSERT(cmap2.br_blockcount == cmap->br_blockcount); + ASSERT(cmap2.br_startblock == cmap->br_startblock); + ASSERT(cmap2.br_state == cmap->br_state); +} +#else +# define xfs_check_atomic_cow_conversion(...) ((void)0) +#endif + static int xfs_atomic_write_cow_iomap_begin( struct inode *inode, @@ -1102,9 +1125,10 @@ xfs_atomic_write_cow_iomap_begin( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); - xfs_filblks_t count_fsb = end_fsb - offset_fsb; + const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + const xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); + const xfs_filblks_t count_fsb = end_fsb - offset_fsb; + xfs_filblks_t hole_count_fsb; int nmaps = 1; xfs_filblks_t resaligned; struct xfs_bmbt_irec cmap; @@ -1130,7 +1154,7 @@ xfs_atomic_write_cow_iomap_begin( return -EAGAIN; trace_xfs_iomap_atomic_write_cow(ip, offset, length); - +retry: xfs_ilock(ip, XFS_ILOCK_EXCL); if (!ip->i_cowfp) { @@ -1141,14 +1165,22 @@ xfs_atomic_write_cow_iomap_begin( if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) cmap.br_startoff = end_fsb; if (cmap.br_startoff <= offset_fsb) { + if (isnullstartblock(cmap.br_startblock)) + goto convert_delay; + + /* + * cmap could extend outside the write range due to previous + * speculative preallocations. We must trim cmap to the write + * range because the cow fork treats written mappings to mean + * "write in progress". + */ xfs_trim_extent(&cmap, offset_fsb, count_fsb); goto found; } - end_fsb = cmap.br_startoff; - count_fsb = end_fsb - offset_fsb; + hole_count_fsb = cmap.br_startoff - offset_fsb; - resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, + resaligned = xfs_aligned_fsb_count(offset_fsb, hole_count_fsb, xfs_get_cowextsz_hint(ip)); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1169,8 +1201,10 @@ xfs_atomic_write_cow_iomap_begin( if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) cmap.br_startoff = end_fsb; if (cmap.br_startoff <= offset_fsb) { - xfs_trim_extent(&cmap, offset_fsb, count_fsb); xfs_trans_cancel(tp); + if (isnullstartblock(cmap.br_startblock)) + goto convert_delay; + xfs_trim_extent(&cmap, offset_fsb, count_fsb); goto found; } @@ -1182,7 +1216,7 @@ xfs_atomic_write_cow_iomap_begin( * atomic writes to that same range will be aligned (and don't require * this COW-based method). */ - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + error = xfs_bmapi_write(tp, ip, offset_fsb, hole_count_fsb, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC | XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps); if (error) { @@ -1195,21 +1229,43 @@ xfs_atomic_write_cow_iomap_begin( if (error) goto out_unlock; + /* + * cmap could map more blocks than the range we passed into bmapi_write + * because of EXTSZALIGN or adjacent pre-existing unwritten mappings + * that were merged. Trim cmap to the original write range so that we + * don't convert more than we were asked to do for this write. + */ + xfs_trim_extent(&cmap, offset_fsb, count_fsb); + found: if (cmap.br_state != XFS_EXT_NORM) { - error = xfs_reflink_convert_cow_locked(ip, offset_fsb, - count_fsb); + error = xfs_reflink_convert_cow_locked(ip, cmap.br_startoff, + cmap.br_blockcount); if (error) goto out_unlock; cmap.br_state = XFS_EXT_NORM; + xfs_check_atomic_cow_conversion(ip, offset_fsb, count_fsb, + &cmap); } - length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); - trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + trace_xfs_iomap_found(ip, offset, length, XFS_COW_FORK, &cmap); seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); +convert_delay: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + error = xfs_bmapi_convert_delalloc(ip, XFS_COW_FORK, offset, iomap, + NULL); + if (error) + return error; + + /* + * Try the lookup again, because the delalloc conversion might have + * turned the COW mapping into unwritten, but we need it to be in + * written state. + */ + goto retry; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f046d1215b04..b871dfde372b 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -236,7 +236,6 @@ typedef struct xfs_mount { bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; unsigned int m_zonegc_low_space; - struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */ /* max_atomic_write mount option value */ unsigned long long m_awu_max_bytes; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index fbb8009f1c0f..bc71aa9dcee8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -102,7 +102,7 @@ static const struct constant_table dax_param_enums[] = { * Table driven mount option parser. */ enum { - Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, + Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, @@ -114,7 +114,21 @@ enum { Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, }; +#define fsparam_dead(NAME) \ + __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) + static const struct fs_parameter_spec xfs_fs_parameters[] = { + /* + * These mount options were supposed to be deprecated in September 2025 + * but the deprecation warning was buggy, so not all users were + * notified. The deprecation is now obnoxiously loud and postponed to + * September 2030. + */ + fsparam_dead("attr2"), + fsparam_dead("noattr2"), + fsparam_dead("ikeep"), + fsparam_dead("noikeep"), + fsparam_u32("logbufs", Opt_logbufs), fsparam_string("logbsize", Opt_logbsize), fsparam_string("logdev", Opt_logdev), @@ -786,6 +800,12 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); + + if (IS_ENABLED(CONFIG_XFS_RT) && + S_ISREG(inode->i_mode) && inode->i_private) { + xfs_open_zone_put(inode->i_private); + inode->i_private = NULL; + } } static void @@ -1373,16 +1393,25 @@ suffix_kstrtoull( static inline void xfs_fs_warn_deprecated( struct fs_context *fc, - struct fs_parameter *param, - uint64_t flag, - bool value) + struct fs_parameter *param) { - /* Don't print the warning if reconfiguring and current mount point - * already had the flag set + /* + * Always warn about someone passing in a deprecated mount option. + * Previously we wouldn't print the warning if we were reconfiguring + * and current mount point already had the flag set, but that was not + * the right thing to do. + * + * Many distributions mount the root filesystem with no options in the + * initramfs and rely on mount -a to remount the root fs with the + * options in fstab. However, the old behavior meant that there would + * never be a warning about deprecated mount options for the root fs in + * /etc/fstab. On a single-fs system, that means no warning at all. + * + * Compounding this problem are distribution scripts that copy + * /proc/mounts to fstab, which means that we can't remove mount + * options unless we're 100% sure they have only ever been advertised + * in /proc/mounts in response to explicitly provided mount options. */ - if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && - !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) - return; xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } @@ -1408,6 +1437,9 @@ xfs_fs_parse_param( return opt; switch (opt) { + case Op_deprecated: + xfs_fs_warn_deprecated(fc, param); + return 0; case Opt_logbufs: parsing_mp->m_logbufs = result.uint_32; return 0; @@ -1528,7 +1560,6 @@ xfs_fs_parse_param( xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif - /* Following mount options will be removed in September 2025 */ case Opt_max_open_zones: parsing_mp->m_max_open_zones = result.uint_32; return 0; @@ -2224,7 +2255,7 @@ xfs_init_fs_context( struct xfs_mount *mp; int i; - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL); + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 1147bacb2da8..ef7a931ebde5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -26,14 +26,22 @@ #include "xfs_trace.h" #include "xfs_mru_cache.h" +static void +xfs_open_zone_free_rcu( + struct callback_head *cb) +{ + struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu); + + xfs_rtgroup_rele(oz->oz_rtg); + kfree(oz); +} + void xfs_open_zone_put( struct xfs_open_zone *oz) { - if (atomic_dec_and_test(&oz->oz_ref)) { - xfs_rtgroup_rele(oz->oz_rtg); - kfree(oz); - } + if (atomic_dec_and_test(&oz->oz_ref)) + call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu); } static inline uint32_t @@ -238,6 +246,14 @@ xfs_zoned_map_extent( * If a data write raced with this GC write, keep the existing data in * the data fork, mark our newly written GC extent as reclaimable, then * move on to the next extent. + * + * Note that this can also happen when racing with operations that do + * not actually invalidate the data, but just move it to a different + * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the + * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the + * data was just moved around, GC fails to free the zone, but the zone + * becomes a GC candidate again as soon as all previous GC I/O has + * finished and these blocks will be moved out eventually. */ if (old_startblock != NULLFSBLOCK && old_startblock != data.br_startblock) @@ -599,7 +615,7 @@ xfs_select_open_zone_mru( lockdep_assert_held(&zi->zi_open_zones_lock); list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry) - if (xfs_try_use_zone(zi, file_hint, oz, false)) + if (xfs_try_use_zone(zi, file_hint, oz, XFS_ZONE_ALLOC_OK)) return oz; cond_resched_lock(&zi->zi_open_zones_lock); @@ -614,14 +630,25 @@ static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip) } /* - * Try to pack inodes that are written back after they were closed tight instead - * of trying to open new zones for them or spread them to the least recently - * used zone. This optimizes the data layout for workloads that untar or copy - * a lot of small files. Right now this does not separate multiple such + * Try to tightly pack small files that are written back after they were closed + * instead of trying to open new zones for them or spread them to the least + * recently used zone. This optimizes the data layout for workloads that untar + * or copy a lot of small files. Right now this does not separate multiple such * streams. */ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + size_t zone_capacity = + XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].blocks); + + /* + * Do not pack write files that are already using a full zone to avoid + * fragmentation. + */ + if (i_size_read(VFS_I(ip)) >= zone_capacity) + return false; + return !inode_is_open_for_write(VFS_I(ip)) && !(ip->i_diflags & XFS_DIFLAG_APPEND); } @@ -746,97 +773,54 @@ xfs_mark_rtg_boundary( } /* - * Cache the last zone written to for an inode so that it is considered first - * for subsequent writes. - */ -struct xfs_zone_cache_item { - struct xfs_mru_cache_elem mru; - struct xfs_open_zone *oz; -}; - -static inline struct xfs_zone_cache_item * -xfs_zone_cache_item(struct xfs_mru_cache_elem *mru) -{ - return container_of(mru, struct xfs_zone_cache_item, mru); -} - -static void -xfs_zone_cache_free_func( - void *data, - struct xfs_mru_cache_elem *mru) -{ - struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru); - - xfs_open_zone_put(item->oz); - kfree(item); -} - -/* * Check if we have a cached last open zone available for the inode and * if yes return a reference to it. */ static struct xfs_open_zone * -xfs_cached_zone( - struct xfs_mount *mp, - struct xfs_inode *ip) +xfs_get_cached_zone( + struct xfs_inode *ip) { - struct xfs_mru_cache_elem *mru; - struct xfs_open_zone *oz; + struct xfs_open_zone *oz; - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (!mru) - return NULL; - oz = xfs_zone_cache_item(mru)->oz; + rcu_read_lock(); + oz = VFS_I(ip)->i_private; if (oz) { /* * GC only steals open zones at mount time, so no GC zones * should end up in the cache. */ ASSERT(!oz->oz_is_gc); - ASSERT(atomic_read(&oz->oz_ref) > 0); - atomic_inc(&oz->oz_ref); + if (!atomic_inc_not_zero(&oz->oz_ref)) + oz = NULL; } - xfs_mru_cache_done(mp->m_zone_cache); + rcu_read_unlock(); + return oz; } /* - * Update the last used zone cache for a given inode. + * Stash our zone in the inode so that is is reused for future allocations. * - * The caller must have a reference on the open zone. + * The open_zone structure will be pinned until either the inode is freed or + * until the cached open zone is replaced with a different one because the + * current one was full when we tried to use it. This means we keep any + * open zone around forever as long as any inode that used it for the last + * write is cached, which slightly increases the memory use of cached inodes + * that were every written to, but significantly simplifies the cached zone + * lookup. Because the open_zone is clearly marked as full when all data + * in the underlying RTG was written, the caching is always safe. */ static void -xfs_zone_cache_create_association( - struct xfs_inode *ip, - struct xfs_open_zone *oz) +xfs_set_cached_zone( + struct xfs_inode *ip, + struct xfs_open_zone *oz) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_zone_cache_item *item = NULL; - struct xfs_mru_cache_elem *mru; + struct xfs_open_zone *old_oz; - ASSERT(atomic_read(&oz->oz_ref) > 0); atomic_inc(&oz->oz_ref); - - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (mru) { - /* - * If we have an association already, update it to point to the - * new zone. - */ - item = xfs_zone_cache_item(mru); - xfs_open_zone_put(item->oz); - item->oz = oz; - xfs_mru_cache_done(mp->m_zone_cache); - return; - } - - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - xfs_open_zone_put(oz); - return; - } - item->oz = oz; - xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); + old_oz = xchg(&VFS_I(ip)->i_private, oz); + if (old_oz) + xfs_open_zone_put(old_oz); } static void @@ -880,15 +864,14 @@ xfs_zone_alloc_and_submit( * the inode is still associated with a zone and use that if so. */ if (!*oz) - *oz = xfs_cached_zone(mp, ip); + *oz = xfs_get_cached_zone(ip); if (!*oz) { select_zone: *oz = xfs_select_zone(mp, write_hint, pack_tight); if (!*oz) goto out_error; - - xfs_zone_cache_create_association(ip, *oz); + xfs_set_cached_zone(ip, *oz); } alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size), @@ -966,6 +949,12 @@ xfs_free_open_zones( xfs_open_zone_put(oz); } spin_unlock(&zi->zi_open_zones_lock); + + /* + * Wait for all open zones to be freed so that they drop the group + * references: + */ + rcu_barrier(); } struct xfs_init_zones { @@ -1260,8 +1249,10 @@ xfs_mount_zones( while ((rtg = xfs_rtgroup_next(mp, rtg))) { error = xfs_init_zone(&iz, rtg, NULL); - if (error) + if (error) { + xfs_rtgroup_rele(rtg); goto out_free_zone_info; + } } } @@ -1279,14 +1270,6 @@ xfs_mount_zones( error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; - - /* - * Set up a mru cache to track inode to open zone for data placement - * purposes. The magic values for group count and life time is the - * same as the defaults for file streams, which seems sane enough. - */ - xfs_mru_cache_create(&mp->m_zone_cache, mp, - 5000, 10, xfs_zone_cache_free_func); return 0; out_free_zone_info: @@ -1300,5 +1283,4 @@ xfs_unmount_zones( { xfs_zone_gc_unmount(mp); xfs_free_zone_info(mp->m_zone_info); - xfs_mru_cache_destroy(mp->m_zone_cache); } diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 064cd1a857a0..4ade54445532 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -114,6 +114,8 @@ struct xfs_gc_bio { /* Open Zone being written to */ struct xfs_open_zone *oz; + struct xfs_rtgroup *victim_rtg; + /* Bio used for reads and writes, including the bvec used by it */ struct bio_vec bv; struct bio bio; /* must be last */ @@ -264,6 +266,7 @@ xfs_zone_gc_iter_init( iter->rec_count = 0; iter->rec_idx = 0; iter->victim_rtg = victim_rtg; + atomic_inc(&victim_rtg->rtg_gccount); } /* @@ -362,6 +365,7 @@ xfs_zone_gc_query( return 0; done: + atomic_dec(&iter->victim_rtg->rtg_gccount); xfs_rtgroup_rele(iter->victim_rtg); iter->victim_rtg = NULL; return 0; @@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from( if (!rtg) continue; + /* + * If the zone is already undergoing GC, don't pick it again. + * + * This prevents us from picking one of the zones for which we + * already submitted GC I/O, but for which the remapping hasn't + * concluded yet. This won't cause data corruption, but + * increases write amplification and slows down GC, so this is + * a bad thing. + */ + if (atomic_read(&rtg->rtg_gccount)) { + xfs_rtgroup_rele(rtg); + continue; + } + /* skip zones that are just waiting for a reset */ if (rtg_rmap(rtg)->i_used_blocks == 0 || rtg_rmap(rtg)->i_used_blocks >= victim_used) { @@ -491,21 +509,6 @@ xfs_zone_gc_select_victim( struct xfs_rtgroup *victim_rtg = NULL; unsigned int bucket; - if (xfs_is_shutdown(mp)) - return false; - - if (iter->victim_rtg) - return true; - - /* - * Don't start new work if we are asked to stop or park. - */ - if (kthread_should_stop() || kthread_should_park()) - return false; - - if (!xfs_zoned_need_gc(mp)) - return false; - spin_lock(&zi->zi_used_buckets_lock); for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) { victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket); @@ -703,6 +706,9 @@ xfs_zone_gc_start_chunk( chunk->scratch = &data->scratch[data->scratch_idx]; chunk->data = data; chunk->oz = oz; + chunk->victim_rtg = iter->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; @@ -725,6 +731,8 @@ static void xfs_zone_gc_free_chunk( struct xfs_gc_bio *chunk) { + atomic_dec(&chunk->victim_rtg->rtg_gccount); + xfs_rtgroup_rele(chunk->victim_rtg); list_del(&chunk->entry); xfs_open_zone_put(chunk->oz); xfs_irele(chunk->ip); @@ -785,6 +793,10 @@ xfs_zone_gc_split_write( split_chunk->oz = chunk->oz; atomic_inc(&chunk->oz->oz_ref); + split_chunk->victim_rtg = chunk->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); + chunk->offset += split_len; chunk->len -= split_len; chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len); @@ -975,6 +987,27 @@ xfs_zone_gc_reset_zones( } while (next); } +static bool +xfs_zone_gc_should_start_new_work( + struct xfs_zone_gc_data *data) +{ + if (xfs_is_shutdown(data->mp)) + return false; + if (!xfs_zone_gc_space_available(data)) + return false; + + if (!data->iter.victim_rtg) { + if (kthread_should_stop() || kthread_should_park()) + return false; + if (!xfs_zoned_need_gc(data->mp)) + return false; + if (!xfs_zone_gc_select_victim(data)) + return false; + } + + return true; +} + /* * Handle the work to read and write data for GC and to reset the zones, * including handling all completions. @@ -982,7 +1015,7 @@ xfs_zone_gc_reset_zones( * Note that the order of the chunks is preserved so that we don't undo the * optimal order established by xfs_zone_gc_query(). */ -static bool +static void xfs_zone_gc_handle_work( struct xfs_zone_gc_data *data) { @@ -996,30 +1029,22 @@ xfs_zone_gc_handle_work( zi->zi_reset_list = NULL; spin_unlock(&zi->zi_reset_list_lock); - if (!xfs_zone_gc_select_victim(data) || - !xfs_zone_gc_space_available(data)) { - if (list_empty(&data->reading) && - list_empty(&data->writing) && - list_empty(&data->resetting) && - !reset_list) - return false; - } - - __set_current_state(TASK_RUNNING); - try_to_freeze(); - - if (reset_list) + if (reset_list) { + set_current_state(TASK_RUNNING); xfs_zone_gc_reset_zones(data, reset_list); + } list_for_each_entry_safe(chunk, next, &data->resetting, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_reset(chunk); } list_for_each_entry_safe(chunk, next, &data->writing, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_chunk(chunk); } @@ -1027,15 +1052,18 @@ xfs_zone_gc_handle_work( list_for_each_entry_safe(chunk, next, &data->reading, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_write_chunk(chunk); } blk_finish_plug(&plug); - blk_start_plug(&plug); - while (xfs_zone_gc_start_chunk(data)) - ; - blk_finish_plug(&plug); - return true; + if (xfs_zone_gc_should_start_new_work(data)) { + set_current_state(TASK_RUNNING); + blk_start_plug(&plug); + while (xfs_zone_gc_start_chunk(data)) + ; + blk_finish_plug(&plug); + } } /* @@ -1059,8 +1087,18 @@ xfs_zoned_gcd( for (;;) { set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE); xfs_set_zonegc_running(mp); - if (xfs_zone_gc_handle_work(data)) + + xfs_zone_gc_handle_work(data); + + /* + * Only sleep if nothing set the state to running. Else check for + * work again as someone might have queued up more work and woken + * us in the meantime. + */ + if (get_current_state() == TASK_RUNNING) { + try_to_freeze(); continue; + } if (list_empty(&data->reading) && list_empty(&data->writing) && diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index 35e6de3d25ed..4322e26dd99a 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -44,6 +44,8 @@ struct xfs_open_zone { * the life time of an open zone. */ struct xfs_rtgroup *oz_rtg; + + struct rcu_head oz_rcu; }; /* diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a9a2e732a65..e04d56a5332e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -832,7 +832,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) /* Required sections not related to debugging. */ #define ELF_DETAILS \ - .modinfo : { *(.modinfo) } \ + .modinfo : { *(.modinfo) . = ALIGN(8); } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ diff --git a/include/drm/Makefile b/include/drm/Makefile index 1df6962556ef..48fae3f167c7 100644 --- a/include/drm/Makefile +++ b/include/drm/Makefile @@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8890ded1d907..476990e761f8 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1078,7 +1078,7 @@ struct drm_gpuva_ops { */ struct drm_gpuvm_map_req { /** - * @op_map: struct drm_gpuva_op_map + * @map: struct drm_gpuva_op_map */ struct drm_gpuva_op_map map; }; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 681cf0c8b9df..7310841f4512 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,8 +51,6 @@ struct arch_timer_vm_data { }; struct arch_timer_context { - struct kvm_vcpu *vcpu; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -71,6 +69,9 @@ struct arch_timer_context { bool level; } irq; + /* Who am I? */ + enum kvm_arch_timers timer_id; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -106,9 +107,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); void kvm_timer_init_vm(struct kvm *kvm); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); @@ -127,9 +125,9 @@ void kvm_timer_init_vhe(void); #define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) -#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) - -#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define arch_timer_ctx_index(ctx) ((ctx)->timer_id) +#define timer_context_to_vcpu(ctx) container_of((ctx), struct kvm_vcpu, arch.timer_cpu.timers[(ctx)->timer_id]) +#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data)) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, @@ -178,4 +176,14 @@ static inline u64 timer_get_offset(struct arch_timer_context *ctxt) return offset; } +static inline void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + if (!ctxt->offset.vm_offset) { + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); + return; + } + + WRITE_ONCE(*ctxt->offset.vm_offset, offset); +} + #endif diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cd7ee4df9045..81e603839c4a 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -338,6 +338,7 @@ struct ffa_mem_region_attributes { * an `struct ffa_mem_region_addr_range`. */ u32 composite_off; + u8 impdef_val[16]; u64 reserved; }; @@ -417,15 +418,31 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2) +#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0) + +static inline u32 ffa_emad_size_get(u32 ffa_version) +{ + u32 sz; + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version)) + sz = sizeof(*ep_mem_access); + else + sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val); + + return sz; +} + static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - u32 offset = count * sizeof(struct ffa_mem_region_attributes); + u32 offset = count * ffa_emad_size_get(ffa_version); /* * Earlier to v1.1, the endpoint memory descriptor array started at * offset 32(i.e. offset of ep_mem_offset in the current structure) */ - if (ffa_version <= FFA_VERSION_1_0) + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version)) offset += offsetof(struct ffa_mem_region, ep_mem_offset); else offset += sizeof(struct ffa_mem_region); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8e8d1cc8b06c..44c30183ecc3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -341,15 +341,15 @@ enum req_op { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)11, /* Close a zone */ - REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)15, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)17, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, @@ -478,6 +478,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..d808253f2e94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid, #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); @@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) +#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \ + kmalloc_nolock(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93318fce31f3..b760a3c470a5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -452,7 +452,7 @@ struct cgroup_freezer_state { int nr_frozen_tasks; /* Freeze time data consistency protection */ - seqcount_t freeze_seq; + seqcount_spinlock_t freeze_seq; /* * Most recent time the cgroup was requested to freeze. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 59288a2c1ad2..0a1b9598940d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -250,10 +250,9 @@ struct ftrace_likely_data { /* * GCC does not warn about unused static inline functions for -Wunused-function. * Suppress the warning in clang as well by using __maybe_unused, but enable it - * for W=1 build. This will allow clang to find unused functions. Remove the - * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings. + * for W=2 build. This will allow clang to find unused functions. */ -#ifdef KBUILD_EXTRA_WARN1 +#ifdef KBUILD_EXTRA_WARN2 #define __inline_maybe_unused #else #define __inline_maybe_unused __maybe_unused @@ -461,6 +460,12 @@ struct ftrace_likely_data { # define __nocfi #endif +#if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS) +# define __nocfi_generic __nocfi +#else +# define __nocfi_generic +#endif + /* * Any place that could be marked with the "alloc_size" attribute is also * a place to be marked with the "malloc" attribute, except those that may diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8248ff9363ee..2ceda49c609f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -90,7 +90,7 @@ */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0) struct dma_iova_state { dma_addr_t addr; diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h index 42c89e3e5ca7..bfa767702d9a 100644 --- a/include/linux/entry-virt.h +++ b/include/linux/entry-virt.h @@ -32,7 +32,7 @@ */ static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); -#ifndef arch_xfer_to_guest_mode_work +#ifndef arch_xfer_to_guest_mode_handle_work static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c2d8b4ec62eb..5c9162193d26 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,7 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 -/** +/* * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for * the end-of-list marker, total 17 items */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d0cf10d5e0f7..f0cf2714ec52 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -320,9 +320,6 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { - if (!nop) - return false; - /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -330,6 +327,10 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* Normal file handles cannot be created without export ops */ + if (!nop) + return false; + /* * If a connectable file handle was requested, we need to make sure that * filesystem can also decode connected file handles. diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 81f0e698acbf..f206370060e1 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -18,6 +18,7 @@ void fbcon_suspended(struct fb_info *info); void fbcon_resumed(struct fb_info *info); int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode); +void fbcon_delete_modelist(struct list_head *head); void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); @@ -38,6 +39,7 @@ static inline void fbcon_suspended(struct fb_info *info) {} static inline void fbcon_resumed(struct fb_info *info) {} static inline int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { return 0; } +static inline void fbcon_delete_modelist(struct list_head *head) {} static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} diff --git a/include/linux/filter.h b/include/linux/filter.h index f5c859b8131a..973233b82dc1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -901,6 +901,26 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +static inline int bpf_prog_run_data_pointers( + const struct bpf_prog *prog, + struct sk_buff *skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + void *save_data_meta, *save_data_end; + int res; + + save_data_meta = cb->data_meta; + save_data_end = cb->data_end; + + bpf_compute_data_pointers(skb); + res = bpf_prog_run(prog, skb); + + cb->data_meta = save_data_meta; + cb->data_end = save_data_end; + + return res; +} + /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7ded7df6e9b5..07f8c309e432 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -193,6 +193,10 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs #if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) +#ifndef arch_ftrace_partial_regs +#define arch_ftrace_partial_regs(regs) do {} while (0) +#endif + static __always_inline struct pt_regs * ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) { @@ -202,7 +206,11 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) * Since arch_ftrace_get_regs() will check some members and may return * NULL, we can not use it. */ - return &arch_ftrace_regs(fregs)->regs; + regs = &arch_ftrace_regs(fregs)->regs; + + /* Allow arch specific updates to regs. */ + arch_ftrace_partial_regs(regs); + return regs; } #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0ceb4e09306c..623bee335383 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -7,6 +7,7 @@ #include <linux/mmzone.h> #include <linux/topology.h> #include <linux/alloc_tag.h> +#include <linux/cleanup.h> #include <linux/sched.h> struct vm_area_struct; @@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, /* This should be paired with folio_put() rather than free_contig_range(). */ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) +DEFINE_FREE(free_page, void *, free_page((unsigned long)_T)) + #endif /* __LINUX_GFP_H */ diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 622a2939ebe0..87983a5f3681 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -38,6 +38,10 @@ struct regmap; * offset to a register/bitmask pair. If not * given the default gpio_regmap_simple_xlate() * is used. + * @fixed_direction_output: + * (Optional) Bitmap representing the fixed direction of + * the GPIO lines. Useful when there are GPIO lines with a + * fixed direction mixed together in the same register. * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). @@ -85,6 +89,7 @@ struct gpio_regmap_config { int reg_stride; int ngpio_per_reg; struct irq_domain *irq_domain; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ struct regmap_irq_chip *regmap_irq_chip; diff --git a/include/linux/hid.h b/include/linux/hid.h index e1b673ad7457..a4ddb94e3ee5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1292,4 +1292,15 @@ void hid_quirks_exit(__u16 bus); #define hid_dbg_once(hid, fmt, ...) \ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_err_ratelimited(hid, fmt, ...) \ + dev_err_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_notice_ratelimited(hid, fmt, ...) \ + dev_notice_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_info_ratelimited(hid, fmt, ...) \ + dev_info_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_dbg_ratelimited(hid, fmt, ...) \ + dev_dbg_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) + #endif diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f327d62fc985..71ac78b9f834 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -376,45 +376,30 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, int folio_split(struct folio *folio, unsigned int new_order, struct page *page, struct list_head *list); /* - * try_folio_split - try to split a @folio at @page using non uniform split. + * try_folio_split_to_order - try to split a @folio at @page to @new_order using + * non uniform split. * @folio: folio to be split - * @page: split to order-0 at the given page - * @list: store the after-split folios + * @page: split to @new_order at the given page + * @new_order: the target split order * - * Try to split a @folio at @page using non uniform split to order-0, if - * non uniform split is not supported, fall back to uniform split. + * Try to split a @folio at @page using non uniform split to @new_order, if + * non uniform split is not supported, fall back to uniform split. After-split + * folios are put back to LRU list. Use min_order_for_split() to get the lower + * bound of @new_order. * * Return: 0: split is successful, otherwise split failed. */ -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - if (!non_uniform_split_supported(folio, 0, false)) - return split_huge_page_to_list_to_order(&folio->page, list, - ret); - return folio_split(folio, ret, page, list); + if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) + return split_huge_page_to_list_to_order(&folio->page, NULL, + new_order); + return folio_split(folio, new_order, page, NULL); } static inline int split_huge_page(struct page *page) { - struct folio *folio = page_folio(page); - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - /* - * split_huge_page() locks the page before splitting and - * expects the same page that has been split to be locked when - * returned. split_folio(page_folio(page)) cannot be used here - * because it converts the page to folio and passes the head - * page to be split. - */ - return split_huge_page_to_list_to_order(page, NULL, ret); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio, bool partially_mapped); @@ -597,14 +582,20 @@ static inline int split_huge_page(struct page *page) return -EINVAL; } +static inline int min_order_for_split(struct folio *folio) +{ + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; +} + static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; } -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 34e615c76ca5..c4403eeb7144 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -20,6 +20,10 @@ * always zero. So we can use these bits to encode the specific blocking * type. * + * Note that on architectures where this is not guaranteed, or for any + * unaligned lock, this tracking mechanism is silently skipped for that + * lock. + * * Type encoding: * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) @@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) * If the lock pointer matches the BLOCKER_TYPE_MASK, return * without writing anything. */ - if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + if (lock_ptr & BLOCKER_TYPE_MASK) return; WRITE_ONCE(current->blocker, lock_ptr | type); @@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) static inline void hung_task_clear_blocker(void) { - WARN_ON_ONCE(!READ_ONCE(current->blocker)); - WRITE_ONCE(current->blocker, 0UL); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fa36e70df088..5bd76cf394fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) #endif #ifdef CONFIG_KVM_GUEST_MEMFD -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} #endif #ifndef kvm_arch_has_readonly_mem diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h index 62674c83bde4..48e2ff95332f 100644 --- a/include/linux/map_benchmark.h +++ b/include/linux/map_benchmark.h @@ -27,5 +27,6 @@ struct map_benchmark { __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; #endif /* _KERNEL_DMA_BENCHMARK_H */ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 71cf5bfc6349..0cb36a3ffc47 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -19,7 +19,7 @@ enum misc_res_type { MISC_CG_RES_SEV_ES, #endif #ifdef CONFIG_INTEL_TDX_HOST - /* Intel TDX HKIDs resource */ + /** @MISC_CG_RES_TDX: Intel TDX HKIDs resource */ MISC_CG_RES_TDX, #endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7ef2c7c7d803..9d47cdc727ad 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe); int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07614cd95bed..1b0b36aa2a76 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10833,7 +10833,9 @@ struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 port_access_reg_cap_mask_63[0x1]; + u8 pphcr[0x1]; + u8 port_access_reg_cap_mask_61_to_36[0x1a]; u8 pplm[0x1]; u8 port_access_reg_cap_mask_34_to_32[0x3]; diff --git a/include/linux/mm.h b/include/linux/mm.h index d16b33bacc32..7c79b3369b82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2074,7 +2074,7 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) /* * We don't expect any folios that exceed buddy sizes (and consequently * memory sections). @@ -2087,10 +2087,17 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) * pages are guaranteed to be contiguous. */ #define MAX_FOLIO_ORDER PFN_SECTION_SHIFT -#else +#elif defined(CONFIG_HUGETLB_PAGE) /* * There is no real limit on the folio size. We limit them to the maximum we - * currently expect (e.g., hugetlb, dax). + * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect + * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. + */ +#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G) +#else +/* + * Without hugetlb, gigantic folios that are bigger than a single PUD are + * currently impossible. */ #define MAX_FOLIO_ORDER PUD_ORDER #endif diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h index 36b13fabca9e..7273c78c826b 100644 --- a/include/linux/net/intel/libie/fwlog.h +++ b/include/linux/net/intel/libie/fwlog.h @@ -78,8 +78,20 @@ struct libie_fwlog { ); }; +#if IS_ENABLED(CONFIG_LIBIE_FWLOG) int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); void libie_fwlog_deinit(struct libie_fwlog *fwlog); void libie_fwlog_reregister(struct libie_fwlog *fwlog); void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#else +static inline int libie_fwlog_init(struct libie_fwlog *fwlog, + struct libie_fwlog_api *api) +{ + return -EOPNOTSUPP; +} +static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { } +static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { } +static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, + u16 len) { } +#endif /* CONFIG_LIBIE_FWLOG */ #endif /* _LIBIE_FWLOG_H_ */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d56583572c98..31463286402f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1659,6 +1659,7 @@ struct nfs_pgio_header { void *netfs; #endif + unsigned short retrans; int pnfs_error; int error; /* merge with pnfs_error */ unsigned int good_bytes; /* boundary of good data */ diff --git a/include/linux/pci.h b/include/linux/pci.h index d1fdf81fbe1e..bf97d49c23cf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -412,6 +412,8 @@ struct pci_dev { u16 l1ss; /* L1SS Capability pointer */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + unsigned int aspm_l0s_support:1; /* ASPM L0s support */ + unsigned int aspm_l1_support:1; /* ASPM L1 support */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 1571e9157fa5..b1b837583d54 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -100,7 +100,6 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; - struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index a3f44f6c2da1..0b436e15f4cd 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -629,13 +629,13 @@ DEFINE_GUARD(pm_runtime_active_auto, struct device *, * device. */ DEFINE_GUARD_COND(pm_runtime_active, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4e1ac1fbcec4..55343795644b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data; * @status_invert: Inverted status register: cleared bits are active interrupts. * @status_is_level: Status register is actuall signal level: Xor status * register with previous value to get active interrupts. - * @wake_invert: Inverted wake register: cleared bits are wake enabled. + * @wake_invert: Inverted wake register: cleared bits are wake disabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h index cccda73eea4d..ed3f8e431eff 100644 --- a/include/linux/rpmb.h +++ b/include/linux/rpmb.h @@ -61,6 +61,50 @@ struct rpmb_dev { #define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) +/** + * struct rpmb_frame - RPMB frame structure for authenticated access + * + * @stuff : stuff bytes, a padding/reserved area of 196 bytes at the + * beginning of the RPMB frame. They don’t carry meaningful + * data but are required to make the frame exactly 512 bytes. + * @key_mac : The authentication key or the message authentication + * code (MAC) depending on the request/response type. + * The MAC will be delivered in the last (or the only) + * block of data. + * @data : Data to be written or read by signed access. + * @nonce : Random number generated by the host for the requests + * and copied to the response by the RPMB engine. + * @write_counter: Counter value for the total amount of the successful + * authenticated data write requests made by the host. + * @addr : Address of the data to be programmed to or read + * from the RPMB. Address is the serial number of + * the accessed block (half sector 256B). + * @block_count : Number of blocks (half sectors, 256B) requested to be + * read/programmed. + * @result : Includes information about the status of the write counter + * (valid, expired) and result of the access made to the RPMB. + * @req_resp : Defines the type of request and response to/from the memory. + * + * The stuff bytes and big-endian properties are modeled to fit to the spec. + */ +struct rpmb_frame { + u8 stuff[196]; + u8 key_mac[32]; + u8 data[256]; + u8 nonce[16]; + __be32 write_counter; + __be16 addr; + __be16 block_count; + __be16 result; + __be16 req_resp; +}; + +#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ +#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ +#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ +#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ +#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ + #if IS_ENABLED(CONFIG_RPMB) struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); void rpmb_dev_put(struct rpmb_dev *rdev); diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..b469878de25c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2407,12 +2407,12 @@ static inline void __migrate_enable(void) { } * be defined in kernel/sched/core.c. */ #ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE -static inline void migrate_disable(void) +static __always_inline void migrate_disable(void) { __migrate_disable(); } -static inline void migrate_enable(void) +static __always_inline void migrate_enable(void) { __migrate_enable(); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fb3fec9affaa..a7cc3d1f4fd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + struct poll_table_struct *wait, + struct sk_buff_head *rcv_queue); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 20e0584db1dd..b673c31569f3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,6 +401,11 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; + vhdr->hash_hdr.hash_value_lo = 0; + vhdr->hash_hdr.hash_value_hi = 0; + vhdr->hash_hdr.hash_report = 0; + vhdr->hash_hdr.padding = 0; + /* Let the basic parsing deal with plain GSO features. */ skb_shinfo(skb)->gso_type &= ~tnl_gso_type; ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9ecc70baaca9..cb4c02d00759 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -434,6 +434,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_ADV_0, HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, @@ -2782,6 +2783,11 @@ struct hci_ev_le_per_adv_report { __u8 data[]; } __packed; +#define HCI_EV_LE_PA_SYNC_LOST 0x10 +struct hci_ev_le_pa_sync_lost { + __le16 handle; +} __packed; + #define LE_PA_DATA_COMPLETE 0x00 #define LE_PA_DATA_MORE_TO_COME 0x01 #define LE_PA_DATA_TRUNCATED 0x02 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2924c2bf2a98..b8100dbfe5d7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + bool periodic_enabled; __u8 mesh; __u8 instance; __u8 handle; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 4bb0eaedda18..00e182a22720 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -38,8 +38,8 @@ #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */ -#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 74edea06985b..f5be96f08b9d 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -780,7 +780,7 @@ struct mgmt_adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[31]; + __u8 value[HCI_MAX_AD_LENGTH]; } __packed; #define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 @@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh { __le16 window; __le16 period; __u8 num_ad_types; - __u8 ad_types[]; + __u8 ad_types[] __counted_by(num_ad_types); } __packed; #define MGMT_SET_MESH_RECEIVER_SIZE 6 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913a..820e299f06b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). + * + * Note that these are scheduled with a timer where the accuracy + * becomes less the longer in the future the scheduled timer is. Use + * wiphy_hrtimer_work_queue() if the timer must be not be late by more + * than approximately 10 percent. */ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, @@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, bool wiphy_delayed_work_pending(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +struct wiphy_hrtimer_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct hrtimer timer; +}; + +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t); + +static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork, + wiphy_work_func_t func) +{ + hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer, + CLOCK_BOOTTIME, HRTIMER_MODE_REL); + wiphy_work_init(&hrwork->work, func); +} + +/** + * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy + * @wiphy: the wiphy to queue for + * @hrwork: the high resolution timer worker + * @delay: the delay given as a ktime_t + * + * Please refer to wiphy_delayed_work_queue(). The difference is that + * the hrtimer work uses a high resolution timer for scheduling. This + * may be needed if timeouts might be scheduled further in the future + * and the accuracy of the normal timer is not sufficient. + * + * Expect a delay of a few milliseconds as the timer is scheduled + * with some slack and some more time may pass between queueing the + * work and its start. + */ +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay); + +/** + * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrtimer: the hrtimer work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrtimer); + +/** + * wiphy_hrtimer_work_flush - flush previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + +/** + * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer + * work item is currently pending. + * + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work in question + * + * Return: true if timer is pending, false otherwise + * + * Please refer to the wiphy_delayed_work_pending() documentation as + * this is the equivalent function for hrtimer based delayed work + * items. + */ +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + /** * enum ieee80211_ap_reg_power - regulatory power for an Access Point * diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index bc3507edd589..898723ab62e8 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -513,7 +513,7 @@ struct libeth_xdp_tx_desc { * can't fail, but can send less frames if there's no enough free descriptors * available. The actual free space is returned by @prep from the driver. */ -static __always_inline u32 +static __always_inline __nocfi_generic u32 libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, u32 n, bool unroll, u64 priv, u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526a..ab20f549b8f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); -void tcp_rcvbuf_grow(struct sock *sk); +void tcp_rcvbuf_grow(struct sock *sk, u32 newval); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b69..c7bcdb3afad7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -451,25 +451,26 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) /* Log all TLS record header TCP sequences in [seq, seq+len] */ static inline void -tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async, + __be32 seq, u16 len) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); - rx_ctx->resync_async->loglen = 0; - rx_ctx->resync_async->rcd_delta = 0; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; } static inline void -tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async, + __be32 seq) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); +} - atomic64_set(&rx_ctx->resync_async->req, - ((u64)ntohl(seq) << 32) | RESYNC_REQ); +static inline void +tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async) +{ + atomic64_set(&resync_async->req, 0); } static inline void diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6d6500148c4b..993008cdea65 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -252,8 +252,8 @@ struct scsi_device { unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ - unsigned int ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ - unsigned int ua_por_ctr; /* Counter for Power On / Reset UAs */ + atomic_t ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ + atomic_t ua_por_ctr; /* Counter for Power On / Reset UAs */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ @@ -693,10 +693,8 @@ static inline int scsi_device_busy(struct scsi_device *sdev) } /* Macros to access the UNIT ATTENTION counters */ -#define scsi_get_ua_new_media_ctr(sdev) \ - ((const unsigned int)(sdev->ua_new_media_ctr)) -#define scsi_get_ua_por_ctr(sdev) \ - ((const unsigned int)(sdev->ua_por_ctr)) +#define scsi_get_ua_new_media_ctr(sdev) atomic_read(&sdev->ua_new_media_ctr) +#define scsi_get_ua_por_ctr(sdev) atomic_read(&sdev->ua_por_ctr) #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index ddd997ac3216..0fbcdb15c74b 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -120,8 +120,11 @@ enum audio_device { TAS2570, TAS2572, TAS2781, + TAS5802, + TAS5815, TAS5825, TAS5827, + TAS5828, TAS_OTHERS, }; diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 9d2c36c6a0ed..6757233bd064 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) + __field(__u32, rcv_ssthresh) + __field(__u32, window_clamp) + __field(__u32, rcv_wnd) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) @@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; + __entry->rcv_ssthresh = tp->rcv_ssthresh; + __entry->window_clamp = tp->window_clamp; + __entry->rcv_wnd = tp->rcv_wnd; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow, ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, + __entry->rcv_ssthresh, __entry->window_clamp, + __entry->rcv_wnd, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index cd7402e36b6d..406a42be429b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1555,27 +1555,6 @@ struct drm_amdgpu_info_hw_ip { __u32 userq_num_slots; }; -/* GFX metadata BO sizes and alignment info (in bytes) */ -struct drm_amdgpu_info_uq_fw_areas_gfx { - /* shadow area size */ - __u32 shadow_size; - /* shadow area base virtual mem alignment */ - __u32 shadow_alignment; - /* context save area size */ - __u32 csa_size; - /* context save area base virtual mem alignment */ - __u32 csa_alignment; -}; - -/* IP specific fw related information used in the - * subquery AMDGPU_INFO_UQ_FW_AREAS - */ -struct drm_amdgpu_info_uq_fw_areas { - union { - struct drm_amdgpu_info_uq_fw_areas_gfx gfx; - }; -}; - struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index ea91aa8afde9..e527b24bd824 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -979,14 +979,20 @@ extern "C" { * 2 = Gob Height 8, Turing+ Page Kind mapping * 3 = Reserved for future use. * - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further - * bit remapping step that occurs at an even lower level than the - * page kind and block linear swizzles. This causes the layout of - * surfaces mapped in those SOC's GPUs to be incompatible with the - * equivalent mapping on other GPUs in the same system. - * - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. - * 1 = Desktop GPU and Tegra Xavier+ Layout + * 22:22 s Sector layout. There is a further bit remapping step that occurs + * 26:27 at an even lower level than the page kind and block linear + * swizzles. This causes the bit arrangement of surfaces in memory + * to differ subtly, and prevents direct sharing of surfaces between + * GPUs with different layouts. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout + * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout + * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout + * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout + * 4 = Reserved for future use. + * 5 = Reserved for future use. + * 6 = Reserved for future use. + * 7 = Reserved for future use. * * 25:23 c Lossless Framebuffer Compression type. * @@ -1001,7 +1007,7 @@ extern "C" { * 6 = Reserved for future use * 7 = Reserved for future use * - * 55:25 - Reserved for future use. Must be zero. + * 55:28 - Reserved for future use. Must be zero. */ #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ fourcc_mod_code(NVIDIA, (0x10 | \ @@ -1009,6 +1015,7 @@ extern "C" { (((k) & 0xff) << 12) | \ (((g) & 0x3) << 20) | \ (((s) & 0x1) << 22) | \ + (((s) & 0x6) << 25) | \ (((c) & 0x7) << 23))) /* To grandfather in prior block linear format modifiers to the above layout, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..517489a7ec60 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1013,6 +1013,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. + * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1119,6 +1133,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index cde8f173f566..22acaaec7b1c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -319,7 +319,7 @@ enum { #define FB_VBLANK_HAVE_VCOUNT 0x020 /* the vcount field is valid */ #define FB_VBLANK_HAVE_HCOUNT 0x040 /* the hcount field is valid */ #define FB_VBLANK_VSYNCING 0x080 /* currently in a vsync */ -#define FB_VBLANK_HAVE_VSYNC 0x100 /* verical syncs can be detected */ +#define FB_VBLANK_HAVE_VSYNC 0x100 /* vertical syncs can be detected */ struct fb_vblank { __u32 flags; /* FB_VBLANK flags */ diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4a9fbf42aa9f..9cd89bcc1d9c 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -631,6 +631,18 @@ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +/* + * Keycodes for hotkeys toggling the electronic privacy screen found on some + * laptops on/off. Note when the embedded-controller turns on/off the eprivacy + * screen itself then the state should be reported through drm connecter props: + * https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties + * Except when implementing the drm connecter properties API is not possible + * because e.g. the firmware does not allow querying the presence and/or status + * of the eprivacy screen at boot. + */ +#define KEY_EPRIVACY_SCREEN_ON 0x252 +#define KEY_EPRIVACY_SCREEN_OFF 0x253 + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 263bed13473e..b7c8dad26690 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -689,9 +689,6 @@ enum io_uring_register_op { /* query various aspects of io_uring, see linux/io_uring/query.h */ IORING_REGISTER_QUERY = 35, - /* return zcrx buffers back into circulation */ - IORING_REGISTER_ZCRX_REFILL = 36, - /* this goes last */ IORING_REGISTER_LAST, @@ -1073,15 +1070,6 @@ struct io_uring_zcrx_ifq_reg { __u64 __resv[3]; }; -struct io_uring_zcrx_sync_refill { - __u32 zcrx_id; - /* the number of entries to return */ - __u32 nr_entries; - /* pointer to an array of struct io_uring_zcrx_rqe */ - __u64 rqes; - __u64 __resv[2]; -}; - #ifdef __cplusplus } #endif diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 5d754322a27c..3539ccbfd064 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -36,6 +36,9 @@ struct io_uring_query_opcode { __u64 enter_flags; /* Bitmask of all supported IOSQE_* flags */ __u64 sqe_flags; + /* The number of available query opcodes */ + __u32 nr_query_opcodes; + __u32 __pad; }; #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6efa98a57ec1..52f6000ab020 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -962,7 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 -#define KVM_CAP_GUEST_MEMFD_MMAP 244 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1599,7 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) -#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 8bf27ab8bcb4..1db45b01532b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 { struct virtio_net_hdr_v1_hash { struct virtio_net_hdr_v1 hdr; - __le32 hash_value; + __le16 hash_value_lo; + __le16 hash_value_hi; #define VIRTIO_NET_HASH_REPORT_NONE 0 #define VIRTIO_NET_HASH_REPORT_IPv4 1 #define VIRTIO_NET_HASH_REPORT_TCPv4 2 diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9425cfd9d00e..0f95576bf1f6 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -688,6 +688,13 @@ enum ufshcd_quirks { * single doorbell mode. */ UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, + + /* + * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd + * time (refer link_startup_again) after the 1st time was successful, + * because it causes link startup to become unreliable. + */ + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26, }; enum ufshcd_caps { diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index ff3364531c77..294c75a8a3bd 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; - struct rusage sq_usage; unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; unsigned int sq_head = READ_ONCE(r->sq.head); unsigned int sq_tail = READ_ONCE(r->sq.tail); @@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) * thread termination. */ if (tsk) { + u64 usec; + get_task_struct(tsk); rcu_read_unlock(); - getrusage(tsk, RUSAGE_SELF, &sq_usage); + usec = io_sq_cpu_usec(tsk); put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 - + sq_usage.ru_stime.tv_usec); + sq_total_time = usec; sq_work_time = sq->work_time; } else { rcu_read_unlock(); diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145a..794ef95df293 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table) static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, u32 slot_index) - __must_hold(&req->ctx->uring_lock) + __must_hold(&ctx->uring_lock) { struct io_rsrc_node *node; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 820ef0527666..296667ba712c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags) } static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe, - struct io_big_cqe *big_cqe) + struct io_big_cqe *big_cqe) { struct io_overflow_cqe *ocqe; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index aad655e38672..a727e020fe03 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, return 1; } +static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) +{ + /* + * If we came in unlocked, we have no choice but to consume the + * buffer here, otherwise nothing ensures that the buffer won't + * get used by others. This does mean it'll be pinned until the + * IO completes, coming in unlocked means we're being called from + * io-wq context and there may be further retries in async hybrid + * mode. For the locked case, the caller must call commit when + * the transfer completes (or if we get -EAGAIN and must poll of + * retry). + */ + if (issue_flags & IO_URING_F_UNLOCKED) + return true; + + /* uring_cmd commits kbuf upfront, no need to auto-commit */ + if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD) + return true; + return false; +} + static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) @@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, sel.buf_list = bl; sel.addr = u64_to_user_ptr(buf->addr); - if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { - /* - * If we came in unlocked, we have no choice but to consume the - * buffer here, otherwise nothing ensures that the buffer won't - * get used by others. This does mean it'll be pinned until the - * IO completes, coming in unlocked means we're being called from - * io-wq context and there may be further retries in async hybrid - * mode. For the locked case, the caller must call commit when - * the transfer completes (or if we get -EAGAIN and must poll of - * retry). - */ + if (io_should_commit(req, issue_flags)) { io_kbuf_commit(req, sel.buf_list, *len, 1); sel.buf_list = NULL; } diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 2e99dffddfc5..add03ca75cb9 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -135,7 +135,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg) { - unsigned long size = mr->nr_pages << PAGE_SHIFT; + unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT; struct page **pages; int nr_pages; diff --git a/io_uring/net.c b/io_uring/net.c index f99b90c762fc..a95cc9ca2a4d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; if (sr->flags & IORING_SEND_VECTORIZED) - return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } diff --git a/io_uring/query.c b/io_uring/query.c index 645301bd2c82..cf02893ba911 100644 --- a/io_uring/query.c +++ b/io_uring/query.c @@ -20,6 +20,8 @@ static ssize_t io_query_ops(void *data) e->ring_setup_flags = IORING_SETUP_FLAGS; e->enter_flags = IORING_ENTER_FLAGS; e->sqe_flags = SQE_VALID_FLAGS; + e->nr_query_opcodes = __IO_URING_QUERY_MAX; + e->__pad = 0; return sizeof(*e); } diff --git a/io_uring/register.c b/io_uring/register.c index 43f04c47522c..d189b266b8cc 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -421,13 +421,6 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (unlikely(ret)) return ret; - /* nothing to do, but copy params back */ - if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) { - if (copy_to_user(arg, &p, sizeof(p))) - return -EFAULT; - return 0; - } - size = rings_size(p.flags, p.sq_entries, p.cq_entries, &sq_array_offset); if (size == SIZE_MAX) @@ -613,6 +606,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) if (ret) return ret; if (copy_to_user(rd_uptr, &rd, sizeof(rd))) { + guard(mutex)(&ctx->mmap_lock); io_free_region(ctx, &ctx->param_region); return -EFAULT; } @@ -833,9 +827,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, case IORING_REGISTER_QUERY: ret = io_query(ctx, arg, nr_args); break; - case IORING_REGISTER_ZCRX_REFILL: - ret = io_zcrx_return_bufs(ctx, arg, nr_args); - break; default: ret = -EINVAL; break; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d787c16dc1c3..0010c4992490 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -943,8 +943,8 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, struct req_iterator rq_iter; struct io_mapped_ubuf *imu; struct io_rsrc_node *node; - struct bio_vec bv, *bvec; - u16 nr_bvecs; + struct bio_vec bv; + unsigned int nr_bvecs = 0; int ret = 0; io_ring_submit_lock(ctx, issue_flags); @@ -965,8 +965,11 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, goto unlock; } - nr_bvecs = blk_rq_nr_phys_segments(rq); - imu = io_alloc_imu(ctx, nr_bvecs); + /* + * blk_rq_nr_phys_segments() may overestimate the number of bvecs + * but avoids needing to iterate over the bvecs + */ + imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq)); if (!imu) { kfree(node); ret = -ENOMEM; @@ -977,16 +980,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, imu->len = blk_rq_bytes(rq); imu->acct_pages = 0; imu->folio_shift = PAGE_SHIFT; - imu->nr_bvecs = nr_bvecs; refcount_set(&imu->refs, 1); imu->release = release; imu->priv = rq; imu->is_kbuf = true; imu->dir = 1 << rq_data_dir(rq); - bvec = imu->bvec; rq_for_each_bvec(bv, rq, rq_iter) - *bvec++ = bv; + imu->bvec[nr_bvecs++] = bv; + imu->nr_bvecs = nr_bvecs; node->buf = imu; data->nodes[index] = node; @@ -1403,8 +1405,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs, size_t max_segs = 0; unsigned i; - for (i = 0; i < nr_iovs; i++) + for (i = 0; i < nr_iovs; i++) { max_segs += (iov[i].iov_len >> shift) + 2; + if (max_segs > INT_MAX) + return -EOVERFLOW; + } return max_segs; } @@ -1510,7 +1515,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter, if (unlikely(ret)) return ret; } else { - nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + int ret = io_estimate_bvec_size(iov, nr_iovs, imu); + + if (ret < 0) + return ret; + nr_segs = ret; } if (sizeof(struct bio_vec) > sizeof(struct iovec)) { diff --git a/io_uring/rw.c b/io_uring/rw.c index 08882648d569..abe68ba9c9dc 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -463,7 +463,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) void io_readv_writev_cleanup(struct io_kiocb *req) { + struct io_async_rw *rw = req->async_data; + lockdep_assert_held(&req->ctx->uring_lock); + io_vec_free(&rw->vec); io_rw_recycle(req, 0); } @@ -542,7 +545,7 @@ static void __io_complete_rw_common(struct io_kiocb *req, long res) { if (res == req->cqe.res) return; - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if ((res == -EOPNOTSUPP || res == -EAGAIN) && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; } else { req_set_fail(req); @@ -655,13 +658,17 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { + u32 cflags = 0; + __io_complete_rw_common(req, ret); /* * Safe to call io_end from here as we're inline * from the submission path. */ io_req_io_end(req); - io_req_set_res(req, final_ret, io_put_kbuf(req, ret, sel->buf_list)); + if (sel) + cflags = io_put_kbuf(req, ret, sel->buf_list); + io_req_set_res(req, final_ret, cflags); io_req_rw_cleanup(req, issue_flags); return IOU_COMPLETE; } else { diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index a3f11349ce06..e22f072c7d5f 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -11,6 +11,7 @@ #include <linux/audit.h> #include <linux/security.h> #include <linux/cpuset.h> +#include <linux/sched/cputime.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> @@ -169,7 +170,38 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd) return READ_ONCE(sqd->state); } -static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) +struct io_sq_time { + bool started; + u64 usec; +}; + +u64 io_sq_cpu_usec(struct task_struct *tsk) +{ + u64 utime, stime; + + task_cputime_adjusted(tsk, &utime, &stime); + do_div(stime, 1000); + return stime; +} + +static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist) +{ + if (!ist->started) + return; + ist->started = false; + sqd->work_time += io_sq_cpu_usec(current) - ist->usec; +} + +static void io_sq_start_worktime(struct io_sq_time *ist) +{ + if (ist->started) + return; + ist->started = true; + ist->usec = io_sq_cpu_usec(current); +} + +static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd, + bool cap_entries, struct io_sq_time *ist) { unsigned int to_submit; int ret = 0; @@ -182,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (to_submit || !wq_list_empty(&ctx->iopoll_list)) { const struct cred *creds = NULL; + io_sq_start_worktime(ist); + if (ctx->sq_creds != current_cred()) creds = override_creds(ctx->sq_creds); @@ -255,23 +289,11 @@ static bool io_sq_tw_pending(struct llist_node *retry_list) return retry_list || !llist_empty(&tctx->task_list); } -static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start) -{ - struct rusage end; - - getrusage(current, RUSAGE_SELF, &end); - end.ru_stime.tv_sec -= start->ru_stime.tv_sec; - end.ru_stime.tv_usec -= start->ru_stime.tv_usec; - - sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000; -} - static int io_sq_thread(void *data) { struct llist_node *retry_list = NULL; struct io_sq_data *sqd = data; struct io_ring_ctx *ctx; - struct rusage start; unsigned long timeout = 0; char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); @@ -309,6 +331,7 @@ static int io_sq_thread(void *data) mutex_lock(&sqd->lock); while (1) { bool cap_entries, sqt_spin = false; + struct io_sq_time ist = { }; if (io_sqd_events_pending(sqd) || signal_pending(current)) { if (io_sqd_handle_event(sqd)) @@ -317,9 +340,8 @@ static int io_sq_thread(void *data) } cap_entries = !list_is_singular(&sqd->ctx_list); - getrusage(current, RUSAGE_SELF, &start); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - int ret = __io_sq_thread(ctx, cap_entries); + int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist); if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) sqt_spin = true; @@ -327,15 +349,18 @@ static int io_sq_thread(void *data) if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE)) sqt_spin = true; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - if (io_napi(ctx)) + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + if (io_napi(ctx)) { + io_sq_start_worktime(&ist); io_napi_sqpoll_busy_poll(ctx); + } + } + + io_sq_update_worktime(sqd, &ist); if (sqt_spin || !time_after(jiffies, timeout)) { - if (sqt_spin) { - io_sq_update_worktime(sqd, &start); + if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; - } if (unlikely(need_resched())) { mutex_unlock(&sqd->lock); cond_resched(); diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index b83dcdec9765..fd2f6f29b516 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); +u64 io_sq_cpu_usec(struct task_struct *tsk); static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) { diff --git a/io_uring/waitid.c b/io_uring/waitid.c index f25110fb1b12..53532ae6256c 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; iwa = io_uring_alloc_async_data(NULL, req); - if (!unlikely(iwa)) + if (unlikely(!iwa)) return -ENOMEM; iwa->req = req; diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index a816f5902091..b1b723222cdb 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -928,74 +928,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = { .uninstall = io_pp_uninstall, }; -#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16) -#define IO_ZCRX_SYS_REFILL_BATCH 32 - -static void io_return_buffers(struct io_zcrx_ifq *ifq, - struct io_uring_zcrx_rqe *rqes, unsigned nr) -{ - int i; - - for (i = 0; i < nr; i++) { - struct net_iov *niov; - netmem_ref netmem; - - if (!io_parse_rqe(&rqes[i], ifq, &niov)) - continue; - - scoped_guard(spinlock_bh, &ifq->rq_lock) { - if (!io_zcrx_put_niov_uref(niov)) - continue; - } - - netmem = net_iov_to_netmem(niov); - if (!page_pool_unref_and_test(netmem)) - continue; - io_zcrx_return_niov(niov); - } -} - -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH]; - struct io_uring_zcrx_rqe __user *user_rqes; - struct io_uring_zcrx_sync_refill zr; - struct io_zcrx_ifq *ifq; - unsigned nr, i; - - if (nr_arg) - return -EINVAL; - if (copy_from_user(&zr, arg, sizeof(zr))) - return -EFAULT; - if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS) - return -EINVAL; - if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv))) - return -EINVAL; - - ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id); - if (!ifq) - return -EINVAL; - nr = zr.nr_entries; - user_rqes = u64_to_user_ptr(zr.rqes); - - for (i = 0; i < nr;) { - unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH); - size_t size = batch * sizeof(rqes[0]); - - if (copy_from_user(rqes, user_rqes + i, size)) - return i ? i : -EFAULT; - io_return_buffers(ifq, rqes, batch); - - i += batch; - - if (fatal_signal_pending(current)) - return i; - cond_resched(); - } - return nr; -} - static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, struct io_zcrx_ifq *ifq, int off, int len) { diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 33ef61503092..a48871b5adad 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -63,8 +63,6 @@ struct io_zcrx_ifq { }; #if defined(CONFIG_IO_URING_ZCRX) -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg); int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg); void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); @@ -97,11 +95,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct { return NULL; } -static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - return -EOPNOTSUPP; -} #endif int io_recvzc(struct io_kiocb *req, unsigned int issue_flags); diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 422270d64820..54e581072617 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -109,6 +109,15 @@ config KEXEC_HANDOVER to keep data or state alive across the kexec. For this to work, both source and target kernels need to have this option enabled. +config KEXEC_HANDOVER_DEBUG + bool "Enable Kexec Handover debug checks" + depends on KEXEC_HANDOVER + help + This option enables extra sanity checks for the Kexec Handover + subsystem. Since, KHO performance is crucial in live update + scenarios and the extra code might be adding overhead it is + only optionally enabled. + config CRASH_DUMP bool "kernel crash dumps" default ARCH_DEFAULT_CRASH_DUMP diff --git a/kernel/Makefile b/kernel/Makefile index df3dd8291bb6..9fe722305c9b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o +obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup/ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356df..e4007fea4909 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1215,13 +1215,20 @@ static void bpf_wq_work(struct work_struct *work) rcu_read_unlock_trace(); } +static void bpf_async_cb_rcu_free(struct rcu_head *rcu) +{ + struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu); + + kfree_nolock(cb); +} + static void bpf_wq_delete_work(struct work_struct *work) { struct bpf_work *w = container_of(work, struct bpf_work, delete_work); cancel_work_sync(&w->work); - kfree_rcu(w, cb.rcu); + call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free); } static void bpf_timer_delete_work(struct work_struct *work) @@ -1230,13 +1237,13 @@ static void bpf_timer_delete_work(struct work_struct *work) /* Cancel the timer and wait for callback to complete if it was running. * If hrtimer_cancel() can be safely called it's safe to call - * kfree_rcu(t) right after for both preallocated and non-preallocated + * call_rcu() right after for both preallocated and non-preallocated * maps. The async->cb = NULL was already done and no code path can see * address 't' anymore. Timer if armed for existing bpf_hrtimer before * bpf_timer_cancel_and_free will have been cancelled. */ hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); } static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, @@ -1270,11 +1277,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u goto out; } - /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until - * kmalloc_nolock() is available, avoid locking issues by using - * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). - */ - cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node); if (!cb) { ret = -ENOMEM; goto out; @@ -1315,7 +1318,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u * or pinned in bpffs. */ WRITE_ONCE(async->cb, NULL); - kfree(cb); + kfree_nolock(cb); ret = -EPERM; } out: @@ -1580,7 +1583,7 @@ void bpf_timer_cancel_and_free(void *val) * timer _before_ calling us, such that failing to cancel it here will * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. * Therefore, we _need_ to cancel any outstanding timers before we do - * kfree_rcu, even though no more timers can be armed. + * call_rcu, even though no more timers can be armed. * * Moreover, we need to schedule work even if timer does not belong to * the calling callback_fn, as on two different CPUs, we can end up in a @@ -1607,7 +1610,7 @@ void bpf_timer_cancel_and_free(void *val) * completion. */ if (hrtimer_try_to_cancel(&t->timer) >= 0) - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); else queue_work(system_dfl_wq, &t->cb.delete_work); } else { @@ -4166,7 +4169,8 @@ release_prog: } /** - * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode + * bpf_task_work_schedule_signal_impl - Schedule BPF callback using task_work_add with TWA_SIGNAL + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4175,15 +4179,17 @@ release_prog: * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_signal_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL); } /** - * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode + * bpf_task_work_schedule_resume_impl - Schedule BPF callback using task_work_add with TWA_RESUME + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4192,9 +4198,10 @@ __bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct b * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } @@ -4342,6 +4349,7 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_local_irq_save) BTF_ID_FLAGS(func, bpf_local_irq_restore) +#ifdef CONFIG_BPF_EVENTS BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr) @@ -4350,6 +4358,7 @@ BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) +#endif #ifdef CONFIG_DMA_SHARED_BUFFER BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) @@ -4371,9 +4380,9 @@ BTF_ID_FLAGS(func, bpf_strnstr); #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS) BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif -BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 3c611aba7f52..1e6538f59a78 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -195,8 +195,10 @@ static struct func_instance *__lookup_instance(struct bpf_verifier_env *env, return ERR_PTR(-ENOMEM); result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set), GFP_KERNEL_ACCOUNT); - if (!result->must_write_set) + if (!result->must_write_set) { + kvfree(result); return ERR_PTR(-ENOMEM); + } memcpy(&result->callchain, callchain, sizeof(*callchain)); result->insn_cnt = subprog_sz; hash_add(liveness->func_instances, &result->hl_node, key); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 719d73299397..d706c4b7f532 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -216,6 +216,8 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) static void bpf_ringbuf_free(struct bpf_ringbuf *rb) { + irq_work_sync(&rb->work); + /* copy pages pointer and nr_pages to local variable, as we are going * to unmap rb itself with vunmap() below */ diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c index eb6c5a21c2ef..ff16c631951b 100644 --- a/kernel/bpf/stream.c +++ b/kernel/bpf/stream.c @@ -355,7 +355,8 @@ __bpf_kfunc_start_defs(); * Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the * enum in headers. */ -__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, u32 len__sz, void *aux__prog) +__bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + u32 len__sz, void *aux__prog) { struct bpf_bprintf_data data = { .get_bin_args = true, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2a9456a3e730..8a129746bd6c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -520,6 +520,21 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, return ptr; } +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + struct mem_cgroup *memcg, *old_memcg; + void *ptr; + + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); + ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); + set_active_memcg(old_memcg); + mem_cgroup_put(memcg); + + return ptr; +} + void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) { struct mem_cgroup *memcg, *old_memcg; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5949095e51c3..f2cb0b097093 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -479,11 +479,6 @@ again: * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the * trampoline again, and retry register. */ - /* reset fops->func and fops->trampoline for re-register */ - tr->fops->func = NULL; - tr->fops->trampoline = 0; - - /* free im memory and reallocate later */ bpf_tramp_image_free(im); goto again; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ff40e5e65c43..fbe4bb91c564 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8866,7 +8866,7 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env, struct bpf_verifier_state *cur) { struct bpf_func_state *fold, *fcur; - int i, fr; + int i, fr, num_slots; reset_idmap_scratch(env); for (fr = old->curframe; fr >= 0; fr--) { @@ -8879,7 +8879,9 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env, &fcur->regs[i], &env->idmap_scratch); - for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) { + num_slots = min(fold->allocated_stack / BPF_REG_SIZE, + fcur->allocated_stack / BPF_REG_SIZE); + for (i = 0; i < num_slots; i++) { if (!is_spilled_reg(&fold->stack[i]) || !is_spilled_reg(&fcur->stack[i])) continue; @@ -12259,8 +12261,8 @@ enum special_kfunc_type { KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, KF___bpf_trap, - KF_bpf_task_work_schedule_signal, - KF_bpf_task_work_schedule_resume, + KF_bpf_task_work_schedule_signal_impl, + KF_bpf_task_work_schedule_resume_impl, }; BTF_ID_LIST(special_kfunc_list) @@ -12331,13 +12333,13 @@ BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) BTF_ID(func, __bpf_trap) -BTF_ID(func, bpf_task_work_schedule_signal) -BTF_ID(func, bpf_task_work_schedule_resume) +BTF_ID(func, bpf_task_work_schedule_signal_impl) +BTF_ID(func, bpf_task_work_schedule_resume_impl) static bool is_task_work_add_kfunc(u32 func_id) { - return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || - func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; + return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal_impl] || + func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume_impl]; } static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6ae5f48cf64e..fdee387f0d6b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5892,7 +5892,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; - seqcount_init(&cgrp->freezer.freeze_seq); + seqcount_spinlock_init(&cgrp->freezer.freeze_seq, &css_set_lock); if (cgrp->freezer.e_freeze) { /* * Set the CGRP_FREEZE flag, so when a process will be diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 3b1c43382eec..99dac1aa972a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct resource *old_res, old_res->start = 0; old_res->end = 0; } else { - crashk_res.end = ram_res->start - 1; + old_res->end = ram_res->start - 1; } crash_free_reserved_phys_range(ram_res->start, ram_res->end); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 1e5c64cb6a42..138ede653de4 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -23,6 +23,7 @@ #include <linux/ctype.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/swiotlb.h> #include <asm/sections.h> #include "debug.h" @@ -594,7 +595,9 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) if (rc == -ENOMEM) { pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; - } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && + is_swiotlb_active(entry->dev))) { err_printk(entry->dev, entry, "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 7541f6f85fcb..1fd347da9026 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9403,7 +9403,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_HUGETLB; if (file) { - struct inode *inode; + const struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); @@ -9416,12 +9416,12 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = file_path(file, buf, PATH_MAX - sizeof(u64)); + name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; } - inode = file_inode(vma->vm_file); + inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; @@ -9492,7 +9492,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, if (!filter->path.dentry) return false; - if (d_inode(filter->path.dentry) != file_inode(file)) + if (d_inode(filter->path.dentry) != file_user_inode(file)) return false; if (filter->offset > offset + size) @@ -11773,7 +11773,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event = container_of(hrtimer, struct perf_event, hw.hrtimer); - if (event->state != PERF_EVENT_STATE_ACTIVE) + if (event->state != PERF_EVENT_STATE_ACTIVE || + event->hw.state & PERF_HES_STOPPED) return HRTIMER_NORESTART; event->pmu->read(event); @@ -11819,15 +11820,20 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; /* - * The throttle can be triggered in the hrtimer handler. - * The HRTIMER_NORESTART should be used to stop the timer, - * rather than hrtimer_cancel(). See perf_swevent_hrtimer() + * Careful: this function can be triggered in the hrtimer handler, + * for cpu-clock events, so hrtimer_cancel() would cause a + * deadlock. + * + * So use hrtimer_try_to_cancel() to try to stop the hrtimer, + * and the cpu-clock handler also sets the PERF_HES_STOPPED flag, + * which guarantees that perf_swevent_hrtimer() will stop the + * hrtimer once it sees the PERF_HES_STOPPED flag. */ if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); - hrtimer_cancel(&hwc->hrtimer); + hrtimer_try_to_cancel(&hwc->hrtimer); } } @@ -11871,12 +11877,14 @@ static void cpu_clock_event_update(struct perf_event *event) static void cpu_clock_event_start(struct perf_event *event, int flags) { + event->hw.state = 0; local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); } static void cpu_clock_event_stop(struct perf_event *event, int flags) { + event->hw.state = PERF_HES_STOPPED; perf_swevent_cancel_hrtimer(event); if (flags & PERF_EF_UPDATE) cpu_clock_event_update(event); @@ -11950,12 +11958,14 @@ static void task_clock_event_update(struct perf_event *event, u64 now) static void task_clock_event_start(struct perf_event *event, int flags) { + event->hw.state = 0; local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); } static void task_clock_event_stop(struct perf_event *event, int flags) { + event->hw.state = PERF_HES_STOPPED; perf_swevent_cancel_hrtimer(event); if (flags & PERF_EF_UPDATE) task_clock_event_update(event, event->ctx->time); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8709c69118b5..f11ceb8be8c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2765,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. */ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. @@ -2772,9 +2775,6 @@ static void handle_swbp(struct pt_regs *regs) if (instruction_pointer(regs) != bp_vaddr) goto out; - /* Try to optimize after first hit. */ - arch_uprobe_optimize(&uprobe->arch, bp_vaddr); - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 125804fbb5cb..2e77a6e5c865 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1680,10 +1680,10 @@ static bool futex_ref_get(struct futex_private_hash *fph) { struct mm_struct *mm = fph->mm; - guard(rcu)(); + guard(preempt)(); - if (smp_load_acquire(&fph->state) == FR_PERCPU) { - this_cpu_inc(*mm->futex_ref); + if (READ_ONCE(fph->state) == FR_PERCPU) { + __this_cpu_inc(*mm->futex_ref); return true; } @@ -1694,10 +1694,10 @@ static bool futex_ref_put(struct futex_private_hash *fph) { struct mm_struct *mm = fph->mm; - guard(rcu)(); + guard(preempt)(); - if (smp_load_acquire(&fph->state) == FR_PERCPU) { - this_cpu_dec(*mm->futex_ref); + if (READ_ONCE(fph->state) == FR_PERCPU) { + __this_cpu_dec(*mm->futex_ref); return false; } diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index a08cc076f332..ffde93d051a4 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include <linux/mm.h> #include "gcov.h" -#if (__GNUC__ >= 14) +#if (__GNUC__ >= 15) +#define GCOV_COUNTERS 10 +#elif (__GNUC__ >= 14) #define GCOV_COUNTERS 9 #elif (__GNUC__ >= 10) #define GCOV_COUNTERS 8 diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3ffa0d80ddd1..d1917b28761a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1030,7 +1030,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name) { - scoped_irqdesc_get_and_lock(irq, 0) + scoped_irqdesc_get_and_buslock(irq, 0) __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name); } EXPORT_SYMBOL_GPL(__irq_set_handler); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c94837382037..400856abf672 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -659,7 +659,7 @@ void __disable_irq(struct irq_desc *desc) static int __disable_irq_nosync(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { __disable_irq(scoped_irqdesc); return 0; } @@ -789,7 +789,7 @@ void __enable_irq(struct irq_desc *desc) */ void enable_irq(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { struct irq_desc *desc = scoped_irqdesc; if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 76f0940fb485..03d12e27189f 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "KHO: " fmt +#include <linux/cleanup.h> #include <linux/cma.h> #include <linux/count_zeros.h> #include <linux/debugfs.h> @@ -22,6 +23,7 @@ #include <asm/early_ioremap.h> +#include "kexec_handover_internal.h" /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. @@ -67,10 +69,10 @@ early_param("kho", kho_parse_enable); * Keep track of memory that is to be preserved across KHO. * * The serializing side uses two levels of xarrays to manage chunks of per-order - * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a - * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations - * each bitmap will cover 16M of address space. Thus, for 16G of memory at most - * 512K of bitmap memory will be needed for order 0. + * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order + * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0 + * allocations each bitmap will cover 128M of address space. Thus, for 16G of + * memory at most 512K of bitmap memory will be needed for order 0. * * This approach is fully incremental, as the serialization progresses folios * can continue be aggregated to the tracker. The final step, immediately prior @@ -78,12 +80,14 @@ early_param("kho", kho_parse_enable); * successor kernel to parse. */ -#define PRESERVE_BITS (512 * 8) +#define PRESERVE_BITS (PAGE_SIZE * 8) struct kho_mem_phys_bits { DECLARE_BITMAP(preserve, PRESERVE_BITS); }; +static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE); + struct kho_mem_phys { /* * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized @@ -131,28 +135,28 @@ static struct kho_out kho_out = { .finalized = false, }; -static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) +static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) { - void *elm, *res; + void *res = xa_load(xa, index); + + if (res) + return res; - elm = xa_load(xa, index); - if (elm) - return elm; + void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL); - elm = kzalloc(sz, GFP_KERNEL); if (!elm) return ERR_PTR(-ENOMEM); + if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE))) + return ERR_PTR(-EINVAL); + res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); if (xa_is_err(res)) - res = ERR_PTR(xa_err(res)); - - if (res) { - kfree(elm); + return ERR_PTR(xa_err(res)); + else if (res) return res; - } - return elm; + return no_free_ptr(elm); } static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, @@ -167,12 +171,12 @@ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, const unsigned long pfn_high = pfn >> order; physxa = xa_load(&track->orders, order); - if (!physxa) - continue; + if (WARN_ON_ONCE(!physxa)) + return; bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS); - if (!bits) - continue; + if (WARN_ON_ONCE(!bits)) + return; clear_bit(pfn_high % PRESERVE_BITS, bits->preserve); @@ -216,8 +220,7 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, } } - bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, - sizeof(*bits)); + bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS); if (IS_ERR(bits)) return PTR_ERR(bits); @@ -345,15 +348,19 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE); static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, unsigned long order) { - struct khoser_mem_chunk *chunk; + struct khoser_mem_chunk *chunk __free(free_page) = NULL; - chunk = kzalloc(PAGE_SIZE, GFP_KERNEL); + chunk = (void *)get_zeroed_page(GFP_KERNEL); if (!chunk) - return NULL; + return ERR_PTR(-ENOMEM); + + if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE))) + return ERR_PTR(-EINVAL); + chunk->hdr.order = order; if (cur_chunk) KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk); - return chunk; + return no_free_ptr(chunk); } static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk) @@ -374,14 +381,17 @@ static int kho_mem_serialize(struct kho_serialization *ser) struct khoser_mem_chunk *chunk = NULL; struct kho_mem_phys *physxa; unsigned long order; + int err = -ENOMEM; xa_for_each(&ser->track.orders, order, physxa) { struct kho_mem_phys_bits *bits; unsigned long phys; chunk = new_chunk(chunk, order); - if (!chunk) + if (IS_ERR(chunk)) { + err = PTR_ERR(chunk); goto err_free; + } if (!first_chunk) first_chunk = chunk; @@ -391,8 +401,10 @@ static int kho_mem_serialize(struct kho_serialization *ser) if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) { chunk = new_chunk(chunk, order); - if (!chunk) + if (IS_ERR(chunk)) { + err = PTR_ERR(chunk); goto err_free; + } } elm = &chunk->bitmaps[chunk->hdr.num_elms]; @@ -409,7 +421,7 @@ static int kho_mem_serialize(struct kho_serialization *ser) err_free: kho_mem_ser_free(first_chunk); - return -ENOMEM; + return err; } static void __init deserialize_bitmap(unsigned int order, @@ -465,8 +477,8 @@ static void __init kho_mem_deserialize(const void *fdt) * area for early allocations that happen before page allocator is * initialized. */ -static struct kho_scratch *kho_scratch; -static unsigned int kho_scratch_cnt; +struct kho_scratch *kho_scratch; +unsigned int kho_scratch_cnt; /* * The scratch areas are scaled by default as percent of memory allocated from @@ -752,6 +764,9 @@ int kho_preserve_folio(struct folio *folio) const unsigned int order = folio_order(folio); struct kho_mem_track *track = &kho_out.ser.track; + if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order))) + return -EINVAL; + return __kho_preserve_order(track, pfn, order); } EXPORT_SYMBOL_GPL(kho_preserve_folio); @@ -775,6 +790,11 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages) unsigned long failed_pfn = 0; int err = 0; + if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT))) { + return -EINVAL; + } + while (pfn < end_pfn) { const unsigned int order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); @@ -862,16 +882,17 @@ err_free: return NULL; } -static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) +static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk, + unsigned short order) { struct kho_mem_track *track = &kho_out.ser.track; unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); __kho_unpreserve(track, pfn, pfn + 1); - for (int i = 0; chunk->phys[i]; i++) { + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { pfn = PHYS_PFN(chunk->phys[i]); - __kho_unpreserve(track, pfn, pfn + 1); + __kho_unpreserve(track, pfn, pfn + (1 << order)); } } @@ -882,7 +903,7 @@ static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc) while (chunk) { struct kho_vmalloc_chunk *tmp = chunk; - kho_vmalloc_unpreserve_chunk(chunk); + kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order); chunk = KHOSER_LOAD_PTR(chunk->hdr.next); free_page((unsigned long)tmp); @@ -992,7 +1013,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) while (chunk) { struct page *page; - for (int i = 0; chunk->phys[i]; i++) { + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { phys_addr_t phys = chunk->phys[i]; if (idx + contig_pages > total_pages) diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c new file mode 100644 index 000000000000..6efb696f5426 --- /dev/null +++ b/kernel/kexec_handover_debug.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kexec_handover_debug.c - kexec handover optional debug functionality + * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#define pr_fmt(fmt) "KHO: " fmt + +#include "kexec_handover_internal.h" + +bool kho_scratch_overlap(phys_addr_t phys, size_t size) +{ + phys_addr_t scratch_start, scratch_end; + unsigned int i; + + for (i = 0; i < kho_scratch_cnt; i++) { + scratch_start = kho_scratch[i].addr; + scratch_end = kho_scratch[i].addr + kho_scratch[i].size; + + if (phys < scratch_end && (phys + size) > scratch_start) + return true; + } + + return false; +} diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h new file mode 100644 index 000000000000..3c3c7148ceed --- /dev/null +++ b/kernel/kexec_handover_internal.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H +#define LINUX_KEXEC_HANDOVER_INTERNAL_H + +#include <linux/kexec_handover.h> +#include <linux/types.h> + +extern struct kho_scratch *kho_scratch; +extern unsigned int kho_scratch_cnt; + +#ifdef CONFIG_KEXEC_HANDOVER_DEBUG +bool kho_scratch_overlap(phys_addr_t phys, size_t size); +#else +static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size) +{ + return false; +} +#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */ + +#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f250ce036a0..26e45f86b955 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -706,7 +706,6 @@ static void power_down(void) #ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { - pm_restore_gfp_mask(); error = suspend_devices_and_enter(mem_sleep_current); if (!error) goto exit; @@ -746,9 +745,6 @@ static void power_down(void) cpu_relax(); exit: - /* Match the pm_restore_gfp_mask() call in hibernate(). */ - pm_restrict_gfp_mask(); - /* Restore swap signature. */ error = swsusp_unmark(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 3cf2d7e72567..549f51ca3a1e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -31,23 +31,35 @@ * held, unless the suspend/hibernate code is guaranteed not to run in parallel * with that modification). */ +static unsigned int saved_gfp_count; static gfp_t saved_gfp_mask; void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (saved_gfp_mask) { - gfp_allowed_mask = saved_gfp_mask; - saved_gfp_mask = 0; - } + + if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + return; + + gfp_allowed_mask = saved_gfp_mask; + saved_gfp_mask = 0; + + pm_pr_dbg("GFP mask restored\n"); } void pm_restrict_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - WARN_ON(saved_gfp_mask); + + if (saved_gfp_count++) { + WARN_ON((saved_gfp_mask & ~(__GFP_IO | __GFP_FS)) != gfp_allowed_mask); + return; + } + saved_gfp_mask = gfp_allowed_mask; gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); + + pm_pr_dbg("GFP mask restricted\n"); } unsigned int lock_system_sleep(void) diff --git a/kernel/power/process.c b/kernel/power/process.c index 8ff68ebaa1e0..dc0dfc349f22 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,6 +132,7 @@ int freeze_processes(void) if (!pm_freezing) static_branch_inc(&freezer_active); + pm_wakeup_clear(0); pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c933a63a9718..3d4ebedad69f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -594,7 +594,6 @@ static int enter_state(suspend_state_t state) } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); - pm_wakeup_clear(0); pm_suspend_clear_flags(); error = suspend_prepare(state); if (error) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 0beff7eeaaba..70ae21f7370d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -635,7 +635,7 @@ struct cmp_data { }; /* Indicates the image size after compression */ -static atomic_t compressed_size = ATOMIC_INIT(0); +static atomic64_t compressed_size = ATOMIC_INIT(0); /* * Compression function that runs in its own thread. @@ -664,7 +664,7 @@ static int compress_threadfn(void *data) d->ret = crypto_acomp_compress(d->cr); d->cmp_len = d->cr->dlen; - atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len); + atomic64_add(d->cmp_len, &compressed_size); atomic_set_release(&d->stop, 1); wake_up(&d->done); } @@ -689,14 +689,14 @@ static int save_compressed_image(struct swap_map_handle *handle, ktime_t start; ktime_t stop; size_t off; - unsigned thr, run_threads, nr_threads; + unsigned int thr, run_threads, nr_threads; unsigned char *page = NULL; struct cmp_data *data = NULL; struct crc_data *crc = NULL; hib_init_batch(&hb); - atomic_set(&compressed_size, 0); + atomic64_set(&compressed_size, 0); /* * We'll limit the number of threads for compression to limit memory @@ -877,11 +877,14 @@ out_finish: stop = ktime_get(); if (!ret) ret = err2; - if (!ret) + if (!ret) { + swsusp_show_speed(start, stop, nr_to_write, "Wrote"); + pr_info("Image size after compression: %lld kbytes\n", + (atomic64_read(&compressed_size) / 1024)); pr_info("Image saving done\n"); - swsusp_show_speed(start, stop, nr_to_write, "Wrote"); - pr_info("Image size after compression: %d kbytes\n", - (atomic_read(&compressed_size) / 1024)); + } else { + pr_err("Image saving failed: %d\n", ret); + } out_clean: hib_finish_batch(&hb); @@ -899,7 +902,8 @@ out_clean: } vfree(data); } - if (page) free_page((unsigned long)page); + if (page) + free_page((unsigned long)page); return ret; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 198d2dd45f59..f754a60de848 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8571,10 +8571,12 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { WARN(true, "Dying CPU not properly vacated!"); dump_rq_tasks(rq, KERN_WARNING); } + dl_server_stop(&rq->fair_server); rq_unlock_irqrestore(rq, &rf); calc_load_migrate(rq); @@ -9604,7 +9606,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, guard(rq_lock_irq)(rq); cfs_rq->runtime_enabled = runtime_enabled; - cfs_rq->runtime_remaining = 0; + cfs_rq->runtime_remaining = 1; if (cfs_rq->throttled) unthrottle_cfs_rq(cfs_rq); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 615411a0a881..7b7671060bf9 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1582,6 +1582,9 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (!dl_server(dl_se) || dl_se->dl_server_active) return; + if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) + return; + dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2b0e88206d07..7aae1d0ce37e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -25,7 +25,7 @@ static struct scx_sched __rcu *scx_root; * guarantee system safety. Maintain a dedicated task list which contains every * task between its fork and eventual free. */ -static DEFINE_SPINLOCK(scx_tasks_lock); +static DEFINE_RAW_SPINLOCK(scx_tasks_lock); static LIST_HEAD(scx_tasks); /* ops enable/disable */ @@ -67,8 +67,19 @@ static unsigned long scx_watchdog_timestamp = INITIAL_JIFFIES; static struct delayed_work scx_watchdog_work; -/* for %SCX_KICK_WAIT */ -static unsigned long __percpu *scx_kick_cpus_pnt_seqs; +/* + * For %SCX_KICK_WAIT: Each CPU has a pointer to an array of pick_task sequence + * numbers. The arrays are allocated with kvzalloc() as size can exceed percpu + * allocator limits on large machines. O(nr_cpu_ids^2) allocation, allocated + * lazily when enabling and freed when disabling to avoid waste when sched_ext + * isn't active. + */ +struct scx_kick_pseqs { + struct rcu_head rcu; + unsigned long seqs[]; +}; + +static DEFINE_PER_CPU(struct scx_kick_pseqs __rcu *, scx_kick_pseqs); /* * Direct dispatch marker. @@ -465,7 +476,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter) BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS & ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1)); - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR }; list_add(&iter->cursor.tasks_node, &scx_tasks); @@ -496,14 +507,14 @@ static void scx_task_iter_unlock(struct scx_task_iter *iter) __scx_task_iter_rq_unlock(iter); if (iter->list_locked) { iter->list_locked = false; - spin_unlock_irq(&scx_tasks_lock); + raw_spin_unlock_irq(&scx_tasks_lock); } } static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter) { if (!iter->list_locked) { - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); iter->list_locked = true; } } @@ -780,13 +791,23 @@ static void schedule_deferred(struct rq *rq) if (rq->scx.flags & SCX_RQ_IN_WAKEUP) return; + /* Don't do anything if there already is a deferred operation. */ + if (rq->scx.flags & SCX_RQ_BAL_CB_PENDING) + return; + /* * If in balance, the balance callbacks will be called before rq lock is * released. Schedule one. + * + * + * We can't directly insert the callback into the + * rq's list: The call can drop its lock and make the pending balance + * callback visible to unrelated code paths that call rq_pin_lock(). + * + * Just let balance_one() know that it must do it itself. */ if (rq->scx.flags & SCX_RQ_IN_BALANCE) { - queue_balance_callback(rq, &rq->scx.deferred_bal_cb, - deferred_bal_cb_workfn); + rq->scx.flags |= SCX_RQ_BAL_CB_PENDING; return; } @@ -2003,6 +2024,19 @@ static void flush_dispatch_buf(struct scx_sched *sch, struct rq *rq) dspc->cursor = 0; } +static inline void maybe_queue_balance_callback(struct rq *rq) +{ + lockdep_assert_rq_held(rq); + + if (!(rq->scx.flags & SCX_RQ_BAL_CB_PENDING)) + return; + + queue_balance_callback(rq, &rq->scx.deferred_bal_cb, + deferred_bal_cb_workfn); + + rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING; +} + static int balance_one(struct rq *rq, struct task_struct *prev) { struct scx_sched *sch = scx_root; @@ -2150,6 +2184,8 @@ static int balance_scx(struct rq *rq, struct task_struct *prev, #endif rq_repin_lock(rq, rf); + maybe_queue_balance_callback(rq); + return ret; } @@ -2904,9 +2940,9 @@ void scx_post_fork(struct task_struct *p) } } - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); list_add_tail(&p->scx.tasks_node, &scx_tasks); - spin_unlock_irq(&scx_tasks_lock); + raw_spin_unlock_irq(&scx_tasks_lock); percpu_up_read(&scx_fork_rwsem); } @@ -2930,9 +2966,9 @@ void sched_ext_free(struct task_struct *p) { unsigned long flags; - spin_lock_irqsave(&scx_tasks_lock, flags); + raw_spin_lock_irqsave(&scx_tasks_lock, flags); list_del_init(&p->scx.tasks_node); - spin_unlock_irqrestore(&scx_tasks_lock, flags); + raw_spin_unlock_irqrestore(&scx_tasks_lock, flags); /* * @p is off scx_tasks and wholly ours. scx_enable()'s READY -> ENABLED @@ -3471,7 +3507,9 @@ static void scx_sched_free_rcu_work(struct work_struct *work) struct scx_dispatch_q *dsq; int node; + irq_work_sync(&sch->error_irq_work); kthread_stop(sch->helper->task); + free_percpu(sch->pcpu); for_each_node_state(node, N_POSSIBLE) @@ -3850,6 +3888,27 @@ static const char *scx_exit_reason(enum scx_exit_kind kind) } } +static void free_kick_pseqs_rcu(struct rcu_head *rcu) +{ + struct scx_kick_pseqs *pseqs = container_of(rcu, struct scx_kick_pseqs, rcu); + + kvfree(pseqs); +} + +static void free_kick_pseqs(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *to_free; + + to_free = rcu_replace_pointer(*pseqs, NULL, true); + if (to_free) + call_rcu(&to_free->rcu, free_kick_pseqs_rcu); + } +} + static void scx_disable_workfn(struct kthread_work *work) { struct scx_sched *sch = container_of(work, struct scx_sched, disable_work); @@ -3986,6 +4045,7 @@ static void scx_disable_workfn(struct kthread_work *work) free_percpu(scx_dsp_ctx); scx_dsp_ctx = NULL; scx_dsp_max_batch = 0; + free_kick_pseqs(); mutex_unlock(&scx_enable_mutex); @@ -4216,7 +4276,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) size_t avail, used; bool idle; - rq_lock(rq, &rf); + rq_lock_irqsave(rq, &rf); idle = list_empty(&rq->scx.runnable_list) && rq->curr->sched_class == &idle_sched_class; @@ -4285,7 +4345,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) scx_dump_task(&s, &dctx, p, ' '); next: - rq_unlock(rq, &rf); + rq_unlock_irqrestore(rq, &rf); } dump_newline(&s); @@ -4348,6 +4408,33 @@ static void scx_vexit(struct scx_sched *sch, irq_work_queue(&sch->error_irq_work); } +static int alloc_kick_pseqs(void) +{ + int cpu; + + /* + * Allocate per-CPU arrays sized by nr_cpu_ids. Use kvzalloc as size + * can exceed percpu allocator limits on large machines. + */ + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *new_pseqs; + + WARN_ON_ONCE(rcu_access_pointer(*pseqs)); + + new_pseqs = kvzalloc_node(struct_size(new_pseqs, seqs, nr_cpu_ids), + GFP_KERNEL, cpu_to_node(cpu)); + if (!new_pseqs) { + free_kick_pseqs(); + return -ENOMEM; + } + + rcu_assign_pointer(*pseqs, new_pseqs); + } + + return 0; +} + static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops) { struct scx_sched *sch; @@ -4495,10 +4582,14 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) goto err_unlock; } + ret = alloc_kick_pseqs(); + if (ret) + goto err_unlock; + sch = scx_alloc_and_add_sched(ops); if (IS_ERR(sch)) { ret = PTR_ERR(sch); - goto err_unlock; + goto err_free_pseqs; } /* @@ -4701,6 +4792,8 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) return 0; +err_free_pseqs: + free_kick_pseqs(); err_unlock: mutex_unlock(&scx_enable_mutex); return ret; @@ -5082,10 +5175,18 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work) { struct rq *this_rq = this_rq(); struct scx_rq *this_scx = &this_rq->scx; - unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs); + struct scx_kick_pseqs __rcu *pseqs_pcpu = __this_cpu_read(scx_kick_pseqs); bool should_wait = false; + unsigned long *pseqs; s32 cpu; + if (unlikely(!pseqs_pcpu)) { + pr_warn_once("kick_cpus_irq_workfn() called with NULL scx_kick_pseqs"); + return; + } + + pseqs = rcu_dereference_bh(pseqs_pcpu)->seqs; + for_each_cpu(cpu, this_scx->cpus_to_kick) { should_wait |= kick_one_cpu(cpu, this_rq, pseqs); cpumask_clear_cpu(cpu, this_scx->cpus_to_kick); @@ -5208,11 +5309,6 @@ void __init init_sched_ext_class(void) scx_idle_init_masks(); - scx_kick_cpus_pnt_seqs = - __alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids, - __alignof__(scx_kick_cpus_pnt_seqs[0])); - BUG_ON(!scx_kick_cpus_pnt_seqs); - for_each_possible_cpu(cpu) { struct rq *rq = cpu_rq(cpu); int n = cpu_to_node(cpu); @@ -5225,8 +5321,8 @@ void __init init_sched_ext_class(void) BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n)); - init_irq_work(&rq->scx.deferred_irq_work, deferred_irq_workfn); - init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn); + rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn); + rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn); if (cpu_online(cpu)) cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE; @@ -5688,8 +5784,8 @@ BTF_KFUNCS_START(scx_kfunc_ids_dispatch) BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots) BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel) BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_dispatch) @@ -5820,8 +5916,8 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_unlocked) BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_unlocked) @@ -6305,7 +6401,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf) guard(rcu)(); - sch = rcu_dereference(sch); + sch = rcu_dereference(scx_root); if (unlikely(!sch)) return; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc0b7ce8a65d..5b752324270b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6024,20 +6024,17 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)]; /* - * It's possible we are called with !runtime_remaining due to things - * like user changed quota setting(see tg_set_cfs_bandwidth()) or async - * unthrottled us with a positive runtime_remaining but other still - * running entities consumed those runtime before we reached here. + * It's possible we are called with runtime_remaining < 0 due to things + * like async unthrottled us with a positive runtime_remaining but other + * still running entities consumed those runtime before we reached here. * - * Anyway, we can't unthrottle this cfs_rq without any runtime remaining - * because any enqueue in tg_unthrottle_up() will immediately trigger a - * throttle, which is not supposed to happen on unthrottle path. + * We can't unthrottle this cfs_rq without any runtime remaining because + * any enqueue in tg_unthrottle_up() will immediately trigger a throttle, + * which is not supposed to happen on unthrottle path. */ if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) return; - se = cfs_rq->tg->se[cpu_of(rq)]; - cfs_rq->throttled = 0; update_rq_clock(rq); @@ -6437,6 +6434,16 @@ static void sync_throttle(struct task_group *tg, int cpu) cfs_rq->throttle_count = pcfs_rq->throttle_count; cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu)); + + /* + * It is not enough to sync the "pelt_clock_throttled" indicator + * with the parent cfs_rq when the hierarchy is not queued. + * Always join a throttled hierarchy with PELT clock throttled + * and leaf it to the first enqueue, or distribution to + * unthrottle the PELT clock. + */ + if (cfs_rq->throttle_count) + cfs_rq->pelt_clock_throttled = 1; } /* conditionally throttle active cfs_rq's from put_prev_entity() */ @@ -8920,21 +8927,21 @@ simple: return p; idle: - if (!rf) - return NULL; - - new_tasks = sched_balance_newidle(rq, rf); + if (rf) { + new_tasks = sched_balance_newidle(rq, rf); - /* - * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is - * possible for any higher priority task to appear. In that case we - * must re-start the pick_next_entity() loop. - */ - if (new_tasks < 0) - return RETRY_TASK; + /* + * Because sched_balance_newidle() releases (and re-acquires) + * rq->lock, it is possible for any higher priority task to + * appear. In that case we must re-start the pick_next_entity() + * loop. + */ + if (new_tasks < 0) + return RETRY_TASK; - if (new_tasks > 0) - goto again; + if (new_tasks > 0) + goto again; + } /* * rq is about to be idle, check if we need to update the @@ -13187,6 +13194,8 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) if (!cfs_rq_pelt_clock_throttled(cfs_rq)) list_add_leaf_cfs_rq(cfs_rq); } + + assert_list_leaf_cfs_rq(rq_of(cfs_rq)); } #else /* !CONFIG_FAIR_GROUP_SCHED: */ static void propagate_entity_cfs_rq(struct sched_entity *se) { } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5d07067f60..adfb6e3409d7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -784,6 +784,7 @@ enum scx_rq_flags { SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */ SCX_RQ_BYPASSING = 1 << 4, SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */ + SCX_RQ_BAL_CB_PENDING = 1 << 6, /* must queue a cb after dispatching */ SCX_RQ_IN_WAKEUP = 1 << 16, SCX_RQ_IN_BALANCE = 1 << 17, @@ -3740,11 +3741,9 @@ static inline int mm_cid_get(struct rq *rq, struct task_struct *t, struct mm_struct *mm) { struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; - struct cpumask *cpumask; int cid; lockdep_assert_rq_held(rq); - cpumask = mm_cidmask(mm); cid = __this_cpu_read(pcpu_cid->cid); if (mm_cid_is_valid(cid)) { mm_cid_snapshot_time(rq, mm); diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index aa3120104a51..56e17b625c72 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -475,12 +475,6 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, if (!kc->timer_create) return -EOPNOTSUPP; - new_timer = alloc_posix_timer(); - if (unlikely(!new_timer)) - return -EAGAIN; - - spin_lock_init(&new_timer->it_lock); - /* Special case for CRIU to restore timers with a given timer ID. */ if (unlikely(current->signal->timer_create_restore_ids)) { if (copy_from_user(&req_id, created_timer_id, sizeof(req_id))) @@ -490,6 +484,12 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, return -EINVAL; } + new_timer = alloc_posix_timer(); + if (unlikely(!new_timer)) + return -EAGAIN; + + spin_lock_init(&new_timer->it_lock); + /* * Add the timer to the hash table. The timer is not yet valid * after insertion, but has a unique ID allocated. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b6974fce800c..3a4d3b2e3f74 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void) return -ENOMEM; } - for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + for (int i = 0; i < MAX_AUX_CLOCKS; i++) { char id[2] = { [0] = '0' + i, }; struct kobject *clk = kobject_create_and_add(id, auxo); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 42bd2ba68a82..59cfacb8a5bb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1971,7 +1971,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops) */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, - struct ftrace_hash *new_hash) + struct ftrace_hash *new_hash, + bool update_target) { struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; @@ -2006,10 +2007,13 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, if (rec->flags & FTRACE_FL_DISABLED) continue; - /* We need to update only differences of filter_hash */ + /* + * Unless we are updating the target of a direct function, + * we only need to update differences of filter_hash + */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); - if (in_old == in_new) + if (!update_target && (in_old == in_new)) continue; if (in_new) { @@ -2020,7 +2024,16 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, if (is_ipmodify) goto rollback; - FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT); + /* + * If this is called by __modify_ftrace_direct() + * then it is only changing where the direct + * pointer is jumping to, and the record already + * points to a direct trampoline. If it isn't, + * then it is a bug to update ipmodify on a direct + * caller. + */ + FTRACE_WARN_ON(!update_target && + (rec->flags & FTRACE_FL_DIRECT)); /* * Another ops with IPMODIFY is already @@ -2076,7 +2089,7 @@ static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); + return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash, false); } /* Disabling always succeeds */ @@ -2087,7 +2100,7 @@ static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); + __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH, false); } static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, @@ -2101,7 +2114,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, if (ftrace_hash_empty(new_hash)) new_hash = NULL; - return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); + return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash, false); } static void print_ip_ins(const char *fmt, const unsigned char *p) @@ -5953,6 +5966,17 @@ static void register_ftrace_direct_cb(struct rcu_head *rhp) free_ftrace_hash(fhp); } +static void reset_direct(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + + remove_direct_functions_hash(hash, addr); + + /* cleanup for possible another register call */ + ops->func = NULL; + ops->trampoline = 0; +} + /** * register_ftrace_direct - Call a custom trampoline directly * for multiple functions registered in @ops @@ -6048,6 +6072,8 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) ops->direct_call = addr; err = register_ftrace_function_nolock(ops); + if (err) + reset_direct(ops, addr); out_unlock: mutex_unlock(&direct_mutex); @@ -6080,7 +6106,6 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct); int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, bool free_filters) { - struct ftrace_hash *hash = ops->func_hash->filter_hash; int err; if (check_direct_multi(ops)) @@ -6090,13 +6115,9 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, mutex_lock(&direct_mutex); err = unregister_ftrace_function(ops); - remove_direct_functions_hash(hash, addr); + reset_direct(ops, addr); mutex_unlock(&direct_mutex); - /* cleanup for possible another register call */ - ops->func = NULL; - ops->trampoline = 0; - if (free_filters) ftrace_free_filter(ops); return err; @@ -6106,7 +6127,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_direct); static int __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { - struct ftrace_hash *hash; + struct ftrace_hash *hash = ops->func_hash->filter_hash; struct ftrace_func_entry *entry, *iter; static struct ftrace_ops tmp_ops = { .func = ftrace_stub, @@ -6127,12 +6148,20 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) return err; /* + * Call __ftrace_hash_update_ipmodify() here, so that we can call + * ops->ops_func for the ops. This is needed because the above + * register_ftrace_function_nolock() worked on tmp_ops. + */ + err = __ftrace_hash_update_ipmodify(ops, hash, hash, true); + if (err) + goto out; + + /* * Now the ftrace_ops_list_func() is called to do the direct callers. * We can safely change the direct functions attached to each entry. */ mutex_lock(&ftrace_lock); - hash = ops->func_hash->filter_hash; size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(iter, &hash->buckets[i], hlist) { @@ -6147,6 +6176,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) mutex_unlock(&ftrace_lock); +out: /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1244d2c5c384..afcd3747264d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7344,6 +7344,10 @@ consume: goto out; } + /* Did the reader catch up with the writer? */ + if (cpu_buffer->reader_page == cpu_buffer->commit_page) + goto out; + reader = rb_get_reader_page(cpu_buffer); if (WARN_ON(!reader)) goto out; diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig index 5e16625f1653..0e013f00c33b 100644 --- a/kernel/trace/rv/monitors/pagefault/Kconfig +++ b/kernel/trace/rv/monitors/pagefault/Kconfig @@ -5,6 +5,7 @@ config RV_MON_PAGEFAULT select RV_LTL_MONITOR depends on RV_MON_RTAPP depends on X86 || RISCV + depends on MMU default y select LTL_MON_EVENTS_ID bool "pagefault monitor" diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 48338520376f..43e9ea473cda 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -501,7 +501,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) list_for_each_entry_continue(mon, &rv_monitors_list, list) { if (mon->enabled) - return mon; + return &mon->list; } return NULL; @@ -509,7 +509,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) { - struct rv_monitor *mon; + struct list_head *head; loff_t l; mutex_lock(&rv_interface_lock); @@ -517,15 +517,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) if (list_empty(&rv_monitors_list)) return NULL; - mon = list_entry(&rv_monitors_list, struct rv_monitor, list); + head = &rv_monitors_list; for (l = 0; l <= *pos; ) { - mon = enabled_monitors_next(m, mon, &l); - if (!mon) + head = enabled_monitors_next(m, head, &l); + if (!head) break; } - return mon; + return head; } /* diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1d536219b624..6bfaf1210dd2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3272,14 +3272,16 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); - kfree(val); + destroy_hist_field(val, 0); ret = PTR_ERR(var); goto err; } field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); if (!field_var) { - kfree(val); + destroy_hist_field(val, 0); + kfree_const(var->type); + kfree(var->var.name); kfree(var); ret = -ENOMEM; goto err; diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index ad9d6347b5fa..8001dbf16891 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -106,13 +106,14 @@ static struct tracepoint_user *__tracepoint_user_init(const char *name, struct t if (!tuser->name) return NULL; + /* Register tracepoint if it is loaded. */ if (tpoint) { + tuser->tpoint = tpoint; ret = tracepoint_user_register(tuser); if (ret) return ERR_PTR(ret); } - tuser->tpoint = tpoint; tuser->refcount = 1; INIT_LIST_HEAD(&tuser->list); list_add(&tuser->list, &tracepoint_user_list); @@ -1513,6 +1514,10 @@ static int disable_trace_fprobe(struct trace_event_call *call, if (!trace_probe_is_enabled(tp)) { list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { unregister_fprobe(&tf->fp); + if (tf->tuser) { + tracepoint_user_put(tf->tuser); + tf->tuser = NULL; + } } } diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan index 7251b6b59e69..cae1ddcc18e1 100644 --- a/lib/Kconfig.kmsan +++ b/lib/Kconfig.kmsan @@ -3,7 +3,7 @@ config HAVE_ARCH_KMSAN bool config HAVE_KMSAN_COMPILER - def_bool CC_IS_CLANG + def_bool $(cc-option,-fsanitize=kernel-memory) config KMSAN bool "KMSAN: detector of uninitialized values use" diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index eea17e36a22b..16859c6226dd 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -64,7 +64,7 @@ config CRYPTO_LIB_CURVE25519 config CRYPTO_LIB_CURVE25519_ARCH bool depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN - default y if ARM && KERNEL_MODE_NEON + default y if ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN default y if PPC64 && CPU_LITTLE_ENDIAN default y if X86_64 @@ -97,7 +97,7 @@ config CRYPTO_LIB_POLY1305 config CRYPTO_LIB_POLY1305_ARCH bool - depends on CRYPTO_LIB_POLY1305 && !UML + depends on CRYPTO_LIB_POLY1305 && !UML && !KMSAN default y if ARM default y if ARM64 && KERNEL_MODE_NEON default y if MIPS diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index bded351aeace..d2845b214585 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -90,7 +90,7 @@ else libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o endif # clang versions prior to 18 may blow out the stack with KASAN -ifeq ($(call clang-min-version, 180000),) +ifeq ($(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_) KASAN_SANITIZE_curve25519-hacl64.o := n endif diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 8c01eabd4eaf..63130a48e237 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -739,7 +739,7 @@ static struct kunit_case kunit_current_test_cases[] = { static void test_dev_action(void *priv) { - *(void **)priv = (void *)1; + *(long *)priv = 1; } static void kunit_device_test(struct kunit *test) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index bb66ea1a3eac..62eb529824c6 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -745,7 +745,8 @@ int kunit_run_tests(struct kunit_suite *suite) .param_index = ++test.param_index, .parent = &test, }; - kunit_init_test(¶m_test, test_case->name, test_case->log); + kunit_init_test(¶m_test, test_case->name, NULL); + param_test.log = test_case->log; kunit_run_case_catch_errors(suite, test_case, ¶m_test); if (param_desc[0] == '\0') { diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 39bb779cb311..5aa4c9500018 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/maple_tree.h> +#define TP_FCT tracepoint_string(__func__) + /* * Kernel pointer hashing renders much of the maple tree dump useless as tagged * pointers get hashed to arbitrary values. @@ -2756,7 +2758,7 @@ static inline void mas_rebalance(struct ma_state *mas, MA_STATE(l_mas, mas->tree, mas->index, mas->last); MA_STATE(r_mas, mas->tree, mas->index, mas->last); - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); /* * Rebalancing occurs if a node is insufficient. Data is rebalanced @@ -2997,7 +2999,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); mast.l = &l_mas; mast.r = &r_mas; @@ -3172,7 +3174,7 @@ static bool mas_is_span_wr(struct ma_wr_state *wr_mas) return false; } - trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); + trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry); return true; } @@ -3416,7 +3418,7 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) * of data may happen. */ mas = wr_mas->mas; - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); if (unlikely(!mas->index && mas->last == ULONG_MAX)) return mas_new_root(mas, wr_mas->entry); @@ -3552,7 +3554,7 @@ done: } else { memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); } - trace_ma_write(__func__, mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); mas_update_gap(mas); mas->end = new_end; return; @@ -3596,7 +3598,7 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) mas->offset++; /* Keep mas accurate. */ } - trace_ma_write(__func__, mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); /* * Only update gap when the new entry is empty or there is an empty * entry in the original two ranges. @@ -3717,7 +3719,7 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas, mas_update_gap(mas); mas->end = new_end; - trace_ma_write(__func__, mas, new_end, wr_mas->entry); + trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry); return; } @@ -3731,7 +3733,7 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas) { struct maple_big_node b_node; - trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry); memset(&b_node, 0, sizeof(struct maple_big_node)); mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); mas_commit_b_node(wr_mas, &b_node); @@ -5062,7 +5064,7 @@ void *mas_store(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); - trace_ma_write(__func__, mas, 0, entry); + trace_ma_write(TP_FCT, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, @@ -5163,7 +5165,7 @@ void mas_store_prealloc(struct ma_state *mas, void *entry) } store: - trace_ma_write(__func__, mas, 0, entry); + trace_ma_write(TP_FCT, mas, 0, entry); mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); mas_destroy(mas); @@ -5882,7 +5884,7 @@ void *mtree_load(struct maple_tree *mt, unsigned long index) MA_STATE(mas, mt, index, index); void *entry; - trace_ma_read(__func__, &mas); + trace_ma_read(TP_FCT, &mas); rcu_read_lock(); retry: entry = mas_start(&mas); @@ -5925,7 +5927,7 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index, MA_STATE(mas, mt, index, last); int ret = 0; - trace_ma_write(__func__, &mas, 0, entry); + trace_ma_write(TP_FCT, &mas, 0, entry); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -6148,7 +6150,7 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index) void *entry = NULL; MA_STATE(mas, mt, index, index); - trace_ma_op(__func__, &mas); + trace_ma_op(TP_FCT, &mas); mtree_lock(mt); entry = mas_erase(&mas); @@ -6485,7 +6487,7 @@ void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) unsigned long copy = *index; #endif - trace_ma_read(__func__, &mas); + trace_ma_read(TP_FCT, &mas); if ((*index) > max) return NULL; diff --git a/lib/test_kho.c b/lib/test_kho.c index 60cd899ea745..fff018e5548d 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -301,6 +301,9 @@ static int __init kho_test_init(void) phys_addr_t fdt_phys; int err; + if (!kho_is_enabled()) + return 0; + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); if (!err) return kho_test_restore(fdt_phys); diff --git a/mm/Kconfig b/mm/Kconfig index 0e26f4fc8717..ca3f146bc705 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -908,6 +908,13 @@ config PAGE_MAPCOUNT config PGTABLE_HAS_HUGE_LEAVES def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE +# +# We can end up creating gigantic folio. +# +config HAVE_GIGANTIC_FOLIOS + def_bool (HUGETLB_PAGE && ARCH_HAS_GIGANTIC_PAGE) || \ + (ZONE_DEVICE && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + # TODO: Allow to be enabled without THP config ARCH_SUPPORTS_HUGE_PFNMAP def_bool n diff --git a/mm/damon/core.c b/mm/damon/core.c index 93848b4c6944..109b050c795a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -452,6 +452,9 @@ void damon_destroy_scheme(struct damos *s) damos_for_each_filter_safe(f, next, s) damos_destroy_filter(f); + damos_for_each_ops_filter_safe(f, next, s) + damos_destroy_filter(f); + kfree(s->migrate_dests.node_id_arr); kfree(s->migrate_dests.weight_arr); damon_del_scheme(s); @@ -832,7 +835,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) src_goal->metric, src_goal->target_value); if (!new_goal) return -ENOMEM; - damos_commit_quota_goal_union(new_goal, src_goal); + damos_commit_quota_goal(new_goal, src_goal); damos_add_quota_goal(dst, new_goal); } return 0; @@ -1450,7 +1453,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) INIT_LIST_HEAD(&control->list); mutex_lock(&ctx->call_controls_lock); - list_add_tail(&ctx->call_controls, &control->list); + list_add_tail(&control->list, &ctx->call_controls); mutex_unlock(&ctx->call_controls_lock); if (!damon_is_running(ctx)) return -EINVAL; diff --git a/mm/damon/stat.c b/mm/damon/stat.c index d8010968bbed..bf8626859902 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us, static struct damon_ctx *damon_stat_context; +static unsigned long damon_stat_last_refresh_jiffies; + static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c) { struct damon_target *t; @@ -130,13 +132,12 @@ static void damon_stat_set_idletime_percentiles(struct damon_ctx *c) static int damon_stat_damon_call_fn(void *data) { struct damon_ctx *c = data; - static unsigned long last_refresh_jiffies; /* avoid unnecessarily frequent stat update */ - if (time_before_eq(jiffies, last_refresh_jiffies + + if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies + msecs_to_jiffies(5 * MSEC_PER_SEC))) return 0; - last_refresh_jiffies = jiffies; + damon_stat_last_refresh_jiffies = jiffies; aggr_interval_us = c->attrs.aggr_interval; damon_stat_set_estimated_memory_bandwidth(c); @@ -210,6 +211,8 @@ static int damon_stat_start(void) err = damon_start(&damon_stat_context, 1, true); if (err) return err; + + damon_stat_last_refresh_jiffies = jiffies; call_control.data = damon_stat_context; return damon_call(damon_stat_context, &call_control); } diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 2fc722f998f8..3c0d727788c8 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1473,13 +1473,14 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_new_ctx(); + if (!test_ctx) + return -ENOMEM; err = damon_commit_ctx(test_ctx, param_ctx); - if (err) { - damon_destroy_ctx(test_ctx); + if (err) goto out; - } err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); out: + damon_destroy_ctx(test_ctx); damon_destroy_ctx(param_ctx); return err; } @@ -1551,16 +1552,17 @@ static struct damon_ctx *damon_sysfs_build_ctx( return ctx; } +static unsigned long damon_sysfs_next_update_jiffies; + static int damon_sysfs_repeat_call_fn(void *data) { struct damon_sysfs_kdamond *sysfs_kdamond = data; - static unsigned long next_update_jiffies; if (!sysfs_kdamond->refresh_ms) return 0; - if (time_before(jiffies, next_update_jiffies)) + if (time_before(jiffies, damon_sysfs_next_update_jiffies)) return 0; - next_update_jiffies = jiffies + + damon_sysfs_next_update_jiffies = jiffies + msecs_to_jiffies(sysfs_kdamond->refresh_ms); if (!mutex_trylock(&damon_sysfs_lock)) @@ -1606,6 +1608,9 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) } kdamond->damon_ctx = ctx; + damon_sysfs_next_update_jiffies = + jiffies + msecs_to_jiffies(kdamond->refresh_ms); + repeat_call_control->fn = damon_sysfs_repeat_call_fn; repeat_call_control->data = kdamond; repeat_call_control->repeat = true; diff --git a/mm/filemap.c b/mm/filemap.c index 13f0259d993c..2f1e7e283a51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3681,7 +3681,8 @@ skip: static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, - unsigned long *rss, unsigned short *mmap_miss) + unsigned long *rss, unsigned short *mmap_miss, + bool can_map_large) { unsigned int ref_from_caller = 1; vm_fault_t ret = 0; @@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, * The folio must not cross VMA or page table boundary. */ addr0 = addr - start * PAGE_SIZE; - if (folio_within_vma(folio, vmf->vma) && + if (can_map_large && folio_within_vma(folio, vmf->vma) && (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) { vmf->pte -= start; page -= start; @@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned long rss = 0; unsigned int nr_pages = 0, folio_type; unsigned short mmap_miss = 0, mmap_miss_saved; + bool can_map_large; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; - if (filemap_map_pmd(vmf, folio, start_pgoff)) { + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; + end_pgoff = min(end_pgoff, file_end); + + /* + * Do not allow to map with PTEs beyond i_size and with PMD + * across i_size to preserve SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + can_map_large = shmem_mapping(mapping) || + file_end >= folio_next_index(folio); + + if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) { ret = VM_FAULT_NOPAGE; goto out; } @@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, goto out; } - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; - if (end_pgoff > file_end) - end_pgoff = file_end; - folio_type = mm_counter_file(folio); do { unsigned long end; @@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, - nr_pages, &rss, &mmap_miss); + nr_pages, &rss, &mmap_miss, + can_map_large); folio_unlock(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1b81680b4225..2f2a521e5d68 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -214,7 +214,8 @@ retry: if (likely(atomic_inc_not_zero(&huge_zero_refcount))) return true; - zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, + zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO | __GFP_ZEROTAGS) & + ~__GFP_MOVABLE, HPAGE_PMD_ORDER); if (!zero_folio) { count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); @@ -3263,6 +3264,14 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) caller_pins; } +static bool page_range_has_hwpoisoned(struct page *page, long nr_pages) +{ + for (; nr_pages; page++, nr_pages--) + if (PageHWPoison(page)) + return true; + return false; +} + /* * It splits @folio into @new_order folios and copies the @folio metadata to * all the resulting folios. @@ -3270,17 +3279,24 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) static void __split_folio_to_order(struct folio *folio, int old_order, int new_order) { + /* Scan poisoned pages when split a poisoned folio to large folios */ + const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order; long new_nr_pages = 1 << new_order; long nr_pages = 1 << old_order; long i; + folio_clear_has_hwpoisoned(folio); + + /* Check first new_nr_pages since the loop below skips them */ + if (handle_hwpoison && + page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages)) + folio_set_has_hwpoisoned(folio); /* * Skip the first new_nr_pages, since the new folio from them have all * the flags from the original folio. */ for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) { struct page *new_head = &folio->page + i; - /* * Careful: new_folio is not a "real" folio before we cleared PageTail. * Don't pass it around before clear_compound_head(). @@ -3322,6 +3338,10 @@ static void __split_folio_to_order(struct folio *folio, int old_order, (1L << PG_dirty) | LRU_GEN_MASK | LRU_REFS_MASK)); + if (handle_hwpoison && + page_range_has_hwpoisoned(new_head, new_nr_pages)) + folio_set_has_hwpoisoned(new_folio); + new_folio->mapping = folio->mapping; new_folio->index = folio->index + i; @@ -3422,8 +3442,6 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, if (folio_test_anon(folio)) mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); - folio_clear_has_hwpoisoned(folio); - /* * split to new_order one order at a time. For uniform split, * folio is split to new_order directly. @@ -3504,7 +3522,8 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, /* order-1 is not supported for anonymous THP. */ VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { /* @@ -3535,7 +3554,8 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order, if (folio_test_anon(folio)) { VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (new_order) { if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { @@ -3653,8 +3673,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order, min_order = mapping_min_folio_order(folio->mapping); if (new_order < min_order) { - VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u", - min_order); ret = -EINVAL; goto out; } @@ -3986,12 +4004,7 @@ int min_order_for_split(struct folio *folio) int split_folio_to_list(struct folio *folio, struct list_head *list) { - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - return split_huge_page_to_list_to_order(&folio->page, list, ret); + return split_huge_page_to_list_to_order(&folio->page, list, 0); } /* @@ -4109,6 +4122,9 @@ static bool thp_underused(struct folio *folio) if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) return false; + if (folio_contain_hwpoisoned_page(folio)) + return false; + for (i = 0; i < folio_nr_pages(folio); i++) { if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) { if (++num_zero_pages > khugepaged_max_ptes_none) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 795ee393eac0..0455119716ec 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7614,13 +7614,12 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); - i_mmap_assert_write_locked(vma->vm_file->f_mapping); - hugetlb_vma_assert_locked(vma); if (sz != PMD_SIZE) return 0; if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep))) return 0; - + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + hugetlb_vma_assert_locked(vma); pud_clear(pud); /* * Once our caller drops the rmap lock, some other process might be diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c index 8bca7fece47f..35ceaa8adb41 100644 --- a/mm/kmsan/core.c +++ b/mm/kmsan/core.c @@ -72,9 +72,6 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags, nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); - /* Don't sleep. */ - flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM); - handle = stack_depot_save(entries, nr_entries, flags); return stack_depot_set_extra_bits(handle, extra); } diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c index 2cee59d89c80..8f22d1f22981 100644 --- a/mm/kmsan/hooks.c +++ b/mm/kmsan/hooks.c @@ -84,7 +84,8 @@ void kmsan_slab_free(struct kmem_cache *s, void *object) if (s->ctor) return; kmsan_enter_runtime(); - kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL, + kmsan_internal_poison_memory(object, s->object_size, + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } @@ -114,7 +115,8 @@ void kmsan_kfree_large(const void *ptr) kmsan_enter_runtime(); page = virt_to_head_page((void *)ptr); KMSAN_WARN_ON(ptr != page_address(page)); - kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL, + kmsan_internal_poison_memory((void *)ptr, page_size(page), + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c index 54f3c3c962f0..55fdea199aaf 100644 --- a/mm/kmsan/shadow.c +++ b/mm/kmsan/shadow.c @@ -208,7 +208,7 @@ void kmsan_free_page(struct page *page, unsigned int order) return; kmsan_enter_runtime(); kmsan_internal_poison_memory(page_address(page), page_size(page), - GFP_KERNEL, + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } @@ -2455,6 +2455,95 @@ static bool should_skip_rmap_item(struct folio *folio, return true; } +struct ksm_next_page_arg { + struct folio *folio; + struct page *page; + unsigned long addr; +}; + +static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct ksm_next_page_arg *private = walk->private; + struct vm_area_struct *vma = walk->vma; + pte_t *start_ptep = NULL, *ptep, pte; + struct mm_struct *mm = walk->mm; + struct folio *folio; + struct page *page; + spinlock_t *ptl; + pmd_t pmd; + + if (ksm_test_exit(mm)) + return 0; + + cond_resched(); + + pmd = pmdp_get_lockless(pmdp); + if (!pmd_present(pmd)) + return 0; + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) { + ptl = pmd_lock(mm, pmdp); + pmd = pmdp_get(pmdp); + + if (!pmd_present(pmd)) { + goto not_found_unlock; + } else if (pmd_leaf(pmd)) { + page = vm_normal_page_pmd(vma, addr, pmd); + if (!page) + goto not_found_unlock; + folio = page_folio(page); + + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) + goto not_found_unlock; + + page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT); + goto found_unlock; + } + spin_unlock(ptl); + } + + start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!start_ptep) + return 0; + + for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) { + pte = ptep_get(ptep); + + if (!pte_present(pte)) + continue; + + page = vm_normal_page(vma, addr, pte); + if (!page) + continue; + folio = page_folio(page); + + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) + continue; + goto found_unlock; + } + +not_found_unlock: + spin_unlock(ptl); + if (start_ptep) + pte_unmap(start_ptep); + return 0; +found_unlock: + folio_get(folio); + spin_unlock(ptl); + if (start_ptep) + pte_unmap(start_ptep); + private->page = page; + private->folio = folio; + private->addr = addr; + return 1; +} + +static struct mm_walk_ops ksm_next_page_ops = { + .pmd_entry = ksm_next_page_pmd_entry, + .walk_lock = PGWALK_RDLOCK, +}; + static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) { struct mm_struct *mm; @@ -2542,21 +2631,27 @@ next_mm: ksm_scan.address = vma->vm_end; while (ksm_scan.address < vma->vm_end) { + struct ksm_next_page_arg ksm_next_page_arg; struct page *tmp_page = NULL; - struct folio_walk fw; struct folio *folio; if (ksm_test_exit(mm)) break; - folio = folio_walk_start(&fw, vma, ksm_scan.address, 0); - if (folio) { - if (!folio_is_zone_device(folio) && - folio_test_anon(folio)) { - folio_get(folio); - tmp_page = fw.page; - } - folio_walk_end(&fw, vma); + int found; + + found = walk_page_range_vma(vma, ksm_scan.address, + vma->vm_end, + &ksm_next_page_ops, + &ksm_next_page_arg); + + if (found > 0) { + folio = ksm_next_page_arg.folio; + tmp_page = ksm_next_page_arg.page; + ksm_scan.address = ksm_next_page_arg.addr; + } else { + VM_WARN_ON_ONCE(found < 0); + ksm_scan.address = vma->vm_end - PAGE_SIZE; } if (tmp_page) { diff --git a/mm/memory.c b/mm/memory.c index 74b45e258323..b59ae7ce42eb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,6 +65,7 @@ #include <linux/gfp.h> #include <linux/migrate.h> #include <linux/string.h> +#include <linux/shmem_fs.h> #include <linux/memory-tiers.h> #include <linux/debugfs.h> #include <linux/userfaultfd_k.h> @@ -5501,8 +5502,25 @@ fallback: return ret; } + if (!needs_fallback && vma->vm_file) { + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t file_end; + + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + + /* + * Do not allow to map with PTEs beyond i_size and with PMD + * across i_size to preserve SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + needs_fallback = !shmem_mapping(mapping) && + file_end < folio_next_index(folio); + } + if (pmd_none(*vmf->pmd)) { - if (folio_test_pmd_mappable(folio)) { + if (!needs_fallback && folio_test_pmd_mappable(folio)) { ret = do_set_pmd(vmf, folio, page); if (ret != VM_FAULT_FALLBACK) return ret; diff --git a/mm/migrate.c b/mm/migrate.c index e3065c9edb55..c0e9f15be2a2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -301,8 +301,9 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, struct page *page = folio_page(folio, idx); pte_t newpte; - if (PageCompound(page)) + if (PageCompound(page) || PageHWPoison(page)) return false; + VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(pte_present(old_pte), page); diff --git a/mm/mm_init.c b/mm/mm_init.c index 3db2dea7db4c..7712d887b696 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename, panic("Failed to allocate %s hash table\n", tablename); pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, + tablename, 1UL << log2qty, get_order(size), size, virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear"); if (_hash_shift) diff --git a/mm/mremap.c b/mm/mremap.c index 35de0a7b910e..419a0ea0a870 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr if (!folio || !folio_test_large(folio)) return 1; - return folio_pte_batch(folio, ptep, pte, max_nr); + return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE); } static int move_ptes(struct pagetable_move_control *pmc, @@ -1237,10 +1237,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, } /* - * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and - * account flags on remaining VMA by convention (it cannot be mlock()'d any - * longer, as pages in range are no longer mapped), and removing anon_vma_chain - * links from it (if the entire VMA was copied over). + * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on + * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in + * range are no longer mapped), and removing anon_vma_chain links from it if the + * entire VMA was copied over. */ static void dontunmap_complete(struct vma_remap_struct *vrm, struct vm_area_struct *new_vma) @@ -1250,11 +1250,8 @@ static void dontunmap_complete(struct vma_remap_struct *vrm, unsigned long old_start = vrm->vma->vm_start; unsigned long old_end = vrm->vma->vm_end; - /* - * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old - * vma. - */ - vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); + /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */ + vm_flags_clear(vrm->vma, VM_LOCKED_MASK); /* * anon_vma links of the old vma is no longer needed after its page diff --git a/mm/page_owner.c b/mm/page_owner.c index c3ca21132c2c..589ec37c94aa 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -168,6 +168,9 @@ static void add_stack_record_to_list(struct stack_record *stack_record, unsigned long flags; struct stack *stack; + if (!gfpflags_allow_spinning(gfp_mask)) + return; + set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); if (!stack) { diff --git a/mm/secretmem.c b/mm/secretmem.c index 60137305bc20..b59350daffe3 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -82,13 +82,13 @@ retry: __folio_mark_uptodate(folio); err = filemap_add_folio(mapping, folio, offset, gfp); if (unlikely(err)) { - folio_put(folio); /* * If a split of large page was required, it * already happened when we marked the page invalid * which guarantees that this call won't fail */ set_direct_map_default_noflush(folio_page(folio, 0)); + folio_put(folio); if (err == -EEXIST) goto retry; diff --git a/mm/shmem.c b/mm/shmem.c index 1b976414d6fa..5a3f0f754dc0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1881,6 +1881,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, struct shmem_inode_info *info = SHMEM_I(inode); unsigned long suitable_orders = 0; struct folio *folio = NULL; + pgoff_t aligned_index; long pages; int error, order; @@ -1894,10 +1895,12 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, order = highest_order(suitable_orders); while (suitable_orders) { pages = 1UL << order; - index = round_down(index, pages); - folio = shmem_alloc_folio(gfp, order, info, index); - if (folio) + aligned_index = round_down(index, pages); + folio = shmem_alloc_folio(gfp, order, info, aligned_index); + if (folio) { + index = aligned_index; goto allocated; + } if (pages == HPAGE_PMD_NR) count_vm_event(THP_FILE_FALLBACK); diff --git a/mm/slub.c b/mm/slub.c index b1f15598fbfd..a0b905c2a557 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2046,15 +2046,19 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) if (slab_exts) { unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, obj_exts_slab, obj_exts); - /* codetag should be NULL */ + + if (unlikely(is_codetag_empty(&slab_exts[offs].ref))) + return; + + /* codetag should be NULL here */ WARN_ON(slab_exts[offs].ref.ct); set_codetag_empty(&slab_exts[offs].ref); } } -static inline void mark_failed_objexts_alloc(struct slab *slab) +static inline bool mark_failed_objexts_alloc(struct slab *slab) { - slab->obj_exts = OBJEXTS_ALLOC_FAIL; + return cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL) == 0; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, @@ -2076,7 +2080,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} -static inline void mark_failed_objexts_alloc(struct slab *slab) {} +static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, struct slabobj_ext *vec, unsigned int objects) {} @@ -2124,8 +2128,14 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, slab_nid(slab)); } if (!vec) { - /* Mark vectors which failed to allocate */ - mark_failed_objexts_alloc(slab); + /* + * Try to mark vectors which failed to allocate. + * If this operation fails, there may be a racing process + * that has already completed the allocation. + */ + if (!mark_failed_objexts_alloc(slab) && + slab_obj_exts(slab)) + return 0; return -ENOMEM; } @@ -2136,6 +2146,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, #ifdef CONFIG_MEMCG new_exts |= MEMCG_DATA_OBJEXTS; #endif +retry: old_exts = READ_ONCE(slab->obj_exts); handle_failed_objexts_alloc(old_exts, vec, objects); if (new_slab) { @@ -2145,8 +2156,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, * be simply assigned. */ slab->obj_exts = new_exts; - } else if ((old_exts & ~OBJEXTS_FLAGS_MASK) || - cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + } else if (old_exts & ~OBJEXTS_FLAGS_MASK) { /* * If the slab is already in use, somebody can allocate and * assign slabobj_exts in parallel. In this case the existing @@ -2158,6 +2168,9 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, else kfree(vec); return 0; + } else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + /* Retry if a racing thread changed slab->obj_exts from under us. */ + goto retry; } if (allow_spin) @@ -2170,8 +2183,15 @@ static inline void free_slab_obj_exts(struct slab *slab) struct slabobj_ext *obj_exts; obj_exts = slab_obj_exts(slab); - if (!obj_exts) + if (!obj_exts) { + /* + * If obj_exts allocation failed, slab->obj_exts is set to + * OBJEXTS_ALLOC_FAIL. In this case, we end up here and should + * clear the flag. + */ + slab->obj_exts = 0; return; + } /* * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its @@ -3412,7 +3432,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab, if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) { /* Unlucky, discard newly allocated slab */ - slab->frozen = 1; defer_deactivate_slab(slab, NULL); return NULL; } @@ -4651,8 +4670,12 @@ new_objects: if (kmem_cache_debug(s)) { freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); - if (unlikely(!freelist)) + if (unlikely(!freelist)) { + /* This could cause an endless loop. Fail instead. */ + if (!allow_spin) + return NULL; goto new_objects; + } if (s->flags & SLAB_STORE_USER) set_track(s, freelist, TRACK_ALLOC, addr, @@ -6313,8 +6336,6 @@ next_remote_batch: if (unlikely(!slab_free_hook(s, p[i], init, false))) { p[i] = p[--size]; - if (!size) - goto flush_remote; continue; } @@ -6329,6 +6350,9 @@ next_remote_batch: i++; } + if (!size) + goto flush_remote; + next_batch: if (!local_trylock(&s->cpu_sheaves->lock)) goto fallback; @@ -6383,6 +6407,9 @@ do_free: goto next_batch; } + if (remote_nr) + goto flush_remote; + return; no_empty: @@ -6443,15 +6470,16 @@ static void free_deferred_objects(struct irq_work *work) slab = virt_to_slab(x); s = slab->slab_cache; + /* Point 'x' back to the beginning of allocated object */ + x -= s->offset; + /* * We used freepointer in 'x' to link 'x' into df->objects. * Clear it to NULL to avoid false positive detection * of "Freepointer corruption". */ - *(void **)x = NULL; + set_freepointer(s, x, NULL); - /* Point 'x' back to the beginning of allocated object */ - x -= s->offset; __slab_free(s, slab, x, x, 1, _THIS_IP_); } @@ -6460,9 +6488,12 @@ static void free_deferred_objects(struct irq_work *work) struct slab *slab = container_of(pos, struct slab, llnode); #ifdef CONFIG_SLUB_TINY - discard_slab(slab->slab_cache, slab); + free_slab(slab->slab_cache, slab); #else - deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + if (slab->frozen) + deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + else + free_slab(slab->slab_cache, slab); #endif } } diff --git a/mm/swap_state.c b/mm/swap_state.c index b13e9c4baa90..f4980dde5394 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) { + struct swap_info_struct *si = NULL; + if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) @@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, continue; pte_unmap(pte); pte = NULL; + /* + * Readahead entry may come from a device that we are not + * holding a reference to, try to grab a reference, or skip. + */ + if (swp_type(entry) != swp_type(targ_entry)) { + si = get_swap_device(entry); + if (!si) + continue; + } folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); + if (si) + put_swap_device(si); if (!folio) continue; if (page_allocated) { diff --git a/mm/truncate.c b/mm/truncate.c index 91eb92a5ce4f..3c5a50ae3274 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) return 0; } +static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, + unsigned long min_order) +{ + enum ttu_flags ttu_flags = + TTU_SYNC | + TTU_SPLIT_HUGE_PMD | + TTU_IGNORE_MLOCK; + int ret; + + ret = try_folio_split_to_order(folio, split_at, min_order); + + /* + * If the split fails, unmap the folio, so it will be refaulted + * with PTEs to respect SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + if (ret && !shmem_mapping(folio->mapping)) { + try_to_unmap(folio, ttu_flags); + WARN_ON(folio_mapped(folio)); + } + + return ret; +} + /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the @@ -194,6 +220,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) size_t size = folio_size(folio); unsigned int offset, length; struct page *split_at, *split_at2; + unsigned int min_order; if (pos < start) offset = start - pos; @@ -223,8 +250,9 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!folio_test_large(folio)) return true; + min_order = mapping_min_folio_order(folio->mapping); split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - if (!try_folio_split(folio, split_at, NULL)) { + if (!try_folio_split_or_unmap(folio, split_at, min_order)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste @@ -248,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!folio_trylock(folio2)) goto out; - /* - * make sure folio2 is large and does not change its mapping. - * Its split result does not matter here. - */ + /* make sure folio2 is large and does not change its mapping */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) - try_folio_split(folio2, split_at2, NULL); + try_folio_split_or_unmap(folio2, split_at2, min_order); folio_unlock(folio2); out: diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index fda3a80e9340..2b74ed56eb16 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -193,6 +193,8 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; + netdev_update_features(dev); + return 0; out_unregister_netdev: diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index c84420cb410d..a662408ad867 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -763,11 +763,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); @@ -1327,11 +1332,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index f0c862091bff..2c21ae8abadc 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -53,6 +53,11 @@ static bool enable_6lowpan; static struct l2cap_chan *listen_chan; static DEFINE_MUTEX(set_lock); +enum { + LOWPAN_PEER_CLOSING, + LOWPAN_PEER_MAXBITS +}; + struct lowpan_peer { struct list_head list; struct rcu_head rcu; @@ -61,6 +66,8 @@ struct lowpan_peer { /* peer addresses in various formats */ unsigned char lladdr[ETH_ALEN]; struct in6_addr peer_addr; + + DECLARE_BITMAP(flags, LOWPAN_PEER_MAXBITS); }; struct lowpan_btle_dev { @@ -289,6 +296,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->pkt_type = PACKET_HOST; local_skb->dev = dev; + skb_reset_mac_header(local_skb); skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { @@ -919,7 +927,9 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) BT_DBG("peer %p chan %p", peer, peer->chan); + l2cap_chan_lock(peer->chan); l2cap_chan_close(peer->chan, ENOENT); + l2cap_chan_unlock(peer->chan); return 0; } @@ -956,10 +966,11 @@ static struct l2cap_chan *bt_6lowpan_listen(void) } static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, - struct l2cap_conn **conn) + struct l2cap_conn **conn, bool disconnect) { struct hci_conn *hcon; struct hci_dev *hdev; + int le_addr_type; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -970,13 +981,32 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; + if (disconnect) { + /* The "disconnect" debugfs command has used different address + * type constants than "connect" since 2015. Let's retain that + * for now even though it's obviously buggy... + */ + *addr_type += 1; + } + + switch (*addr_type) { + case BDADDR_LE_PUBLIC: + le_addr_type = ADDR_LE_DEV_PUBLIC; + break; + case BDADDR_LE_RANDOM: + le_addr_type = ADDR_LE_DEV_RANDOM; + break; + default: + return -EINVAL; + } + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; hci_dev_lock(hdev); - hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); + hcon = hci_conn_hash_lookup_le(hdev, addr, le_addr_type); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -993,41 +1023,52 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, static void disconnect_all_peers(void) { struct lowpan_btle_dev *entry; - struct lowpan_peer *peer, *tmp_peer, *new_peer; - struct list_head peers; - - INIT_LIST_HEAD(&peers); + struct lowpan_peer *peer; + int nchans; - /* We make a separate list of peers as the close_cb() will - * modify the device peers list so it is better not to mess - * with the same list at the same time. + /* l2cap_chan_close() cannot be called from RCU, and lock ordering + * chan->lock > devices_lock prevents taking write side lock, so copy + * then close. */ rcu_read_lock(); + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) + list_for_each_entry_rcu(peer, &entry->peers, list) + clear_bit(LOWPAN_PEER_CLOSING, peer->flags); + rcu_read_unlock(); - list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { - list_for_each_entry_rcu(peer, &entry->peers, list) { - new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); - if (!new_peer) - break; + do { + struct l2cap_chan *chans[32]; + int i; - new_peer->chan = peer->chan; - INIT_LIST_HEAD(&new_peer->list); + nchans = 0; - list_add(&new_peer->list, &peers); - } - } + spin_lock(&devices_lock); - rcu_read_unlock(); + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { + list_for_each_entry_rcu(peer, &entry->peers, list) { + if (test_and_set_bit(LOWPAN_PEER_CLOSING, + peer->flags)) + continue; - spin_lock(&devices_lock); - list_for_each_entry_safe(peer, tmp_peer, &peers, list) { - l2cap_chan_close(peer->chan, ENOENT); + l2cap_chan_hold(peer->chan); + chans[nchans++] = peer->chan; - list_del_rcu(&peer->list); - kfree_rcu(peer, rcu); - } - spin_unlock(&devices_lock); + if (nchans >= ARRAY_SIZE(chans)) + goto done; + } + } + +done: + spin_unlock(&devices_lock); + + for (i = 0; i < nchans; ++i) { + l2cap_chan_lock(chans[i]); + l2cap_chan_close(chans[i], ENOENT); + l2cap_chan_unlock(chans[i]); + l2cap_chan_put(chans[i]); + } + } while (nchans); } struct set_enable { @@ -1050,7 +1091,9 @@ static void do_enable_set(struct work_struct *work) mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } @@ -1103,13 +1146,15 @@ static ssize_t lowpan_control_write(struct file *fp, buf[buf_size] = '\0'; if (memcmp(buf, "connect ", 8) == 0) { - ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn, false); if (ret == -EINVAL) return ret; mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); listen_chan = NULL; } @@ -1140,7 +1185,7 @@ static ssize_t lowpan_control_write(struct file *fp, } if (memcmp(buf, "disconnect ", 11) == 0) { - ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn, true); if (ret < 0) return ret; @@ -1271,7 +1316,9 @@ static void __exit bt_6lowpan_exit(void) debugfs_remove(lowpan_control_debugfs); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 111f0e37b672..6fc0692abf05 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -769,21 +769,23 @@ static void find_bis(struct hci_conn *conn, void *data) d->count++; } -static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) +static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) { struct iso_list_data *d; int ret; - bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle); + bt_dev_dbg(hdev, "hcon %p big 0x%2.2x sync_handle 0x%4.4x", conn, + conn->iso_qos.bcast.big, conn->sync_handle); d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) return -ENOMEM; - d->big = big; + d->big = conn->iso_qos.bcast.big; d->sync_handle = conn->sync_handle; - if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { + if (conn->type == PA_LINK && + test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { hci_conn_hash_list_flag(hdev, find_bis, PA_LINK, HCI_CONN_PA_SYNC, d); @@ -801,6 +803,9 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c d->big_sync_term = true; } + if (!d->pa_sync_term && !d->big_sync_term) + return 0; + ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); if (ret) @@ -843,10 +848,16 @@ static void bis_cleanup(struct hci_conn *conn) if (bis) return; + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_OPEN, + HCI_ROLE_MASTER); + if (bis) + return; + hci_le_terminate_big(hdev, conn); } else { - hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, - conn); + hci_le_big_terminate(hdev, conn); } } @@ -987,19 +998,20 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case CIS_LINK: - case BIS_LINK: - case PA_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); - /* set proper cleanup function */ - if (!bacmp(dst, BDADDR_ANY)) - conn->cleanup = bis_cleanup; - else if (conn->role == HCI_ROLE_MASTER) + if (conn->role == HCI_ROLE_MASTER) conn->cleanup = cis_cleanup; - conn->mtu = hdev->iso_mtu ? hdev->iso_mtu : - hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; + conn->mtu = hdev->iso_mtu; + break; + case PA_LINK: + case BIS_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + conn->cleanup = bis_cleanup; + conn->mtu = hdev->iso_mtu; break; case SCO_LINK: if (lmp_esco_capable(hdev)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d790b0d4eb9a..3838b90343d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1607,8 +1607,10 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_ADV); - if (adv && !adv->periodic) + if (adv) adv->enabled = true; + else if (!set->handle) + hci_dev_set_flag(hdev, HCI_LE_ADV_0); conn = hci_lookup_le_connect(hdev); if (conn) @@ -1619,6 +1621,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (cp->num_of_sets) { if (adv) adv->enabled = false; + else if (!set->handle) + hci_dev_clear_flag(hdev, HCI_LE_ADV_0); /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_ADV @@ -3959,8 +3963,11 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_PER_ADV); if (adv) - adv->enabled = true; + adv->periodic_enabled = true; } else { + if (adv) + adv->periodic_enabled = false; + /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_PER_ADV. * The current periodic adv instance will be marked as @@ -4211,6 +4218,13 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } if (i == ARRAY_SIZE(hci_cc_table)) { + if (!skb->len) { + bt_dev_err(hdev, "Unexpected cc 0x%4.4x with no status", + *opcode); + *status = HCI_ERROR_UNSPECIFIED; + return; + } + /* Unknown opcode, assume byte 0 contains the status, so * that e.g. __hci_cmd_sync() properly returns errors * for vendor specific commands send by HCI drivers. @@ -5829,6 +5843,29 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, void *data, le16_to_cpu(ev->supervision_timeout)); } +static void hci_le_pa_sync_lost_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_le_pa_sync_lost *ev = data; + u16 handle = le16_to_cpu(ev->handle); + struct hci_conn *conn; + + bt_dev_dbg(hdev, "sync handle 0x%4.4x", handle); + + hci_dev_lock(hdev); + + /* Delete the pa sync connection */ + conn = hci_conn_hash_lookup_pa_sync_handle(hdev, handle); + if (conn) { + clear_bit(HCI_CONN_BIG_SYNC, &conn->flags); + clear_bit(HCI_CONN_PA_SYNC, &conn->flags); + hci_disconn_cfm(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_conn_del(conn); + } + + hci_dev_unlock(hdev); +} + static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -6987,14 +7024,9 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, continue; } - if (ev->status != 0x42) { + if (ev->status != 0x42) /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); - /* Reset cleanup callback of PA Sync so it doesn't - * terminate the sync when deleting the connection. - */ - conn->cleanup = NULL; - } bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; @@ -7037,29 +7069,24 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_evt_le_big_sync_lost *ev = data; - struct hci_conn *bis, *conn; - bool mgmt_conn; + struct hci_conn *bis; + bool mgmt_conn = false; bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); hci_dev_lock(hdev); - /* Delete the pa sync connection */ - bis = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); - if (bis) { - conn = hci_conn_hash_lookup_pa_sync_handle(hdev, - bis->sync_handle); - if (conn) - hci_conn_del(conn); - } - /* Delete each bis connection */ while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, BT_CONNECTED, HCI_ROLE_SLAVE))) { - mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); - mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, - ev->reason, mgmt_conn); + if (!mgmt_conn) { + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, + &bis->flags); + mgmt_device_disconnected(hdev, &bis->dst, bis->type, + bis->dst_type, ev->reason, + mgmt_conn); + } clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); hci_disconn_cfm(bis, ev->reason); @@ -7173,6 +7200,9 @@ static const struct hci_le_ev { hci_le_per_adv_report_evt, sizeof(struct hci_ev_le_per_adv_report), HCI_MAX_EVENT_SIZE), + /* [0x10 = HCI_EV_LE_PA_SYNC_LOST] */ + HCI_LE_EV(HCI_EV_LE_PA_SYNC_LOST, hci_le_pa_sync_lost_evt, + sizeof(struct hci_ev_le_pa_sync_lost)), /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, sizeof(struct hci_evt_le_ext_adv_set_term)), diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eefdb6134ca5..6e76798ec786 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -863,11 +863,17 @@ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, { struct hci_cmd_sync_work_entry *entry; - entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); - if (!entry) + mutex_lock(&hdev->cmd_sync_work_lock); + + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) { + mutex_unlock(&hdev->cmd_sync_work_lock); return false; + } - hci_cmd_sync_cancel_entry(hdev, entry); + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + + mutex_unlock(&hdev->cmd_sync_work_lock); return true; } @@ -1601,7 +1607,7 @@ int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already disabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->periodic || !adv->enabled) + if (!adv || !adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); @@ -1666,7 +1672,7 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (adv && adv->periodic && adv->enabled) + if (adv && adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); @@ -2600,9 +2606,8 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) /* If current advertising instance is set to instance 0x00 * then we need to re-enable it. */ - if (!hdev->cur_adv_instance) - err = hci_enable_ext_advertising_sync(hdev, - hdev->cur_adv_instance); + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_ADV_0)) + err = hci_enable_ext_advertising_sync(hdev, 0x00); } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop @@ -6994,7 +6999,7 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); - if (!hci_conn_valid(hdev, conn)) + if (hci_conn_valid(hdev, conn)) clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); if (!err) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 9b263d061e05..3d98cb6291da 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2032,7 +2032,7 @@ static void iso_conn_ready(struct iso_conn *conn) */ if (!bacmp(&hcon->dst, BDADDR_ANY)) { bacpy(&hcon->dst, &iso_pi(parent)->dst); - hcon->dst_type = iso_pi(parent)->dst_type; + hcon->dst_type = le_addr_type(iso_pi(parent)->dst_type); } if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { @@ -2046,7 +2046,13 @@ static void iso_conn_ready(struct iso_conn *conn) } bacpy(&iso_pi(sk)->dst, &hcon->dst); - iso_pi(sk)->dst_type = hcon->dst_type; + + /* Convert from HCI to three-value type */ + if (hcon->dst_type == ADDR_LE_DEV_PUBLIC) + iso_pi(sk)->dst_type = BDADDR_LE_PUBLIC; + else + iso_pi(sk)->dst_type = BDADDR_LE_RANDOM; + iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle; memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len); iso_pi(sk)->base_len = iso_pi(parent)->base_len; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 805c752ac0a9..35c57657bcf4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -282,7 +282,7 @@ static void __set_retrans_timer(struct l2cap_chan *chan) if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, - secs_to_jiffies(chan->retrans_timeout)); + msecs_to_jiffies(chan->retrans_timeout)); } } @@ -291,7 +291,7 @@ static void __set_monitor_timer(struct l2cap_chan *chan) __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, - secs_to_jiffies(chan->monitor_timeout)); + msecs_to_jiffies(chan->monitor_timeout)); } } @@ -497,6 +497,7 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +EXPORT_SYMBOL_GPL(l2cap_chan_hold); struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a3d16eece0d2..262bf984d2aa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2175,19 +2175,24 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + status); mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); - return; + goto done; } - mgmt_pending_remove(cmd); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); + +done: + mgmt_pending_free(cmd); } static int set_mesh_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_mesh cp; + DEFINE_FLEX(struct mgmt_cp_set_mesh, cp, ad_types, num_ad_types, + sizeof(hdev->mesh_ad_types)); size_t len; mutex_lock(&hdev->mgmt_pending_lock); @@ -2197,27 +2202,26 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) return -ECANCELED; } - memcpy(&cp, cmd->param, sizeof(cp)); + len = cmd->param_len; + memcpy(cp, cmd->param, min(__struct_size(cp), len)); mutex_unlock(&hdev->mgmt_pending_lock); - len = cmd->param_len; - memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); - if (cp.enable) + if (cp->enable) hci_dev_set_flag(hdev, HCI_MESH); else hci_dev_clear_flag(hdev, HCI_MESH); - hdev->le_scan_interval = __le16_to_cpu(cp.period); - hdev->le_scan_window = __le16_to_cpu(cp.window); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); - len -= sizeof(cp); + len -= sizeof(struct mgmt_cp_set_mesh); /* If filters don't fit, forward all adv pkts */ if (len <= sizeof(hdev->mesh_ad_types)) - memcpy(hdev->mesh_ad_types, cp.ad_types, len); + memcpy(hdev->mesh_ad_types, cp->ad_types, len); hci_update_passive_scan_sync(hdev); return 0; @@ -5391,9 +5395,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); @@ -9493,6 +9497,7 @@ void mgmt_index_removed(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->service_cache); cancel_delayed_work_sync(&hdev->rpa_expired); + cancel_delayed_work_sync(&hdev->mesh_send_done); } void mgmt_power_on(struct hci_dev *hdev, int err) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 376ce6de84be..b783526ab588 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -643,8 +643,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) tty_port_tty_hangup(&dev->port, true); dev->modem_status = - ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | - ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | + ((v24_sig & RFCOMM_V24_RTC) ? TIOCM_DSR : 0) | + ((v24_sig & RFCOMM_V24_RTR) ? TIOCM_CTS : 0) | ((v24_sig & RFCOMM_V24_IC) ? TIOCM_RI : 0) | ((v24_sig & RFCOMM_V24_DV) ? TIOCM_CD : 0); } @@ -1055,10 +1055,14 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) static int rfcomm_tty_tiocmget(struct tty_struct *tty) { struct rfcomm_dev *dev = tty->driver_data; + struct rfcomm_dlc *dlc = dev->dlc; + u8 v24_sig; BT_DBG("tty %p dev %p", tty, dev); - return dev->modem_status; + rfcomm_dlc_get_modem_status(dlc, &v24_sig); + + return (v24_sig & (TIOCM_DTR | TIOCM_RTS)) | dev->modem_status; } static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) @@ -1071,23 +1075,15 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigne rfcomm_dlc_get_modem_status(dlc, &v24_sig); - if (set & TIOCM_DSR || set & TIOCM_DTR) + if (set & TIOCM_DTR) v24_sig |= RFCOMM_V24_RTC; - if (set & TIOCM_RTS || set & TIOCM_CTS) + if (set & TIOCM_RTS) v24_sig |= RFCOMM_V24_RTR; - if (set & TIOCM_RI) - v24_sig |= RFCOMM_V24_IC; - if (set & TIOCM_CD) - v24_sig |= RFCOMM_V24_DV; - if (clear & TIOCM_DSR || clear & TIOCM_DTR) + if (clear & TIOCM_DTR) v24_sig &= ~RFCOMM_V24_RTC; - if (clear & TIOCM_RTS || clear & TIOCM_CTS) + if (clear & TIOCM_RTS) v24_sig &= ~RFCOMM_V24_RTR; - if (clear & TIOCM_RI) - v24_sig &= ~RFCOMM_V24_IC; - if (clear & TIOCM_CD) - v24_sig &= ~RFCOMM_V24_DV; rfcomm_dlc_set_modem_status(dlc, v24_sig); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfb03ee0bb62..8b7d0b90fea7 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -29,7 +29,6 @@ #include <trace/events/bpf_test_run.h> struct bpf_test_timer { - enum { NO_PREEMPT, NO_MIGRATE } mode; u32 i; u64 time_start, time_spent; }; @@ -37,12 +36,7 @@ struct bpf_test_timer { static void bpf_test_timer_enter(struct bpf_test_timer *t) __acquires(rcu) { - rcu_read_lock(); - if (t->mode == NO_PREEMPT) - preempt_disable(); - else - migrate_disable(); - + rcu_read_lock_dont_migrate(); t->time_start = ktime_get_ns(); } @@ -50,12 +44,7 @@ static void bpf_test_timer_leave(struct bpf_test_timer *t) __releases(rcu) { t->time_start = 0; - - if (t->mode == NO_PREEMPT) - preempt_enable(); - else - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, @@ -374,7 +363,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, { struct xdp_test_data xdp = { .batch_size = batch_size }; - struct bpf_test_timer t = { .mode = NO_MIGRATE }; + struct bpf_test_timer t = {}; int ret; if (!repeat) @@ -404,7 +393,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; - struct bpf_test_timer t = { NO_MIGRATE }; + struct bpf_test_timer t = {}; enum bpf_cgroup_storage_type stype; int ret; @@ -1269,7 +1258,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, goto free_ctx; if (kattr->test.data_size_in - meta_sz < ETH_HLEN) - return -EINVAL; + goto free_ctx; data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); if (IS_ERR(data)) { @@ -1377,7 +1366,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; @@ -1445,7 +1434,7 @@ out: int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; struct bpf_prog_array *progs = NULL; struct bpf_sk_lookup_kern ctx = {}; u32 repeat = kattr->test.repeat; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 870bdf2e082c..dea09096ad0f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + (br_mst_is_enabled(p) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f..ca3a637d7cca 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,6 +386,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_mst_uninit(br); br_recalculate_neigh_suppress_enabled(br); br_fdb_delete_by_port(br, NULL, 0, 1); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 67b4c905e49a..777fa869c1a1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -94,7 +94,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br = p->br; - if (br_mst_is_enabled(br)) { + if (br_mst_is_enabled(p)) { state = BR_STATE_FORWARDING; } else { if (p->state == BR_STATE_DISABLED) { @@ -429,7 +429,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: - if (br_mst_is_enabled(p->br)) + if (br_mst_is_enabled(p)) goto defer_stp_filtering; switch (p->state) { diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3f24b4ee49c2..43a300ae6bfa 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -22,6 +22,12 @@ bool br_mst_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_mst_enabled); +void br_mst_uninit(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_MST_ENABLED)) + static_branch_dec(&br_mst_used); +} + int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { const struct net_bridge_vlan_group *vg; @@ -225,9 +231,9 @@ int br_mst_set_enabled(struct net_bridge *br, bool on, return err; if (on) - static_branch_enable(&br_mst_used); + static_branch_inc(&br_mst_used); else - static_branch_disable(&br_mst_used); + static_branch_dec(&br_mst_used); br_opt_toggle(br, BROPT_MST_ENABLED, on); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16be5d250402..7280c4e9305f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1935,10 +1935,12 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { + /* check the port's vlan group to avoid racing with port deletion */ return static_branch_unlikely(&br_mst_used) && - br_opt_get(br, BROPT_MST_ENABLED); + br_opt_get(p->br, BROPT_MST_ENABLED) && + rcu_access_pointer(p->vlgrp); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, @@ -1952,8 +1954,9 @@ int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); +void br_mst_uninit(struct net_bridge *br); #else -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { return false; } @@ -1987,6 +1990,10 @@ static inline int br_mst_process(struct net_bridge_port *p, { return -EOPNOTSUPP; } + +static inline void br_mst_uninit(struct net_bridge *br) +{ +} #endif struct nf_br_ops { diff --git a/net/core/datagram.c b/net/core/datagram.c index cb4b9ef2e4e3..c285c6465923 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -920,21 +920,22 @@ fault: EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** - * datagram_poll - generic datagram poll + * datagram_poll_queue - same as datagram_poll, but on a specific receive + * queue * @file: file struct * @sock: socket * @wait: poll table + * @rcv_queue: receive queue to poll * - * Datagram poll: Again totally generic. This also handles - * sequenced packet sockets providing the socket receive queue - * is only ever holding data ready to receive. + * Performs polling on the given receive queue, handling shutdown, error, + * and connection state. This is useful for protocols that deliver + * userspace-bound packets through a custom queue instead of + * sk->sk_receive_queue. * - * Note: when you *don't* use this routine for this protocol, - * and you use a different write policy from sock_writeable() - * then please supply your own write_space callback. + * Return: poll bitmask indicating the socket's current state */ -__poll_t datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + poll_table *wait, struct sk_buff_head *rcv_queue) { struct sock *sk = sock->sk; __poll_t mask; @@ -956,7 +957,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? */ - if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(rcv_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -978,4 +979,27 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, return mask; } +EXPORT_SYMBOL(datagram_poll_queue); + +/** + * datagram_poll - generic datagram poll + * @file: file struct + * @sock: socket + * @wait: poll table + * + * Datagram poll: Again totally generic. This also handles + * sequenced packet sockets providing the socket receive queue + * is only ever holding data ready to receive. + * + * Note: when you *don't* use this routine for this protocol, + * and you use a different write policy from sock_writeable() + * then please supply your own write_space callback. + * + * Return: poll bitmask indicating the socket's current state + */ +__poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + return datagram_poll_queue(file, sock, wait, + &sock->sk->sk_receive_queue); +} EXPORT_SYMBOL(datagram_poll); diff --git a/net/core/devmem.c b/net/core/devmem.c index d9de31a6cc7f..1d04754bc756 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -17,6 +17,7 @@ #include <net/page_pool/helpers.h> #include <net/page_pool/memory_provider.h> #include <net/sock.h> +#include <net/tcp.h> #include <trace/events/page_pool.h> #include "devmem.h" @@ -357,7 +358,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) { struct net_devmem_dmabuf_binding *binding; - struct dst_entry *dst = __sk_dst_get(sk); + struct net_device *dst_dev; + struct dst_entry *dst; int err = 0; binding = net_devmem_lookup_dmabuf(dmabuf_id); @@ -366,16 +368,35 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, goto out_err; } + rcu_read_lock(); + dst = __sk_dst_get(sk); + /* If dst is NULL (route expired), attempt to rebuild it. */ + if (unlikely(!dst)) { + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) { + err = -EHOSTUNREACH; + goto out_unlock; + } + dst = __sk_dst_get(sk); + if (unlikely(!dst)) { + err = -ENODEV; + goto out_unlock; + } + } + /* The dma-addrs in this binding are only reachable to the corresponding * net_device. */ - if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) { + dst_dev = dst_dev_rcu(dst); + if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { err = -ENODEV; - goto out_err; + goto out_unlock; } + rcu_read_unlock(); return binding; +out_unlock: + rcu_read_unlock(); out_err: if (binding) net_devmem_dmabuf_binding_put(binding); diff --git a/net/core/filter.c b/net/core/filter.c index 76628df1fc82..fa06c5a08e22 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3877,7 +3877,8 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, u32 new_len = skb->len + head_room; int ret; - if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || + if (unlikely(flags || (int)head_room < 0 || + (!skb_is_gso(skb) && new_len > max_len) || new_len < skb->len)) return -EINVAL; diff --git a/net/core/gro.c b/net/core/gro.c index 5ba4504cfd28..76f9c3712422 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -639,6 +639,8 @@ EXPORT_SYMBOL(gro_receive_skb); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + struct skb_shared_info *shinfo; + if (unlikely(skb->pfmemalloc)) { consume_skb(skb); return; @@ -655,8 +657,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb->ip_summed = CHECKSUM_NONE; - skb_shinfo(skb)->gso_type = 0; - skb_shinfo(skb)->gso_size = 0; + + shinfo = skb_shinfo(skb); + shinfo->gso_type = 0; + shinfo->gso_size = 0; + shinfo->hwtstamps.hwtstamp = 0; + if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index b43911562f4d..a725d21159a6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -43,12 +43,11 @@ drop: if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - if (have_bh_lock) - local_unlock_nested_bh(&gcells->cells->bh_lock); - res = NET_RX_SUCCESS; unlock: + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); rcu_read_unlock(); return res; } @@ -61,9 +60,10 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; - __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { + __local_lock_nested_bh(&cell->bh_lock); skb = __skb_dequeue(&cell->napi_skbs); + __local_unlock_nested_bh(&cell->bh_lock); if (!skb) break; napi_gro_receive(napi, skb); @@ -72,7 +72,6 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); - __local_unlock_nested_bh(&cell->bh_lock); return work_done; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 60a05d3b7c24..331764845e8f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) @@ -814,6 +811,10 @@ static void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; + /* At this point, there is a single npinfo instance per netdevice, and + * its refcnt tracks how many netpoll structures are linked to it. We + * only perform npinfo cleanup when the refcnt decrements to zero. + */ if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -823,8 +824,7 @@ static void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); - } else - RCU_INIT_POINTER(np->dev->npinfo, NULL); + } skb_pool_flush(np); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8040ff7c356e..576d5ec3bb36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4715,9 +4715,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u16 vid; - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - if (!del_bulk) { err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 26bb657ceac3..eadb358179ce 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -176,7 +176,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -224,12 +225,14 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, { int len = BRCM_LEG_TAG_LEN; int source_port; + __be16 *proto; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); + proto = (__be16 *)(brcm_tag + BRCM_LEG_TAG_LEN); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -237,14 +240,19 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) + /* The internal switch in BCM63XX SoCs always tags on egress on the CPU + * port. We use VID 0 internally for untagged traffic, so strip the tag + * if the TCI field is all 0, and keep it otherwise to also retain + * e.g. 802.1p tagged packets. + */ + if (proto[0] == htons(ETH_P_8021Q) && proto[1] == 0) len += VLAN_HLEN; /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, len); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); dsa_strip_etype_header(skb, len); diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 081093dfd553..8f9532a15f43 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -259,6 +259,7 @@ static int tls_handshake_accept(struct handshake_req *req, out_cancel: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); out: return ret; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index fbbc3ccf9df6..492cbc78ab75 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -320,6 +320,9 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + skb_reset_mac_len(skb); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); @@ -334,7 +337,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag->tlv.HSR_TLV_type = type; - /* TODO: Why 12 in HSRv0? */ + /* HSRv0 has 6 unused bytes after the MAC */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? sizeof(struct hsr_sup_payload) : 12; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index c67c0d35921d..339f0d220212 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -262,15 +262,23 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, return skb; } -static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, +static void hsr_set_path_id(struct hsr_frame_info *frame, + struct hsr_ethhdr *hsr_ethhdr, struct hsr_port *port) { int path_id; - if (port->type == HSR_PT_SLAVE_A) - path_id = 0; - else - path_id = 1; + if (port->hsr->prot_version) { + if (port->type == HSR_PT_SLAVE_A) + path_id = 0; + else + path_id = 1; + } else { + if (frame->is_supervision) + path_id = 0xf; + else + path_id = 1; + } set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); } @@ -304,7 +312,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, else hsr_ethhdr = (struct hsr_ethhdr *)pc; - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; @@ -330,7 +338,7 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); /* set the lane id properly */ - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b120470246cc..c96b63adf96f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -34,12 +34,18 @@ static int hsr_newlink(struct net_device *dev, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); + struct net_device *link[2], *interlink = NULL; struct nlattr **data = params->data; enum hsr_version proto_version; unsigned char multicast_spec; u8 proto = HSR_PROTOCOL_HSR; - struct net_device *link[2], *interlink = NULL; + if (!net_eq(link_net, dev_net(dev))) { + NL_SET_ERR_MSG_MOD(extack, + "HSR slaves/interlink must be on the same net namespace than HSR link"); + return -EINVAL; + } + if (!data) { NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); return -EINVAL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d27d3610c1c..b549d6a57307 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -607,6 +607,11 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) oldest_p = fnhe_p; } } + + /* Clear oldest->fnhe_daddr to prevent this fnhe from being + * rebound with new dsts in rt_bind_exception(). + */ + oldest->fnhe_daddr = 0; fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31ea5af49f2d..e4a979b75cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -891,18 +891,27 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, } } -void tcp_rcvbuf_grow(struct sock *sk) +void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + + oldval = tp->rcvq_space.space; + tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; + /* DRS is always one RTT late. */ + rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ - rcvwin = tp->rcvq_space.space << 1; + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; @@ -943,9 +952,7 @@ void tcp_rcv_space_adjust(struct sock *sk) trace_tcp_rcvbuf_grow(sk, time); - tp->rcvq_space.space = copied; - - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, copied); new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -5270,7 +5277,7 @@ end: } /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, tp->rcvq_space.space); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9aca1c3c097..c52b0456039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1876,6 +1876,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; + link_conf->fils_discovery.min_interval = 0; + link_conf->fils_discovery.max_interval = 0; + link_conf->unsol_bcast_probe_resp_interval = 0; __sta_info_flush(sdata, true, link_id, NULL); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 57065714cf8c..7f8799fd673e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1290,7 +1290,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73fd86ec1bce..878c3b14aeb8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -612,11 +612,11 @@ struct ieee80211_if_managed { u8 *assoc_req_ies; size_t assoc_req_ies_len; - struct wiphy_delayed_work ml_reconf_work; + struct wiphy_hrtimer_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ - struct wiphy_delayed_work ttlm_work; + struct wiphy_hrtimer_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; struct wiphy_work teardown_ttlm_work; @@ -1017,10 +1017,10 @@ struct ieee80211_link_data_managed { bool operating_11g_mode; struct { - struct wiphy_delayed_work switch_work; + struct wiphy_hrtimer_work switch_work; struct cfg80211_chan_def ap_chandef; struct ieee80211_parsed_tpe tpe; - unsigned long time; + ktime_t time; bool waiting_bcn; bool ignored_same_chan; bool blocked_tx; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a7873832d4fa..0ca55b9655a7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -223,6 +223,10 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata if (netif_carrier_ok(sdata->dev)) return -EBUSY; + /* if any stations are set known (so they know this vif too), reject */ + if (sta_info_get_by_idx(sdata, 0)) + return -EBUSY; + /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) @@ -242,12 +246,16 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata ret = -EBUSY; } + /* + * More interface types could be added here but changing the + * address while powered makes the most sense in client modes. + */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - /* More interface types could be added here but changing the - * address while powered makes the most sense in client modes. - */ + /* refuse while connecting */ + if (sdata->u.mgd.auth_data || sdata->u.mgd.assoc_data) + return -EBUSY; break; default: ret = -EOPNOTSUPP; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b14e9cd9713f..d5da7ccea66e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -508,11 +508,16 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) + if (!new->local->wowlan) { ret = ieee80211_key_enable_hw_accel(new); - else if (link_id < 0 || !sdata->vif.active_links || - BIT(link_id) & sdata->vif.active_links) + } else if (link_id < 0 || !sdata->vif.active_links || + BIT(link_id) & sdata->vif.active_links) { new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + if (!(new->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + decrease_tailroom_need_count(sdata, 1); + } } if (ret) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d71eabe5abf8..4a19b765ccb6 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -472,10 +472,10 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, * from there. */ if (link->conf->csa_active) - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - - jiffies); + ktime_get_boottime()); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438e..f3138d158535 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -45,7 +45,7 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS (100 * USEC_PER_MSEC) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 #define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) @@ -2594,7 +2594,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, return; } - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); } @@ -2753,7 +2753,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, .timestamp = timestamp, .device_timestamp = device_timestamp, }; - unsigned long now; + u32 csa_time_tu; + ktime_t now; int res; lockdep_assert_wiphy(local->hw.wiphy); @@ -2983,10 +2984,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.mode); /* we may have to handle timeout for deactivated link in software */ - now = jiffies; - link->u.mgd.csa.time = now + - TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - link->conf->beacon_int); + now = ktime_get_boottime(); + csa_time_tu = (max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int; + link->u.mgd.csa.time = now + us_to_ktime(ieee80211_tu_to_usec(csa_time_tu)); if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && local->ops->channel_switch) { @@ -3001,7 +3001,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - now); return; @@ -4242,14 +4242,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); wiphy_work_cancel(sdata->local->hw.wiphy, @@ -6876,7 +6876,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, /* In case the removal was cancelled, abort it */ if (sdata->u.mgd.removed_links) { sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); } return; @@ -6906,9 +6906,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, } sdata->u.mgd.removed_links = removed_links; - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work, - TU_TO_JIFFIES(delay)); + us_to_ktime(ieee80211_tu_to_usec(delay))); } static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, @@ -7095,7 +7095,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, /* if a planned TID-to-link mapping was cancelled - * abort it */ - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); } else if (sdata->u.mgd.ttlm_info.active) { /* if no TID-to-link element, set to default mapping in @@ -7130,7 +7130,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (ttlm_info.switch_time) { u16 beacon_ts_tu, st_tu, delay; - u32 delay_jiffies; + u64 delay_usec; u64 mask; /* The t2l map switch time is indicated with a partial @@ -7152,23 +7152,23 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) return; - delay_jiffies = TU_TO_JIFFIES(delay); + delay_usec = ieee80211_tu_to_usec(delay); /* Link switching can take time, so schedule it * 100ms before to be ready on time */ - if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) - delay_jiffies -= + if (delay_usec > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_usec -= IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; else - delay_jiffies = 0; + delay_usec = 0; sdata->u.mgd.ttlm_info = ttlm_info; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work, - delay_jiffies); + us_to_ktime(delay_usec)); return; } } @@ -8793,7 +8793,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_csa_connection_drop_work); wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + wiphy_hrtimer_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); wiphy_delayed_work_init(&ifmgd->reconf.wk, ieee80211_ml_sta_reconf_timeout); @@ -8802,7 +8802,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); - wiphy_delayed_work_init(&ifmgd->ttlm_work, + wiphy_hrtimer_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, ieee80211_neg_ttlm_timeout_work); @@ -8849,7 +8849,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + wiphy_hrtimer_work_init(&link->u.mgd.csa.switch_work, ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -10064,7 +10064,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) &link->u.mgd.request_smps_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); - wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6af43dfefdd6..5b4c3fe9970a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5360,10 +5360,14 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, if (WARN_ON(!local->started)) goto drop; - if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) { + if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC) && + !(status->flag & RX_FLAG_NO_PSDU && + status->zero_length_psdu_type == + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED))) { /* - * Validate the rate, unless a PLCP error means that - * we probably can't have a valid rate here anyway. + * Validate the rate, unless there was a PLCP error which may + * have an invalid rate or the PSDU was not capture and may be + * missing rate information. */ switch (status->encoding) { diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 6003e47c770a..171643815076 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK), SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK), SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED), + SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE), }; /* mptcp_mib_alloc - allocate percpu mib counters diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 309bac6fea32..a1d3e9369fbb 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -88,6 +88,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */ MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */ MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */ + MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index e0f44dc232aa..2ae95476dba3 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -370,6 +370,10 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } subflow: + /* No need to try establishing subflows to remote id0 if not allowed */ + if (mptcp_pm_add_addr_c_flag_case(msk)) + goto exit; + /* check if should create a new subflow */ while (msk->pm.local_addr_used < endp_subflow_max && msk->pm.extra_subflows < limit_extra_subflows) { @@ -401,6 +405,8 @@ subflow: __mptcp_subflow_connect(sk, &local, &addrs[i]); spin_lock_bh(&msk->pm.lock); } + +exit: mptcp_pm_nl_check_work_pending(msk); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0292162a14ee..90b4aeca2596 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { + unsigned short family = READ_ONCE(sk->sk_family); + #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (sk->sk_prot == &tcpv6_prot) + if (family == AF_INET6) return &inet6_stream_ops; #endif - WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + WARN_ON_ONCE(family != AF_INET); return &inet_stream_ops; } @@ -194,17 +196,26 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, * - mptcp does not maintain a msk-level window clamp * - returns true when the receive buffer is actually updated */ -static bool mptcp_rcvbuf_grow(struct sock *sk) +static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = msk->rcvq_space.space; + msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; - rcvwin = msk->rcvq_space.space << 1; + /* DRS is always one RTT late. */ + rcvwin = newval << 1; + + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; @@ -334,7 +345,7 @@ end: skb_set_owner_r(skb, sk); /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - mptcp_rcvbuf_grow(sk); + mptcp_rcvbuf_grow(sk, msk->rcvq_space.space); } static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, @@ -998,7 +1009,7 @@ static void __mptcp_clean_una(struct sock *sk) if (WARN_ON_ONCE(!msk->recovery)) break; - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); } dfrag_clear(sk, dfrag); @@ -1290,7 +1301,12 @@ alloc_skb: if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) { + /* No need for zero probe if there are any data pending + * either at the msk or ssk level; skb is the current write + * queue tail and can be empty at this point. + */ + if (snd_una != msk->snd_nxt || skb->len || + skb != tcp_send_head(ssk)) { tcp_remove_empty_skb(ssk); return 0; } @@ -1341,6 +1357,7 @@ alloc_skb: mpext->dsn64); if (zero_window_probe) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) @@ -1543,7 +1560,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, mptcp_update_post_push(msk, dfrag, ret); } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); if (msk->snd_burst <= 0 || !sk_stream_memory_free(ssk) || @@ -1903,7 +1920,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) get_page(dfrag->page); list_add_tail(&dfrag->list, &msk->rtx_queue); if (!msk->first_pending) - WRITE_ONCE(msk->first_pending, dfrag); + msk->first_pending = dfrag; } pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, @@ -1936,22 +1953,36 @@ do_error: static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct sock *sk, - struct msghdr *msg, - size_t len, int flags, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; + int total_data_len = 0; int copied = 0; skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { - u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 delta, offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; - u32 count = min_t(size_t, len - copied, data_len); + u32 count; int err; + if (flags & MSG_PEEK) { + /* skip already peeked skbs */ + if (total_data_len + data_len <= copied_total) { + total_data_len += data_len; + continue; + } + + /* skip the already peeked data in the current skb */ + delta = copied_total - total_data_len; + offset += delta; + data_len -= delta; + } + + count = min_t(size_t, len - copied, data_len); if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_msg(skb, offset, msg, count); if (unlikely(err < 0)) { @@ -1968,16 +1999,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, copied += count; - if (count < data_len) { - if (!(flags & MSG_PEEK)) { + if (!(flags & MSG_PEEK)) { + msk->bytes_consumed += count; + if (count < data_len) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; - msk->bytes_consumed += count; + break; } - break; - } - if (!(flags & MSG_PEEK)) { /* avoid the indirect call, we know the destructor is sock_rfree */ skb->destructor = NULL; skb->sk = NULL; @@ -1985,7 +2014,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, sk_mem_uncharge(sk, skb->truesize); __skb_unlink(skb, &sk->sk_receive_queue); skb_attempt_defer_free(skb); - msk->bytes_consumed += count; } if (copied >= len) @@ -2049,9 +2077,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - msk->rcvq_space.space = msk->rcvq_space.copied; - if (mptcp_rcvbuf_grow(sk)) { - + if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2063,8 +2089,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); - tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied; - tcp_rcvbuf_grow(ssk); + /* subflows can be added before tcp_init_transfer() */ + if (tcp_sk(ssk)->rcvq_space.space) + tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); unlock_sock_fast(ssk, slow); } } @@ -2183,7 +2210,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, + copied, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@ -2874,7 +2902,7 @@ static void __mptcp_clear_xmit(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - WRITE_ONCE(msk->first_pending, NULL); + msk->first_pending = NULL; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); } @@ -3414,9 +3442,6 @@ void __mptcp_data_acked(struct sock *sk) void __mptcp_check_push(struct sock *sk, struct sock *ssk) { - if (!mptcp_send_head(sk)) - return; - if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk, false); else diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 52f9cfa4ce95..379a88e14e8d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); - return READ_ONCE(msk->first_pending); + return msk->first_pending; } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8325890a322..af707ce0f624 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; tcp_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcp_prot_override.psock_update_sk_prot = NULL; +#endif #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; tcpv6_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcpv6_prot_override.psock_update_sk_prot = NULL; +#endif #endif mptcp_diag_subflow_init(&subflow_ulp_ops); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 92b984fa8175..fc35a11cdca2 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -48,7 +48,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - count = priv->list->count; + count = READ_ONCE(priv->list->count); if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d526e69a2a2b..6f2ae7cad731 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -22,6 +22,7 @@ #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -379,6 +380,14 @@ static bool nft_ct_tmpl_alloc_pcpu(void) } #endif +static void __nft_ct_get_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + if (priv->key == NFT_CT_LABELS) + nf_connlabels_put(ctx->net); +#endif +} + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -413,6 +422,10 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; len = NF_CT_LABELS_MAX_SIZE; + + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; break; #endif case NFT_CT_HELPER: @@ -494,7 +507,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err; } } @@ -502,11 +516,11 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) - return err; + goto err; err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) - return err; + goto err; if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS || @@ -514,6 +528,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, nf_ct_set_acct(ctx->net, true); return 0; +err: + __nft_ct_get_destroy(ctx, priv); + return err; } static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) @@ -626,6 +643,9 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + __nft_ct_get_destroy(ctx, priv); nf_ct_netns_put(ctx->net, ctx->family); } @@ -1173,6 +1193,10 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj, if (help) { rcu_assign_pointer(help->helper, to_assign); set_bit(IPS_HELPER_BIT, &ct->status); + + if ((ct->status & IPS_NAT_MASK) && !nfct_seqadj(ct)) + if (!nfct_seqadj_ext_add(ct)) + regs->verdict.code = NF_DROP; } } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 396b576390d0..c2b5bc19e091 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -47,12 +47,10 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 3e89927d7116..26ba8c2d20ab 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -195,13 +195,15 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, const struct tcf_connmark_info *ci = to_connmark(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_connmark_parms *parms; - struct tc_connmark opt = { - .index = ci->tcf_index, - .refcnt = refcount_read(&ci->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, - }; + struct tc_connmark opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ci->tcf_index; + opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; + opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; + rcu_read_lock(); parms = rcu_dereference(ci->parms); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 107c6d83dc5c..7c6975632fc2 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -644,13 +644,15 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - struct tc_ife opt = { - .index = ife->tcf_index, - .refcnt = refcount_read(&ife->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, - }; + struct tc_ife opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ife->tcf_index, + opt.refcnt = refcount_read(&ife->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, + spin_lock_bh(&ife->tcf_lock); opt.action = ife->tcf_action; p = rcu_dereference_protected(ife->params, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7fbe42f0e5c2..a32754a2658b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -97,12 +97,10 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e058b46d3e1..f56b18c8aebf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1599,6 +1599,11 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } + if (p->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot add children to ingress/clsact qdisc"); + return -EOPNOTSUPP; + } q = qdisc_leaf(p, clid, extack); if (IS_ERR(q)) return PTR_ERR(q); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1e008a228ebd..7dee9748a56b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, - int *packets) + int *packets, int budget) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; + int cnt = 0; while (bytelimit > 0) { struct sk_buff *nskb = q->dequeue(q); @@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; - (*packets)++; /* GSO counts as one pkt */ + if (++cnt >= budget) + break; } + (*packets) += cnt; skb_mark_not_on_list(skb); } @@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, * A requeued skb (via q->gso_skb) can also be a SKB list. */ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, - int *packets) + int *packets, int budget) { const struct netdev_queue *txq = q->dev_queue; struct sk_buff *skb = NULL; @@ -295,7 +298,7 @@ validate: if (skb) { bulk: if (qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq, packets); + try_bulk_dequeue_skb(q, skb, txq, packets, budget); else try_bulk_dequeue_skb_slow(q, skb, packets); } @@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline bool qdisc_restart(struct Qdisc *q, int *packets) +static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget) { spinlock_t *root_lock = NULL; struct netdev_queue *txq; @@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate, packets); + skb = dequeue_skb(q, &validate, packets, budget); if (unlikely(!skb)) return false; @@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q) int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; - while (qdisc_restart(q, &packets)) { + while (qdisc_restart(q, &packets, quota)) { quota -= packets; if (quota <= 0) { if (q->flags & TCQ_F_NOLOCK) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 996c2018f0e6..2afb376299fe 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -73,19 +73,26 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct nlattr *attr; void *info = NULL; + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; + rcu_read_unlock(); attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; + + if (!--addrcnt) + break; } + rcu_read_unlock(); return 0; } @@ -223,14 +230,15 @@ struct sctp_comm_param { bool net_admin; }; -static size_t inet_assoc_attr_size(struct sctp_association *asoc) +static size_t inet_assoc_attr_size(struct sock *sk, + struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, - list) + list, lockdep_sock_is_held(sk)) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) @@ -256,11 +264,14 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t if (err) return err; - rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); - if (!rep) + lock_sock(sk); + + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + release_sock(sk); return -ENOMEM; + } - lock_sock(sk); if (ep != assoc->ep) { err = -EAGAIN; goto out; diff --git a/net/sctp/input.c b/net/sctp/input.c index 7e99894778d4..e119e460ccde 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -190,7 +190,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset_ct(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb) || skb->len < sizeof(struct sctp_chunkhdr)) goto discard_release; /* Create an SCTP packet structure. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5c1652181805..f5a7d5a38755 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -169,13 +169,14 @@ next_chunk: chunk->head_skb = chunk->skb; /* skbs with "cover letter" */ - if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) + if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) { + if (WARN_ON(!skb_shinfo(chunk->skb)->frag_list)) { + __SCTP_INC_STATS(dev_net(chunk->skb->dev), + SCTP_MIB_IN_PKT_DISCARDS); + sctp_chunk_free(chunk); + goto next_chunk; + } chunk->skb = skb_shinfo(chunk->skb)->frag_list; - - if (WARN_ON(!chunk->skb)) { - __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); - sctp_chunk_free(chunk); - goto next_chunk; } } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4d258a6e8033..0c56d9673cc1 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -37,10 +37,10 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct net *net, - struct sctp_transport *peer, - const union sctp_addr *addr, - gfp_t gfp) +static void sctp_transport_init(struct net *net, + struct sctp_transport *peer, + const union sctp_addr *addr, + gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); @@ -83,8 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct net *net, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); - - return peer; } /* Allocate and initialize a new transport. */ @@ -96,20 +94,13 @@ struct sctp_transport *sctp_transport_new(struct net *net, transport = kzalloc(sizeof(*transport), gfp); if (!transport) - goto fail; + return NULL; - if (!sctp_transport_init(net, transport, addr, gfp)) - goto fail_init; + sctp_transport_init(net, transport, addr, gfp); SCTP_DBG_OBJCNT_INC(transport); return transport; - -fail_init: - kfree(transport); - -fail: - return NULL; } /* This transport is no longer needed. Free up if possible, or @@ -495,6 +486,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; + unsigned int rto_beta, rto_alpha; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -506,10 +498,14 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) - + (rtt >> net->sctp.rto_alpha); + rto_beta = READ_ONCE(net->sctp.rto_beta); + if (rto_beta < 32) + tp->rttvar = tp->rttvar - (tp->rttvar >> rto_beta) + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> rto_beta); + rto_alpha = READ_ONCE(net->sctp.rto_alpha); + if (rto_alpha < 32) + tp->srtt = tp->srtt - (tp->srtt >> rto_alpha) + + (rtt >> rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 157aace169d4..87c87edadde7 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -890,6 +890,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return SMC_CLC_DECL_CNFERR; } pclc_base->hdr.typev1 = SMC_TYPE_N; + ini->smc_type_v1 = SMC_TYPE_N; } else { pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); plen += sizeof(*pclc_prfx) + diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c index a944e7dcb8b9..a94084b4a498 100644 --- a/net/smc/smc_inet.c +++ b/net/smc/smc_inet.c @@ -56,7 +56,6 @@ static struct inet_protosw smc_inet_protosw = { .protocol = IPPROTO_SMC, .prot = &smc_inet_prot, .ops = &smc_inet_stream_ops, - .flags = INET_PROTOSW_ICSK, }; #if IS_ENABLED(CONFIG_IPV6) @@ -104,27 +103,15 @@ static struct inet_protosw smc_inet6_protosw = { .protocol = IPPROTO_SMC, .prot = &smc_inet6_prot, .ops = &smc_inet6_stream_ops, - .flags = INET_PROTOSW_ICSK, }; #endif /* CONFIG_IPV6 */ -static unsigned int smc_sync_mss(struct sock *sk, u32 pmtu) -{ - /* No need pass it through to clcsock, mss can always be set by - * sock_create_kern or smc_setsockopt. - */ - return 0; -} - static int smc_inet_init_sock(struct sock *sk) { struct net *net = sock_net(sk); /* init common smc sock */ smc_sk_init(net, sk, IPPROTO_SMC); - - inet_csk(sk)->icsk_sync_mss = smc_sync_mss; - /* create clcsock */ return smc_create_clcsk(net, sk, sk->sk_family); } diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 43b1f558b33d..e659fea2da70 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - stm->strp.offset) { + (ssize_t)skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 984e0cf9bf8a..a570e7adf270 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,9 @@ config SUNRPC_SWAP config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" - depends on SUNRPC + depends on SUNRPC && CRYPTO default y select SUNRPC_GSS - select CRYPTO select CRYPTO_SKCIPHER select CRYPTO_HASH help diff --git a/net/tipc/net.c b/net/tipc/net.c index 0e95572e56b4..7e65d0b0c4a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -145,7 +145,9 @@ void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net *tn = container_of(work, struct tipc_net, work); + rtnl_lock(); tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); } void tipc_net_stop(struct net *net) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a64ae15b1a60..71734411ff4c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -723,8 +723,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, /* shouldn't get to wraparound: * too long in async stage, something bad happened */ - if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) { + tls_offload_rx_resync_async_request_cancel(resync_async); return false; + } /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 684ab03137b6..65396a4e1b07 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -145,6 +145,7 @@ enum unix_vertex_index { }; static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; +static unsigned long unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) { @@ -153,6 +154,7 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) if (!vertex) { vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); vertex->index = unix_vertex_unvisited_index; + vertex->scc_index = ++unix_vertex_max_scc_index; vertex->out_degree = 0; INIT_LIST_HEAD(&vertex->edges); INIT_LIST_HEAD(&vertex->scc_entry); @@ -489,10 +491,15 @@ prev_vertex: scc_dead = unix_vertex_dead(v); } - if (scc_dead) + if (scc_dead) { unix_collect_skb(&scc, hitlist); - else if (!unix_graph_maybe_cyclic) - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } else { + if (unix_vertex_max_scc_index < vertex->scc_index) + unix_vertex_max_scc_index = vertex->scc_index; + + if (!unix_graph_maybe_cyclic) + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } list_del(&scc); } @@ -507,6 +514,7 @@ static void unix_walk_scc(struct sk_buff_head *hitlist) unsigned long last_index = UNIX_VERTEX_INDEX_START; unix_graph_maybe_cyclic = false; + unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; /* Visit every vertex exactly once. * __unix_walk_scc() moves visited vertices to unix_visited_vertices. diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4c2db6cca557..76763247a377 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -487,12 +487,26 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) goto err; } - if (vsk->transport) { - if (vsk->transport == new_transport) { - ret = 0; - goto err; - } + if (vsk->transport && vsk->transport == new_transport) { + ret = 0; + goto err; + } + /* We increase the module refcnt to prevent the transport unloading + * while there are open sockets assigned to it. + */ + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); + + if (vsk->transport) { /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we * have already held the sock lock. In the other cases, this @@ -512,20 +526,6 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->peer_shutdown = 0; } - /* We increase the module refcnt to prevent the transport unloading - * while there are open sockets assigned to it. - */ - if (!new_transport || !try_module_get(new_transport->module)) { - ret = -ENODEV; - goto err; - } - - /* It's safe to release the mutex after a successful try_module_get(). - * Whichever transport `new_transport` points at, it won't go away until - * the last module_put() below or in vsock_deassign_transport(). - */ - mutex_unlock(&vsock_register_mutex); - if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || !new_transport->seqpacket_allow(remote_cid)) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a6..54a34d8d356e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending); +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t) +{ + struct wiphy_hrtimer_work *hrwork = + container_of(t, struct wiphy_hrtimer_work, timer); + + wiphy_work_queue(hrwork->wiphy, &hrwork->work); + + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer); + +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay) +{ + trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay); + + if (!delay) { + hrtimer_cancel(&hrwork->timer); + wiphy_work_queue(wiphy, &hrwork->work); + return; + } + + hrwork->wiphy = wiphy; + hrtimer_start_range_ns(&hrwork->timer, delay, + 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue); + +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_cancel(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel); + +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_flush(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush); + +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + return hrtimer_is_queued(&hrwork->timer); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 346dfd2bd987..03d07b54359a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4136,8 +4136,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_quantum = old_txq_quantum; } - if (old_rts_threshold) - kfree(old_radio_rts_threshold); + kfree(old_radio_rts_threshold); return result; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8a4c34112eb5..2b71f1d867a0 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue, __entry->delay) ); +TRACE_EVENT(wiphy_hrtimer_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + ktime_t delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(ktime_t, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fc7a603b04f1..bf744ac9d5a7 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -555,14 +555,10 @@ static void espintcp_close(struct sock *sk, long timeout) static __poll_t espintcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct espintcp_ctx *ctx = espintcp_getctx(sk); - if (!skb_queue_empty(&ctx->ike_queue)) - mask |= EPOLLIN | EPOLLRDNORM; - - return mask; + return datagram_poll_queue(file, sock, wait, &ctx->ike_queue); } static void build_protos(struct proto *espintcp_prot, diff --git a/rust/Makefile b/rust/Makefile index 23c7ae905bd2..7842ad0a4ea7 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -69,6 +69,9 @@ core-edition := $(if $(call rustc-min-version,108700),2024,2021) # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) +# Similarly, for doctests (https://github.com/rust-lang/rust/issues/146465). +doctests_modifiers_workaround := $(rustdoc_modifiers_workaround)$(if $(call rustc-min-version,109100),$(comma)sanitizer) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both @@ -127,9 +130,14 @@ rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) +# Even if `rustdoc` targets are not kernel objects, they should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustdoc` will complain about missing sanitizer flags causing an ABI mismatch. +rustdoc-compiler_builtins: private is-kernel-object := y rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE +$(call if_changed,rustdoc) +rustdoc-ffi: private is-kernel-object := y rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE +$(call if_changed,rustdoc) @@ -147,6 +155,7 @@ rustdoc-pin_init: $(src)/pin-init/src/lib.rs rustdoc-pin_init_internal \ rustdoc-macros FORCE +$(call if_changed,rustdoc) +rustdoc-kernel: private is-kernel-object := y rustdoc-kernel: private rustc_target_flags = --extern ffi --extern pin_init \ --extern build_error --extern macros \ --extern bindings --extern uapi @@ -230,7 +239,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< --extern bindings --extern uapi \ --no-run --crate-name kernel -Zunstable-options \ --sysroot=/dev/null \ - $(rustdoc_modifiers_workaround) \ + $(doctests_modifiers_workaround) \ --test-builder $(objtree)/scripts/rustdoc_test_builder \ $< $(rustdoc_test_kernel_quiet); \ $(objtree)/scripts/rustdoc_test_gen @@ -289,7 +298,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ -fzero-init-padding-bits=% -mno-fdpic \ - --param=% --param asan-% + --param=% --param asan-% -fno-isolate-erroneous-paths-dereference # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu @@ -522,6 +531,10 @@ $(obj)/pin_init.o: $(src)/pin-init/src/lib.rs $(obj)/compiler_builtins.o \ $(obj)/$(libpin_init_internal_name) $(obj)/$(libmacros_name) FORCE +$(call if_changed_rule,rustc_library) +# Even if normally `build_error` is not a kernel object, it should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustc` will complain about missing sanitizer flags causing an ABI mismatch. +$(obj)/build_error.o: private is-kernel-object := y $(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index e94aebd084c8..ac8d6f763ae8 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -9,7 +9,7 @@ use super::{ }; use crate::{ fmt, - page::AsPageIter, + page::AsPageIter, // }; use core::{ borrow::{Borrow, BorrowMut}, diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs index e11848bbf206..7a3b0b9c418e 100644 --- a/rust/kernel/auxiliary.rs +++ b/rust/kernel/auxiliary.rs @@ -217,13 +217,7 @@ impl<Ctx: device::DeviceContext> Device<Ctx> { /// Returns a reference to the parent [`device::Device`], if any. pub fn parent(&self) -> Option<&device::Device> { - let ptr: *const Self = self; - // CAST: `Device<Ctx: DeviceContext>` types are transparent to each other. - let ptr: *const Device = ptr.cast(); - // SAFETY: `ptr` was derived from `&self`. - let this = unsafe { &*ptr }; - - this.as_ref().parent() + self.as_ref().parent() } } diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index 711b8368b38f..aa8fc7bf06fc 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -167,7 +167,9 @@ impl core::ops::Deref for BitmapVec { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } @@ -184,7 +186,9 @@ impl core::ops::DerefMut for BitmapVec { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of_mut!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 21b5b9b8acc1..1a555fcb120a 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -38,8 +38,7 @@ use macros::vtable; const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; /// Default transition latency value in nanoseconds. -pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = - bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; +pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /// CPU frequency driver flags. pub mod flags { diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 1321e6f0b53c..a849b7dde2fd 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -251,7 +251,7 @@ impl<Ctx: DeviceContext> Device<Ctx> { /// Returns a reference to the parent device, if any. #[cfg_attr(not(CONFIG_AUXILIARY_BUS), expect(dead_code))] - pub(crate) fn parent(&self) -> Option<&Self> { + pub(crate) fn parent(&self) -> Option<&Device> { // SAFETY: // - By the type invariant `self.as_raw()` is always valid. // - The parent device is only ever set at device creation. @@ -264,7 +264,7 @@ impl<Ctx: DeviceContext> Device<Ctx> { // - Since `parent` is not NULL, it must be a valid pointer to a `struct device`. // - `parent` is valid for the lifetime of `self`, since a `struct device` holds a // reference count of its parent. - Some(unsafe { Self::from_raw(parent) }) + Some(unsafe { Device::from_raw(parent) }) } } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 10a6a1789854..2392c281459e 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -103,7 +103,7 @@ struct Inner<T: Send> { /// /// # Invariants /// -/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. +/// `Self::inner` is guaranteed to be initialized and is always accessed read-only. #[pin_data(PinnedDrop)] pub struct Devres<T: Send> { dev: ARef<Device>, diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index c6ec64295c9f..aa5b9a7a726d 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -36,7 +36,7 @@ pub use new_condvar; /// spuriously. /// /// Instances of [`CondVar`] need a lock class and to be pinned. The recommended way to create such -/// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros. +/// instances is with the [`pin_init`](pin_init::pin_init!) and [`new_condvar`] macros. /// /// # Examples /// diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d0ee33a487be..52c08c4eb0b9 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -167,7 +167,7 @@ else ifeq ($(KBUILD_CHECKSRC),2) endif ifneq ($(KBUILD_EXTRA_WARN),) - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(KDOCFLAGS) \ + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(KDOCFLAGS) \ $(if $(findstring 2, $(KBUILD_EXTRA_WARN)), -Wall) \ $< endif diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index ced4379550d7..cd788cac9d91 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -102,11 +102,24 @@ vmlinux: vmlinux.unstripped FORCE # modules.builtin.modinfo # --------------------------------------------------------------------------- +# .modinfo in vmlinux.unstripped is aligned to 8 bytes for compatibility with +# tools that expect vmlinux to have sufficiently aligned sections but the +# additional bytes used for padding .modinfo to satisfy this requirement break +# certain versions of kmod with +# +# depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix +# +# Strip the trailing padding bytes after extracting .modinfo to comply with +# what kmod expects to parse. +quiet_cmd_modules_builtin_modinfo = GEN $@ + cmd_modules_builtin_modinfo = $(cmd_objcopy); \ + sed -i 's/\x00\+$$/\x00/g' $@ + OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary targets += modules.builtin.modinfo modules.builtin.modinfo: vmlinux.unstripped FORCE - $(call if_changed,objcopy) + $(call if_changed,modules_builtin_modinfo) # modules.builtin # --------------------------------------------------------------------------- diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index c73cb802a0a3..8d01b741de62 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -277,12 +277,6 @@ handle_line() { fi done - if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then - words[$last-1]="${words[$last-1]} ${words[$last]}" - unset words[$last] spaces[$last] - last=$(( $last - 1 )) - fi - # Extract info after the symbol if present. E.g.: # func_name+0x54/0x80 (P) # ^^^ @@ -295,6 +289,14 @@ handle_line() { last=$(( $last - 1 )) fi + # Join module name with its build id if present, as these were + # split during tokenization (e.g. "[module" and "modbuildid]"). + if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then + words[$last-1]="${words[$last-1]} ${words[$last]}" + unset words[$last] spaces[$last] + last=$(( $last - 1 )) + fi + if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then module=${words[$last]} # some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])" diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 08ae61eb327e..f5203d1640ee 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -138,7 +138,8 @@ int main(int argc, char **argv) error("no input files?"); } - symbol_read_exports(stdin); + if (!symbol_read_exports(stdin)) + return 0; if (symtypes_file) { symfile = fopen(symtypes_file, "w"); diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index d9c06d2cb1df..32cec8f7695a 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -123,7 +123,7 @@ struct symbol { typedef void (*symbol_callback_t)(struct symbol *, void *arg); bool is_symbol_ptr(const char *name); -void symbol_read_exports(FILE *file); +int symbol_read_exports(FILE *file); void symbol_read_symtab(int fd); struct symbol *symbol_get(const char *name); void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr); diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index 35ed594f0749..ecddcb5ffcdf 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -128,7 +128,7 @@ static bool is_exported(const char *name) return for_each(name, NULL, NULL) > 0; } -void symbol_read_exports(FILE *file) +int symbol_read_exports(FILE *file) { struct symbol *sym; char *line = NULL; @@ -159,6 +159,8 @@ void symbol_read_exports(FILE *file) free(line); debug("%d exported symbols", nsym); + + return nsym; } static void get_symbol(struct symbol *sym, void *arg) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 84ea9215c0a7..b8b7bba84a65 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -12,6 +12,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <locale.h> #include <stdarg.h> #include <stdlib.h> #include <string.h> @@ -931,6 +932,8 @@ int main(int ac, char **av) signal(SIGINT, sig_handler); + setlocale(LC_ALL, ""); + if (ac > 1 && strcmp(av[1], "-s") == 0) { silent = 1; /* Silence conf_read() until the real callback is set up */ diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index ae1fe5f60327..521700ed7152 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -7,6 +7,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <locale.h> #include <string.h> #include <strings.h> #include <stdlib.h> @@ -1478,6 +1479,8 @@ int main(int ac, char **av) int lines, columns; char *mode; + setlocale(LC_ALL, ""); + if (ac > 1 && strcmp(av[1], "-s") == 0) { /* Silence conf_read() until the real callback is set up */ conf_set_message_callback(NULL); diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index b96538787f3d..054fdf45cc37 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-base=. "${destdir}")"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 775db3fc4959..ec10270c2cce 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -32,7 +32,7 @@ * allows 5 times as large as IEC 61883-6 defines. * @CIP_HEADER_WITHOUT_EOH: Only for in-stream. CIP Header doesn't include * valid EOH. - * @CIP_NO_HEADERS: a lack of headers in packets + * @CIP_NO_HEADER: a lack of headers in packets * @CIP_UNALIGHED_DBC: Only for in-stream. The value of dbc is not alighed to * the value of current SYT_INTERVAL; e.g. initial value is not zero. * @CIP_UNAWARE_SYT: For outgoing packet, the value in SYT field of CIP is 0xffff. diff --git a/sound/hda/codecs/hdmi/nvhdmi-mcp.c b/sound/hda/codecs/hdmi/nvhdmi-mcp.c index 8fd8d76fa72f..1c5fdfe872f2 100644 --- a/sound/hda/codecs/hdmi/nvhdmi-mcp.c +++ b/sound/hda/codecs/hdmi/nvhdmi-mcp.c @@ -350,8 +350,8 @@ static int nvhdmi_mcp_probe(struct hda_codec *codec, static const struct hda_codec_ops nvhdmi_mcp_codec_ops = { .probe = nvhdmi_mcp_probe, .remove = snd_hda_hdmi_simple_remove, - .build_controls = nvhdmi_mcp_build_pcms, - .build_pcms = nvhdmi_mcp_build_controls, + .build_pcms = nvhdmi_mcp_build_pcms, + .build_controls = nvhdmi_mcp_build_controls, .init = nvhdmi_mcp_init, .unsol_event = snd_hda_hdmi_simple_unsol_event, }; diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 214eb9df6ef8..269b6c1e3b6d 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3736,6 +3736,7 @@ enum { ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1, ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC, ALC289_FIXUP_ASUS_ZEPHYRUS_DUAL_SPK, + ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6172,6 +6173,16 @@ static const struct hda_fixup alc269_fixups[] = { { 0x1e, 0x90170150 }, /* Internal Speaker */ { } }, + }, + [ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a1113c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x22a190a0 }, /* dock mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST } }; @@ -6397,6 +6408,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), + SND_PCI_QUIRK(0x103c, 0x8603, "HP Omen 17-cb0xxx", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x860c, "HP ZBook 17 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), @@ -6576,6 +6589,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c16, "HP Spectre x360 2-in-1 Laptop 16-aa0xxx", ALC245_FIXUP_HP_SPECTRE_X360_16_AA0XXX), SND_PCI_QUIRK(0x103c, 0x8c17, "HP Spectre 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8c21, "HP Pavilion Plus Laptop 14-ey0XXX", ALC245_FIXUP_HP_X360_MUTE_LEDS), + SND_PCI_QUIRK(0x103c, 0x8c2d, "HP Victus 15-fa1xxx (MB 8C2D)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c30, "HP Victus 15-fb1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -6680,6 +6694,15 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8ed5, "HP Merino13X", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed6, "HP Merino13", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed7, "HP Merino14", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed8, "HP Merino16", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed9, "HP Merino14W", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8eda, "HP Merino16W", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8f40, "HP Lampas14", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f41, "HP Lampas16", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f42, "HP LampasW14", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), @@ -6957,6 +6980,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa554, "VAIO VJFH52", ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa559, "VAIO RPL", ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -7078,6 +7102,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), + HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), diff --git a/sound/hda/codecs/side-codecs/cs35l41_hda.c b/sound/hda/codecs/side-codecs/cs35l41_hda.c index c04208e685a0..c0f2a3ff77a1 100644 --- a/sound/hda/codecs/side-codecs/cs35l41_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l41_hda.c @@ -1410,6 +1410,8 @@ static int cs35l41_get_acpi_mute_state(struct cs35l41_hda *cs35l41, acpi_handle if (cs35l41_dsm_supported(handle, CS35L41_DSM_GET_MUTE)) { ret = acpi_evaluate_dsm(handle, &guid, 0, CS35L41_DSM_GET_MUTE, NULL); + if (!ret) + return -EINVAL; mute = *ret->buffer.pointer; dev_dbg(cs35l41->dev, "CS35L41_DSM_GET_MUTE: %d\n", mute); } diff --git a/sound/hda/codecs/side-codecs/hda_component.c b/sound/hda/codecs/side-codecs/hda_component.c index bcf47a301697..8a2a200600a7 100644 --- a/sound/hda/codecs/side-codecs/hda_component.c +++ b/sound/hda/codecs/side-codecs/hda_component.c @@ -174,6 +174,10 @@ int hda_component_manager_init(struct hda_codec *cdc, sm->match_str = match_str; sm->index = i; component_match_add(dev, &match, hda_comp_match_dev_name, sm); + if (IS_ERR(match)) { + codec_err(cdc, "Fail to add component %ld\n", PTR_ERR(match)); + return PTR_ERR(match); + } } ret = component_master_add_with_match(dev, ops, match); diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index a126f04c3ed7..0357401a6023 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -669,6 +669,7 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) */ device_name = "TXNW5825"; hda_priv->hda_chip_id = HDA_TAS5825; + tas_hda->priv->chip_id = TAS5825; } else { return -ENODEV; } diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 48c52a207024..a19258c95886 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2075,6 +2075,7 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1462, 0xee59) }, /* MSI X870E Tomahawk WiFi */ {} }; diff --git a/sound/soc/amd/acp/acp-sdw-sof-mach.c b/sound/soc/amd/acp/acp-sdw-sof-mach.c index 91d72d4bb9a2..d055582a3bf1 100644 --- a/sound/soc/amd/acp/acp-sdw-sof-mach.c +++ b/sound/soc/amd/acp/acp-sdw-sof-mach.c @@ -176,9 +176,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, cpus->dai_name = devm_kasprintf(dev, GFP_KERNEL, "SDW%d Pin%d", link_num, cpu_pin_id); - dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); if (!cpus->dai_name) return -ENOMEM; + dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); codec_maps[j].cpu = 0; codec_maps[j].codec = j; diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index dcecac792e6d..871b4f054a84 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c @@ -30,6 +30,20 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1 }; +static const struct snd_soc_acpi_endpoint spk_2_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 2, + .group_id = 1 +}; + +static const struct snd_soc_acpi_endpoint spk_3_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 3, + .group_id = 1 +}; + static const struct snd_soc_acpi_adr_device rt711_rt1316_group_adr[] = { { .adr = 0x000030025D071101ull, @@ -112,6 +126,134 @@ static const struct snd_soc_acpi_adr_device rt1320_1_single_adr[] = { } }; +static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { + { /* Jack Playback Endpoint */ + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* DMIC Capture Endpoint */ + .num = 1, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Jack Capture Endpoint */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Speaker Playback Endpoint */ + .num = 3, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l43_0_adr[] = { + { + .adr = 0x00003001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_1_cs35l56x4_1_adr[] = { + { + .adr = 0x00013001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l56x4_1_adr[] = { + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l1_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l43_1_cs35l56x4_1_adr), + .adr_d = cs42l43_1_cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l0_cs35l56x4_l1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_0_adr), + .adr_d = cs42l43_0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr acp70_rt722_only[] = { { .mask = BIT(0), @@ -151,6 +293,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .links = acp70_4_in_1_sdca, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp70_cs42l43_l0_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs42l43_l1_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines); diff --git a/sound/soc/codecs/cs-amp-lib-test.c b/sound/soc/codecs/cs-amp-lib-test.c index 2fde84309338..3406887cdfa2 100644 --- a/sound/soc/codecs/cs-amp-lib-test.c +++ b/sound/soc/codecs/cs-amp-lib-test.c @@ -7,6 +7,7 @@ #include <kunit/resource.h> #include <kunit/test.h> +#include <kunit/test-bug.h> #include <kunit/static_stub.h> #include <linux/device/faux.h> #include <linux/firmware/cirrus/cs_dsp.h> diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index 6a3cca3d26c7..ead447a5da7f 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -581,17 +581,17 @@ static int cs4271_component_probe(struct snd_soc_component *component) ret = regcache_sync(cs4271->regmap); if (ret < 0) - return ret; + goto err_disable_regulator; ret = regmap_update_bits(cs4271->regmap, CS4271_MODE2, CS4271_MODE2_PDN | CS4271_MODE2_CPEN, CS4271_MODE2_PDN | CS4271_MODE2_CPEN); if (ret < 0) - return ret; + goto err_disable_regulator; ret = regmap_update_bits(cs4271->regmap, CS4271_MODE2, CS4271_MODE2_PDN, 0); if (ret < 0) - return ret; + goto err_disable_regulator; /* Power-up sequence requires 85 uS */ udelay(85); @@ -601,6 +601,10 @@ static int cs4271_component_probe(struct snd_soc_component *component) CS4271_MODE2_MUTECAEQUB); return 0; + +err_disable_regulator: + regulator_bulk_disable(ARRAY_SIZE(cs4271->supplies), cs4271->supplies); + return ret; } static void cs4271_component_remove(struct snd_soc_component *component) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index b9eff240b929..535387cd7aa3 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -793,7 +793,7 @@ static int cs530x_set_sysclk(struct snd_soc_component *component, int clk_id, case CS530X_SYSCLK_SRC_PLL: break; default: - dev_err(component->dev, "Invalid clock id %d\n", clk_id); + dev_err(component->dev, "Invalid sysclk source: %d\n", source); return -EINVAL; } diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index ae89260ca215..3420011da444 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2124,11 +2124,50 @@ static int da7213_probe(struct snd_soc_component *component) return 0; } +static int da7213_runtime_suspend(struct device *dev) +{ + struct da7213_priv *da7213 = dev_get_drvdata(dev); + + regcache_cache_only(da7213->regmap, true); + regcache_mark_dirty(da7213->regmap); + regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies); + + return 0; +} + +static int da7213_runtime_resume(struct device *dev) +{ + struct da7213_priv *da7213 = dev_get_drvdata(dev); + int ret; + + ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies); + if (ret < 0) + return ret; + regcache_cache_only(da7213->regmap, false); + return regcache_sync(da7213->regmap); +} + +static int da7213_suspend(struct snd_soc_component *component) +{ + struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component); + + return da7213_runtime_suspend(da7213->dev); +} + +static int da7213_resume(struct snd_soc_component *component) +{ + struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component); + + return da7213_runtime_resume(da7213->dev); +} + static const struct snd_soc_component_driver soc_component_dev_da7213 = { .probe = da7213_probe, .set_bias_level = da7213_set_bias_level, .controls = da7213_snd_controls, .num_controls = ARRAY_SIZE(da7213_snd_controls), + .suspend = da7213_suspend, + .resume = da7213_resume, .dapm_widgets = da7213_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(da7213_dapm_widgets), .dapm_routes = da7213_audio_map, @@ -2175,6 +2214,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c) if (!da7213->fin_min_rate) return -EINVAL; + da7213->dev = &i2c->dev; + i2c_set_clientdata(i2c, da7213); /* Get required supplies */ @@ -2224,31 +2265,9 @@ static void da7213_i2c_remove(struct i2c_client *i2c) pm_runtime_disable(&i2c->dev); } -static int da7213_runtime_suspend(struct device *dev) -{ - struct da7213_priv *da7213 = dev_get_drvdata(dev); - - regcache_cache_only(da7213->regmap, true); - regcache_mark_dirty(da7213->regmap); - regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies); - - return 0; -} - -static int da7213_runtime_resume(struct device *dev) -{ - struct da7213_priv *da7213 = dev_get_drvdata(dev); - int ret; - - ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies); - if (ret < 0) - return ret; - regcache_cache_only(da7213->regmap, false); - return regcache_sync(da7213->regmap); -} - -static DEFINE_RUNTIME_DEV_PM_OPS(da7213_pm, da7213_runtime_suspend, - da7213_runtime_resume, NULL); +static const struct dev_pm_ops da7213_pm = { + RUNTIME_PM_OPS(da7213_runtime_suspend, da7213_runtime_resume, NULL) +}; static const struct i2c_device_id da7213_i2c_id[] = { { "da7213" }, diff --git a/sound/soc/codecs/da7213.h b/sound/soc/codecs/da7213.h index b9ab791d6b88..29cbf0eb6124 100644 --- a/sound/soc/codecs/da7213.h +++ b/sound/soc/codecs/da7213.h @@ -595,6 +595,7 @@ enum da7213_supplies { /* Codec private data */ struct da7213_priv { struct regmap *regmap; + struct device *dev; struct mutex ctrl_lock; struct regulator_bulk_data supplies[DA7213_NUM_SUPPLIES]; struct clk *mclk; diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c index a03d4e5e7d14..cab2f2eecdfb 100644 --- a/sound/soc/codecs/idt821034.c +++ b/sound/soc/codecs/idt821034.c @@ -548,14 +548,14 @@ end: return ret; } -static const DECLARE_TLV_DB_LINEAR(idt821034_gain_in, -6520, 1306); -#define IDT821034_GAIN_IN_MIN_RAW 1 /* -65.20 dB -> 10^(-65.2/20.0) * 1820 = 1 */ -#define IDT821034_GAIN_IN_MAX_RAW 8191 /* 13.06 dB -> 10^(13.06/20.0) * 1820 = 8191 */ +static const DECLARE_TLV_DB_LINEAR(idt821034_gain_in, -300, 1300); +#define IDT821034_GAIN_IN_MIN_RAW 1288 /* -3.0 dB -> 10^(-3.0/20.0) * 1820 = 1288 */ +#define IDT821034_GAIN_IN_MAX_RAW 8130 /* 13.0 dB -> 10^(13.0/20.0) * 1820 = 8130 */ #define IDT821034_GAIN_IN_INIT_RAW 1820 /* 0dB -> 10^(0/20) * 1820 = 1820 */ -static const DECLARE_TLV_DB_LINEAR(idt821034_gain_out, -6798, 1029); -#define IDT821034_GAIN_OUT_MIN_RAW 1 /* -67.98 dB -> 10^(-67.98/20.0) * 2506 = 1*/ -#define IDT821034_GAIN_OUT_MAX_RAW 8191 /* 10.29 dB -> 10^(10.29/20.0) * 2506 = 8191 */ +static const DECLARE_TLV_DB_LINEAR(idt821034_gain_out, -1300, 300); +#define IDT821034_GAIN_OUT_MIN_RAW 561 /* -13.0 dB -> 10^(-13.0/20.0) * 2506 = 561 */ +#define IDT821034_GAIN_OUT_MAX_RAW 3540 /* 3.0 dB -> 10^(3.0/20.0) * 2506 = 3540 */ #define IDT821034_GAIN_OUT_INIT_RAW 2506 /* 0dB -> 10^(0/20) * 2506 = 2506 */ static const struct snd_kcontrol_new idt821034_controls[] = { diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 2e1b77973a3e..92c177b82a02 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1638,7 +1638,7 @@ static int va_macro_probe(struct platform_device *pdev) if (ret) goto err_clkout; - va->fsgen = clk_hw_get_clk(&va->hw, "fsgen"); + va->fsgen = devm_clk_hw_get_clk(dev, &va->hw, "fsgen"); if (IS_ERR(va->fsgen)) { ret = PTR_ERR(va->fsgen); goto err_clkout; diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 22177c1ce160..5aff5a459a43 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1234,9 +1234,13 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = { SND_SOC_DAPM_INPUT("DMIC4"), SND_SOC_DAPM_SUPPLY("DMIC3_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC3_SHIFT, 0, NULL, 0), + M98090_DIGMIC3_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC4_SHIFT, 0, NULL, 0), + M98090_DIGMIC4_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_SUPPLY("DMIC34_HPF", M98090_REG_FILTER_CONFIG, + M98090_FLT_DMIC34HPF_SHIFT, 0, NULL, 0), }; static const struct snd_soc_dapm_route max98090_dapm_routes[] = { @@ -1425,8 +1429,8 @@ static const struct snd_soc_dapm_route max98091_dapm_routes[] = { /* DMIC inputs */ {"DMIC3", NULL, "DMIC3_ENA"}, {"DMIC4", NULL, "DMIC4_ENA"}, - {"DMIC3", NULL, "AHPF"}, - {"DMIC4", NULL, "AHPF"}, + {"DMIC3", NULL, "DMIC34_HPF"}, + {"DMIC4", NULL, "DMIC34_HPF"}, }; static int max98090_add_widgets(struct snd_soc_component *component) diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c index edb95f869a4a..4fa9a785513e 100644 --- a/sound/soc/codecs/nau8821.c +++ b/sound/soc/codecs/nau8821.c @@ -26,7 +26,8 @@ #include <sound/tlv.h> #include "nau8821.h" -#define NAU8821_JD_ACTIVE_HIGH BIT(0) +#define NAU8821_QUIRK_JD_ACTIVE_HIGH BIT(0) +#define NAU8821_QUIRK_JD_DB_BYPASS BIT(1) static int nau8821_quirk; static int quirk_override = -1; @@ -1021,12 +1022,17 @@ static bool nau8821_is_jack_inserted(struct regmap *regmap) return active_high == is_high; } -static void nau8821_int_status_clear_all(struct regmap *regmap) +static void nau8821_irq_status_clear(struct regmap *regmap, int active_irq) { - int active_irq, clear_irq, i; + int clear_irq, i; - /* Reset the intrruption status from rightmost bit if the corres- - * ponding irq event occurs. + if (active_irq) { + regmap_write(regmap, NAU8821_R11_INT_CLR_KEY_STATUS, active_irq); + return; + } + + /* Reset the interruption status from rightmost bit if the + * corresponding irq event occurs. */ regmap_read(regmap, NAU8821_R10_IRQ_STATUS, &active_irq); for (i = 0; i < NAU8821_REG_DATA_LEN; i++) { @@ -1052,20 +1058,24 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) snd_soc_component_disable_pin(component, "MICBIAS"); snd_soc_dapm_sync(dapm); + /* Disable & mask both insertion & ejection IRQs */ + regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, + NAU8821_IRQ_INSERT_DIS | NAU8821_IRQ_EJECT_DIS, + NAU8821_IRQ_INSERT_DIS | NAU8821_IRQ_EJECT_DIS); + regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, + NAU8821_IRQ_INSERT_EN | NAU8821_IRQ_EJECT_EN, + NAU8821_IRQ_INSERT_EN | NAU8821_IRQ_EJECT_EN); + /* Clear all interruption status */ - nau8821_int_status_clear_all(regmap); + nau8821_irq_status_clear(regmap, 0); - /* Enable the insertion interruption, disable the ejection inter- - * ruption, and then bypass de-bounce circuit. - */ + /* Enable & unmask the insertion IRQ */ regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_EJECT_DIS | NAU8821_IRQ_INSERT_DIS, - NAU8821_IRQ_EJECT_DIS); - /* Mask unneeded IRQs: 1 - disable, 0 - enable */ + NAU8821_IRQ_INSERT_DIS, 0); regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_EJECT_EN | NAU8821_IRQ_INSERT_EN, - NAU8821_IRQ_EJECT_EN); + NAU8821_IRQ_INSERT_EN, 0); + /* Bypass de-bounce circuit */ regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, NAU8821_JACK_DET_DB_BYPASS, NAU8821_JACK_DET_DB_BYPASS); @@ -1089,22 +1099,17 @@ static void nau8821_eject_jack(struct nau8821 *nau8821) NAU8821_IRQ_KEY_RELEASE_DIS | NAU8821_IRQ_KEY_PRESS_DIS); } - } static void nau8821_jdet_work(struct work_struct *work) { struct nau8821 *nau8821 = - container_of(work, struct nau8821, jdet_work); + container_of(work, struct nau8821, jdet_work.work); struct snd_soc_dapm_context *dapm = nau8821->dapm; struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct regmap *regmap = nau8821->regmap; int jack_status_reg, mic_detected, event = 0, event_mask = 0; - snd_soc_component_force_enable_pin(component, "MICBIAS"); - snd_soc_dapm_sync(dapm); - msleep(20); - regmap_read(regmap, NAU8821_R58_I2C_DEVICE_ID, &jack_status_reg); mic_detected = !(jack_status_reg & NAU8821_KEYDET); if (mic_detected) { @@ -1137,6 +1142,7 @@ static void nau8821_jdet_work(struct work_struct *work) snd_soc_component_disable_pin(component, "MICBIAS"); snd_soc_dapm_sync(dapm); } + event_mask |= SND_JACK_HEADSET; snd_soc_jack_report(nau8821->jack, event, event_mask); } @@ -1146,6 +1152,15 @@ static void nau8821_setup_inserted_irq(struct nau8821 *nau8821) { struct regmap *regmap = nau8821->regmap; + /* Disable & mask insertion IRQ */ + regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, + NAU8821_IRQ_INSERT_DIS, NAU8821_IRQ_INSERT_DIS); + regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, + NAU8821_IRQ_INSERT_EN, NAU8821_IRQ_INSERT_EN); + + /* Clear insert IRQ status */ + nau8821_irq_status_clear(regmap, NAU8821_JACK_INSERT_DETECTED); + /* Enable internal VCO needed for interruptions */ if (nau8821->dapm->bias_level < SND_SOC_BIAS_PREPARE) nau8821_configure_sysclk(nau8821, NAU8821_CLK_INTERNAL, 0); @@ -1160,21 +1175,24 @@ static void nau8821_setup_inserted_irq(struct nau8821 *nau8821) regmap_update_bits(regmap, NAU8821_R1D_I2S_PCM_CTRL2, NAU8821_I2S_MS_MASK, NAU8821_I2S_MS_SLAVE); - /* Not bypass de-bounce circuit */ - regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, - NAU8821_JACK_DET_DB_BYPASS, 0); + /* Do not bypass de-bounce circuit */ + if (!(nau8821_quirk & NAU8821_QUIRK_JD_DB_BYPASS)) + regmap_update_bits(regmap, NAU8821_R0D_JACK_DET_CTRL, + NAU8821_JACK_DET_DB_BYPASS, 0); + /* Unmask & enable the ejection IRQs */ regmap_update_bits(regmap, NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_EJECT_EN, 0); + NAU8821_IRQ_EJECT_EN, 0); regmap_update_bits(regmap, NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_EJECT_DIS, 0); + NAU8821_IRQ_EJECT_DIS, 0); } static irqreturn_t nau8821_interrupt(int irq, void *data) { struct nau8821 *nau8821 = (struct nau8821 *)data; struct regmap *regmap = nau8821->regmap; - int active_irq, clear_irq = 0, event = 0, event_mask = 0; + struct snd_soc_component *component; + int active_irq, event = 0, event_mask = 0; if (regmap_read(regmap, NAU8821_R10_IRQ_STATUS, &active_irq)) { dev_err(nau8821->dev, "failed to read irq status\n"); @@ -1185,48 +1203,41 @@ static irqreturn_t nau8821_interrupt(int irq, void *data) if ((active_irq & NAU8821_JACK_EJECT_IRQ_MASK) == NAU8821_JACK_EJECT_DETECTED) { + cancel_delayed_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_DIS); nau8821_eject_jack(nau8821); event_mask |= SND_JACK_HEADSET; - clear_irq = NAU8821_JACK_EJECT_IRQ_MASK; } else if (active_irq & NAU8821_KEY_SHORT_PRESS_IRQ) { event |= NAU8821_BUTTON; event_mask |= NAU8821_BUTTON; - clear_irq = NAU8821_KEY_SHORT_PRESS_IRQ; + nau8821_irq_status_clear(regmap, NAU8821_KEY_SHORT_PRESS_IRQ); } else if (active_irq & NAU8821_KEY_RELEASE_IRQ) { event_mask = NAU8821_BUTTON; - clear_irq = NAU8821_KEY_RELEASE_IRQ; + nau8821_irq_status_clear(regmap, NAU8821_KEY_RELEASE_IRQ); } else if ((active_irq & NAU8821_JACK_INSERT_IRQ_MASK) == NAU8821_JACK_INSERT_DETECTED) { + cancel_delayed_work_sync(&nau8821->jdet_work); regmap_update_bits(regmap, NAU8821_R71_ANALOG_ADC_1, NAU8821_MICDET_MASK, NAU8821_MICDET_EN); if (nau8821_is_jack_inserted(regmap)) { - /* detect microphone and jack type */ - cancel_work_sync(&nau8821->jdet_work); - schedule_work(&nau8821->jdet_work); + /* Detect microphone and jack type */ + component = snd_soc_dapm_to_component(nau8821->dapm); + snd_soc_component_force_enable_pin(component, "MICBIAS"); + snd_soc_dapm_sync(nau8821->dapm); + schedule_delayed_work(&nau8821->jdet_work, msecs_to_jiffies(20)); /* Turn off insertion interruption at manual mode */ - regmap_update_bits(regmap, - NAU8821_R12_INTERRUPT_DIS_CTRL, - NAU8821_IRQ_INSERT_DIS, - NAU8821_IRQ_INSERT_DIS); - regmap_update_bits(regmap, - NAU8821_R0F_INTERRUPT_MASK, - NAU8821_IRQ_INSERT_EN, - NAU8821_IRQ_INSERT_EN); nau8821_setup_inserted_irq(nau8821); } else { dev_warn(nau8821->dev, "Inserted IRQ fired but not connected\n"); nau8821_eject_jack(nau8821); } + } else { + /* Clear the rightmost interrupt */ + nau8821_irq_status_clear(regmap, active_irq); } - if (!clear_irq) - clear_irq = active_irq; - /* clears the rightmost interruption */ - regmap_write(regmap, NAU8821_R11_INT_CLR_KEY_STATUS, clear_irq); - if (event_mask) snd_soc_jack_report(nau8821->jack, event, event_mask); @@ -1521,7 +1532,7 @@ static int nau8821_resume_setup(struct nau8821 *nau8821) nau8821_configure_sysclk(nau8821, NAU8821_CLK_DIS, 0); if (nau8821->irq) { /* Clear all interruption status */ - nau8821_int_status_clear_all(regmap); + nau8821_irq_status_clear(regmap, 0); /* Enable both insertion and ejection interruptions, and then * bypass de-bounce circuit. @@ -1651,7 +1662,8 @@ int nau8821_enable_jack_detect(struct snd_soc_component *component, nau8821->jack = jack; /* Initiate jack detection work queue */ - INIT_WORK(&nau8821->jdet_work, nau8821_jdet_work); + INIT_DELAYED_WORK(&nau8821->jdet_work, nau8821_jdet_work); + ret = devm_request_threaded_irq(nau8821->dev, nau8821->irq, NULL, nau8821_interrupt, IRQF_TRIGGER_LOW | IRQF_ONESHOT, "nau8821", nau8821); @@ -1856,7 +1868,23 @@ static const struct dmi_system_id nau8821_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P-V2"), }, - .driver_data = (void *)(NAU8821_JD_ACTIVE_HIGH), + .driver_data = (void *)(NAU8821_QUIRK_JD_ACTIVE_HIGH), + }, + { + /* Valve Steam Deck LCD */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_MATCH(DMI_PRODUCT_NAME, "Jupiter"), + }, + .driver_data = (void *)(NAU8821_QUIRK_JD_DB_BYPASS), + }, + { + /* Valve Steam Deck OLED */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_MATCH(DMI_PRODUCT_NAME, "Galileo"), + }, + .driver_data = (void *)(NAU8821_QUIRK_JD_DB_BYPASS), }, {} }; @@ -1898,9 +1926,12 @@ static int nau8821_i2c_probe(struct i2c_client *i2c) nau8821_check_quirks(); - if (nau8821_quirk & NAU8821_JD_ACTIVE_HIGH) + if (nau8821_quirk & NAU8821_QUIRK_JD_ACTIVE_HIGH) nau8821->jkdet_polarity = 0; + if (nau8821_quirk & NAU8821_QUIRK_JD_DB_BYPASS) + dev_dbg(dev, "Force bypassing jack detection debounce circuit\n"); + nau8821_print_device_properties(nau8821); nau8821_reset_chip(nau8821->regmap); diff --git a/sound/soc/codecs/nau8821.h b/sound/soc/codecs/nau8821.h index f0935ffafcbe..88602923780d 100644 --- a/sound/soc/codecs/nau8821.h +++ b/sound/soc/codecs/nau8821.h @@ -561,7 +561,7 @@ struct nau8821 { struct regmap *regmap; struct snd_soc_dapm_context *dapm; struct snd_soc_jack *jack; - struct work_struct jdet_work; + struct delayed_work jdet_work; int irq; int clk_id; int micbias_voltage; diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index a4bd29d7220b..5f7b505d5414 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -281,6 +281,10 @@ static void rt721_sdca_jack_preset(struct rt721_sdca_priv *rt721) rt_sdca_index_write(rt721->mbq_regmap, RT721_BOOST_CTRL, RT721_BST_4CH_TOP_GATING_CTRL1, 0x002a); regmap_write(rt721->regmap, 0x2f58, 0x07); + + regmap_write(rt721->regmap, 0x2f51, 0x00); + rt_sdca_index_write(rt721->mbq_regmap, RT721_HDA_SDCA_FLOAT, + RT721_MISC_CTL, 0x0004); } static void rt721_sdca_jack_init(struct rt721_sdca_priv *rt721) diff --git a/sound/soc/codecs/rt721-sdca.h b/sound/soc/codecs/rt721-sdca.h index 71fac9cd8739..24ce188562ba 100644 --- a/sound/soc/codecs/rt721-sdca.h +++ b/sound/soc/codecs/rt721-sdca.h @@ -137,6 +137,7 @@ struct rt721_sdca_dmic_kctrl_priv { #define RT721_HDA_LEGACY_UAJ_CTL 0x02 #define RT721_HDA_LEGACY_CTL1 0x05 #define RT721_HDA_LEGACY_RESET_CTL 0x06 +#define RT721_MISC_CTL 0x07 #define RT721_XU_REL_CTRL 0x0c #define RT721_GE_REL_CTRL1 0x0d #define RT721_HDA_LEGACY_GPIO_WAKE_EN_CTL 0x0e diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 1539b70881d1..8f37aa00e62e 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -108,8 +108,11 @@ static const struct i2c_device_id tasdevice_id[] = { { "tas2570", TAS2570 }, { "tas2572", TAS2572 }, { "tas2781", TAS2781 }, + { "tas5802", TAS5802 }, + { "tas5815", TAS5815 }, { "tas5825", TAS5825 }, { "tas5827", TAS5827 }, + { "tas5828", TAS5828 }, {} }; MODULE_DEVICE_TABLE(i2c, tasdevice_id); @@ -124,8 +127,11 @@ static const struct of_device_id tasdevice_of_match[] = { { .compatible = "ti,tas2570" }, { .compatible = "ti,tas2572" }, { .compatible = "ti,tas2781" }, + { .compatible = "ti,tas5802" }, + { .compatible = "ti,tas5815" }, { .compatible = "ti,tas5825" }, { .compatible = "ti,tas5827" }, + { .compatible = "ti,tas5828" }, {}, }; MODULE_DEVICE_TABLE(of, tasdevice_of_match); @@ -1665,8 +1671,10 @@ static void tasdevice_fw_ready(const struct firmware *fmw, } tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; - /* There is no calibration required for TAS5825/TAS5827. */ - if (tas_priv->chip_id < TAS5825) { + /* There is no calibration required for + * TAS5802/TAS5815/TAS5825/TAS5827/TAS5828. + */ + if (tas_priv->chip_id < TAS5802) { ret = tasdevice_create_cali_ctrls(tas_priv); if (ret) { dev_err(tas_priv->dev, "cali controls error\n"); @@ -1720,8 +1728,11 @@ out: switch (tas_priv->chip_id) { case TAS2563: case TAS2781: + case TAS5802: + case TAS5815: case TAS5825: case TAS5827: + case TAS5828: /* If DSP FW fail, DSP kcontrol won't be created. */ tasdevice_dsp_remove(tas_priv); } @@ -1882,8 +1893,11 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) p = (struct snd_kcontrol_new *)tas2781_snd_controls; size = ARRAY_SIZE(tas2781_snd_controls); break; + case TAS5802: + case TAS5815: case TAS5825: case TAS5827: + case TAS5828: p = (struct snd_kcontrol_new *)tas5825_snd_controls; size = ARRAY_SIZE(tas5825_snd_controls); break; @@ -1943,7 +1957,8 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) { struct i2c_client *client = (struct i2c_client *)tas_priv->client; unsigned int dev_addrs[TASDEVICE_MAX_CHANNELS]; - int i, ndev = 0; + int ndev = 0; + int i, rc; if (tas_priv->isacpi) { ndev = device_property_read_u32_array(&client->dev, @@ -1954,8 +1969,12 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) } else { ndev = (ndev < ARRAY_SIZE(dev_addrs)) ? ndev : ARRAY_SIZE(dev_addrs); - ndev = device_property_read_u32_array(&client->dev, + rc = device_property_read_u32_array(&client->dev, "ti,audio-slots", dev_addrs, ndev); + if (rc != 0) { + ndev = 1; + dev_addrs[0] = client->addr; + } } tas_priv->irq = @@ -2054,8 +2073,11 @@ static const struct acpi_device_id tasdevice_acpi_match[] = { { "TXNW2570", TAS2570 }, { "TXNW2572", TAS2572 }, { "TXNW2781", TAS2781 }, + { "TXNW5802", TAS5802 }, + { "TXNW5815", TAS5815 }, { "TXNW5825", TAS5825 }, { "TXNW5827", TAS5827 }, + { "TXNW5828", TAS5828 }, {}, }; diff --git a/sound/soc/codecs/tas2783-sdw.c b/sound/soc/codecs/tas2783-sdw.c index 1fb4227b711e..e273b80d033e 100644 --- a/sound/soc/codecs/tas2783-sdw.c +++ b/sound/soc/codecs/tas2783-sdw.c @@ -762,10 +762,17 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) goto out; } - mutex_lock(&tas_dev->pde_lock); img_sz = fmw->size; buf = fmw->data; offset += FW_DL_OFFSET; + if (offset >= (img_sz - FW_FL_HDR)) { + dev_err(tas_dev->dev, + "firmware is too small"); + ret = -EINVAL; + goto out; + } + + mutex_lock(&tas_dev->pde_lock); while (offset < (img_sz - FW_FL_HDR)) { memset(&hdr, 0, sizeof(hdr)); offset += read_header(&buf[offset], &hdr); @@ -776,6 +783,14 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) /* size also includes the header */ file_blk_size = hdr.length - FW_FL_HDR; + /* make sure that enough data is there */ + if (offset + file_blk_size > img_sz) { + ret = -EINVAL; + dev_err(tas_dev->dev, + "corrupt firmware file"); + break; + } + switch (hdr.file_id) { case 0: ret = sdw_nwrite_no_pm(tas_dev->sdw_peripheral, @@ -808,7 +823,8 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) break; } mutex_unlock(&tas_dev->pde_lock); - tas2783_update_calibdata(tas_dev); + if (!ret) + tas2783_update_calibdata(tas_dev); out: if (!ret) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index add907cb2706..8c8f39d04972 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1207,24 +1207,14 @@ static int wcd9380_probe(struct sdw_slave *pdev, regcache_cache_only(wcd->regmap, true); } - pm_runtime_set_autosuspend_delay(dev, 3000); - pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - ret = component_add(dev, &wcd_sdw_component_ops); if (ret) - goto err_disable_rpm; - - return 0; + return ret; -err_disable_rpm: - pm_runtime_disable(dev); + /* Set suspended until aggregate device is bind */ pm_runtime_set_suspended(dev); - pm_runtime_dont_use_autosuspend(dev); - return ret; + return 0; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1233,10 +1223,6 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd_sdw_component_ops); - pm_runtime_disable(dev); - pm_runtime_set_suspended(dev); - pm_runtime_dont_use_autosuspend(dev); - return 0; } diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index aabd90a8b3ec..cac26ba0aa4b 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -131,7 +131,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx943 = { .fifos = 8, .fifo_depth = 32, .dataline = 0xf, - .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_BE, + .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_LE, .use_edma = true, .use_verid = true, .volume_sx = false, @@ -823,7 +823,7 @@ static int fsl_micfil_hw_params(struct snd_pcm_substream *substream, break; } - if (format == SNDRV_PCM_FORMAT_DSD_U32_BE) { + if (format == SNDRV_PCM_FORMAT_DSD_U32_LE) { micfil->dec_bypass = true; /* * According to equation 29 in RM: diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 757e7868e322..72bfc91e21b9 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -353,7 +353,6 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, break; case SND_SOC_DAIFMT_PDM: val_cr2 |= FSL_SAI_CR2_BCP; - val_cr4 &= ~FSL_SAI_CR4_MF; sai->is_pdm_mode = true; break; case SND_SOC_DAIFMT_RIGHT_J: @@ -638,7 +637,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr5 |= FSL_SAI_CR5_WNW(slot_width); val_cr5 |= FSL_SAI_CR5_W0W(slot_width); - if (sai->is_lsb_first || sai->is_pdm_mode) + if (sai->is_lsb_first) val_cr5 |= FSL_SAI_CR5_FBT(0); else val_cr5 |= FSL_SAI_CR5_FBT(word_width - 1); @@ -653,12 +652,12 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_CHMOD; /* - * For SAI provider mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will - * generate bclk and frame clock for Tx(Rx), we should set RCR4(TCR4), - * RCR5(TCR5) for playback(capture), or there will be sync error. + * When Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will provide bclk and + * frame clock for Tx(Rx). We should set RCR4(TCR4), RCR5(TCR5) + * for playback(capture), or there will be sync error. */ - if (!sai->is_consumer_mode[tx] && fsl_sai_dir_is_synced(sai, adir)) { + if (fsl_sai_dir_is_synced(sai, adir)) { regmap_update_bits(sai->regmap, FSL_SAI_xCR4(!tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index d31058e2de5b..80c001120cdd 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -651,6 +651,7 @@ static void avs_dai_fe_shutdown(struct snd_pcm_substream *substream, struct snd_ data = snd_soc_dai_get_dma_data(dai, substream); + disable_work_sync(&data->period_elapsed_work); snd_hdac_ext_stream_release(data->host_stream, HDAC_EXT_STREAM_TYPE_HOST); avs_dai_shutdown(substream, dai); } @@ -754,6 +755,8 @@ static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_so data = snd_soc_dai_get_dma_data(dai, substream); host_stream = data->host_stream; + if (runtime->state == SNDRV_PCM_STATE_XRUN) + hdac_stream(host_stream)->prepared = false; if (hdac_stream(host_stream)->prepared) return 0; diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c index 693ecfe68fd0..74096236984a 100644 --- a/sound/soc/intel/avs/probes.c +++ b/sound/soc/intel/avs/probes.c @@ -14,8 +14,8 @@ #include "debug.h" #include "messages.h" -static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id node_id, - size_t buffer_size) +static int avs_dsp_init_probe(struct avs_dev *adev, struct snd_compr_params *params, int bps, + union avs_connector_node_id node_id, size_t buffer_size) { struct avs_probe_cfg cfg = {{0}}; struct avs_module_entry mentry; @@ -27,12 +27,16 @@ static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id return ret; /* - * Probe module uses no cycles, audio data format and input and output - * frame sizes are unused. It is also not owned by any pipeline. + * Probe module uses no cycles, input and output frame sizes are unused. + * It is also not owned by any pipeline. */ cfg.base.ibs = 1; /* BSS module descriptor is always segment of index=2. */ cfg.base.is_pages = mentry.segments[2].flags.length; + cfg.base.audio_fmt.sampling_freq = params->codec.sample_rate; + cfg.base.audio_fmt.bit_depth = bps; + cfg.base.audio_fmt.num_channels = params->codec.ch_out; + cfg.base.audio_fmt.valid_bit_depth = bps; cfg.gtw_cfg.node_id = node_id; cfg.gtw_cfg.dma_buffer_size = buffer_size; @@ -128,8 +132,6 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, struct hdac_ext_stream *host_stream = avs_compr_get_host_stream(cstream); struct snd_compr_runtime *rtd = cstream->runtime; struct avs_dev *adev = to_avs_dev(dai->dev); - /* compr params do not store bit depth, default to S32_LE. */ - snd_pcm_format_t format = SNDRV_PCM_FORMAT_S32_LE; unsigned int format_val; int bps, ret; @@ -142,7 +144,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, ret = snd_compr_malloc_pages(cstream, rtd->buffer_size); if (ret < 0) return ret; - bps = snd_pcm_format_physical_width(format); + bps = snd_pcm_format_physical_width(params->codec.format); if (bps < 0) return bps; format_val = snd_hdac_stream_format(params->codec.ch_out, bps, params->codec.sample_rate); @@ -166,7 +168,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, node_id.vindex = hdac_stream(host_stream)->stream_tag - 1; node_id.dma_type = AVS_DMA_HDA_HOST_INPUT; - ret = avs_dsp_init_probe(adev, node_id, rtd->dma_bytes); + ret = avs_dsp_init_probe(adev, params, bps, node_id, rtd->dma_bytes); if (ret < 0) { dev_err(dai->dev, "probe init failed: %d\n", ret); avs_dsp_enable_d0ix(adev); diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 3c8b10e21ceb..4853f4f31786 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -227,33 +227,6 @@ static const struct snd_soc_acpi_endpoint cs42l43_amp_spkagg_endpoints[] = { }, }; -static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { - { /* Jack Playback Endpoint */ - .num = 0, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* DMIC Capture Endpoint */ - .num = 1, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Jack Capture Endpoint */ - .num = 2, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Speaker Playback Endpoint */ - .num = 3, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, -}; - static const struct snd_soc_acpi_adr_device cs42l43_2_adr[] = { { .adr = 0x00023001fa424301ull, @@ -305,15 +278,6 @@ static const struct snd_soc_acpi_adr_device cs35l56_3_3amp_adr[] = { } }; -static const struct snd_soc_acpi_adr_device cs42l43_3_adr[] = { - { - .adr = 0x00033001FA424301ull, - .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), - .endpoints = cs42l43_endpoints, - .name_prefix = "cs42l43" - } -}; - static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { { .adr = 0x000030025D071101ull, @@ -486,15 +450,6 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l2_cs35l56x6_l13[] = { {} }; -static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(cs42l43_3_adr), - .adr_d = cs42l43_3_adr, - }, - {} -}; - static const struct snd_soc_acpi_link_adr ptl_rt721_l0[] = { { .mask = BIT(0), @@ -714,13 +669,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { }, { .link_mask = BIT(3), - .links = ptl_cs42l43_l3, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-ptl-cs42l43-l3.tplg", - .get_function_tplg_files = sof_sdw_get_tplg_files, - }, - { - .link_mask = BIT(3), .links = ptl_sdw_rt712_vb_l3_rt1320_l3, .drv_name = "sof_sdw", .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index 5d025ad72263..c63b3444bc17 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -3176,7 +3176,6 @@ err_pm_put: static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8195_afe_runtime_suspend(&pdev->dev); } diff --git a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c index 10793bbe9275..d48252cd96ac 100644 --- a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c +++ b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c @@ -2238,7 +2238,6 @@ static void mt8365_afe_pcm_dev_remove(struct platform_device *pdev) mt8365_afe_disable_top_cg(afe, MT8365_TOP_CG_AFE); - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8365_afe_runtime_suspend(&pdev->dev); } diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c index 06a802f9dba5..67e9ca18883c 100644 --- a/sound/soc/qcom/qdsp6/q6asm.c +++ b/sound/soc/qcom/qdsp6/q6asm.c @@ -377,9 +377,9 @@ static void q6asm_audio_client_free_buf(struct audio_client *ac, spin_lock_irqsave(&ac->lock, flags); port->num_periods = 0; + spin_unlock_irqrestore(&ac->lock, flags); kfree(port->buf); port->buf = NULL; - spin_unlock_irqrestore(&ac->lock, flags); } /** diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index 78e327bc2f07..187f37ffe328 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -192,6 +192,7 @@ static int sc8280xp_platform_probe(struct platform_device *pdev) static const struct of_device_id snd_sc8280xp_dt_match[] = { {.compatible = "qcom,qcm6490-idp-sndcard", "qcm6490"}, + {.compatible = "qcom,qcs615-sndcard", "qcs615"}, {.compatible = "qcom,qcs6490-rb3gen2-sndcard", "qcs6490"}, {.compatible = "qcom,qcs8275-sndcard", "qcs8300"}, {.compatible = "qcom,qcs9075-sndcard", "sa8775p"}, diff --git a/sound/soc/renesas/rcar/ssiu.c b/sound/soc/renesas/rcar/ssiu.c index faf351126d57..244fb833292a 100644 --- a/sound/soc/renesas/rcar/ssiu.c +++ b/sound/soc/renesas/rcar/ssiu.c @@ -509,7 +509,7 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai, int rsnd_ssiu_probe(struct rsnd_priv *priv) { struct device *dev = rsnd_priv_to_dev(priv); - struct device_node *node; + struct device_node *node __free(device_node) = rsnd_ssiu_of_node(priv); struct rsnd_ssiu *ssiu; struct rsnd_mod_ops *ops; const int *list = NULL; @@ -522,7 +522,6 @@ int rsnd_ssiu_probe(struct rsnd_priv *priv) * see * rsnd_ssiu_bufsif_to_id() */ - node = rsnd_ssiu_of_node(priv); if (node) nr = rsnd_node_count(priv, node, SSIU_NAME); else diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index e00940814157..81b883e8ac92 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -85,6 +85,7 @@ struct rz_ssi_stream { struct snd_pcm_substream *substream; int fifo_sample_size; /* sample capacity of SSI FIFO */ int dma_buffer_pos; /* The address for the next DMA descriptor */ + int completed_dma_buf_pos; /* The address of the last completed DMA descriptor. */ int period_counter; /* for keeping track of periods transferred */ int sample_width; int buffer_pos; /* current frame position in the buffer */ @@ -215,6 +216,7 @@ static void rz_ssi_stream_init(struct rz_ssi_stream *strm, rz_ssi_set_substream(strm, substream); strm->sample_width = samples_to_bytes(runtime, 1); strm->dma_buffer_pos = 0; + strm->completed_dma_buf_pos = 0; strm->period_counter = 0; strm->buffer_pos = 0; @@ -437,6 +439,10 @@ static void rz_ssi_pointer_update(struct rz_ssi_stream *strm, int frames) snd_pcm_period_elapsed(strm->substream); strm->period_counter = current_period; } + + strm->completed_dma_buf_pos += runtime->period_size; + if (strm->completed_dma_buf_pos >= runtime->buffer_size) + strm->completed_dma_buf_pos = 0; } static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) @@ -778,10 +784,14 @@ no_dma: return -ENODEV; } -static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) +static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) { + struct snd_pcm_substream *substream = strm->substream; + struct snd_pcm_runtime *runtime = substream->runtime; int ret; + strm->dma_buffer_pos = strm->completed_dma_buf_pos + runtime->period_size; + if (rz_ssi_is_stream_running(&ssi->playback) || rz_ssi_is_stream_running(&ssi->capture)) return 0; @@ -794,16 +804,6 @@ static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) ssi->hw_params_cache.channels); } -static void rz_ssi_streams_suspend(struct rz_ssi_priv *ssi) -{ - if (rz_ssi_is_stream_running(&ssi->playback) || - rz_ssi_is_stream_running(&ssi->capture)) - return; - - ssi->playback.dma_buffer_pos = 0; - ssi->capture.dma_buffer_pos = 0; -} - static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -813,7 +813,7 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, switch (cmd) { case SNDRV_PCM_TRIGGER_RESUME: - ret = rz_ssi_trigger_resume(ssi); + ret = rz_ssi_trigger_resume(ssi, strm); if (ret) return ret; @@ -852,7 +852,6 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_SUSPEND: rz_ssi_stop(ssi, strm); - rz_ssi_streams_suspend(ssi); break; case SNDRV_PCM_TRIGGER_STOP: diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 13f68f7b6dd6..0ccb6775f4de 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -894,7 +894,8 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti return ret; } - control->values = devm_kzalloc(dev, hweight64(control->cn_list), GFP_KERNEL); + control->values = devm_kcalloc(dev, hweight64(control->cn_list), + sizeof(int), GFP_KERNEL); if (!control->values) return -ENOMEM; diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 56c72ef27e7b..3848c7df1916 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -312,6 +312,26 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_num = 1, }, { + .part_id = 0x1321, + .dais = { + { + .direction = {true, false}, + .dai_name = "rt1320-aif1", + .component_name = "rt1320", + .dai_type = SOC_SDW_DAI_TYPE_AMP, + .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, + .init = asoc_sdw_rt_amp_init, + .exit = asoc_sdw_rt_amp_exit, + .rtd_init = asoc_sdw_rt_amp_spk_rtd_init, + .controls = generic_spk_controls, + .num_controls = ARRAY_SIZE(generic_spk_controls), + .widgets = generic_spk_widgets, + .num_widgets = ARRAY_SIZE(generic_spk_widgets), + }, + }, + .dai_num = 1, + }, + { .part_id = 0x714, .version_id = 3, .ignore_internal_dmic = true, @@ -618,7 +638,6 @@ struct asoc_sdw_codec_info codec_info_list[] = { { .direction = {true, false}, .dai_name = "cs42l43-dp6", - .component_name = "cs42l43", .dai_type = SOC_SDW_DAI_TYPE_AMP, .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, .init = asoc_sdw_cs42l43_spk_init, @@ -1258,7 +1277,7 @@ static int is_sdca_endpoint_present(struct device *dev, struct sdw_slave *slave; struct device *sdw_dev; const char *sdw_codec_name; - int i; + int ret, i; dlc = kzalloc(sizeof(*dlc), GFP_KERNEL); if (!dlc) @@ -1288,13 +1307,16 @@ static int is_sdca_endpoint_present(struct device *dev, } slave = dev_to_sdw_dev(sdw_dev); - if (!slave) - return -EINVAL; + if (!slave) { + ret = -EINVAL; + goto put_device; + } /* Make sure BIOS provides SDCA properties */ if (!slave->sdca_data.interface_revision) { dev_warn(&slave->dev, "SDCA properties not found in the BIOS\n"); - return 1; + ret = 1; + goto put_device; } for (i = 0; i < slave->sdca_data.num_functions; i++) { @@ -1303,7 +1325,8 @@ static int is_sdca_endpoint_present(struct device *dev, if (dai_type == dai_info->dai_type) { dev_dbg(&slave->dev, "DAI type %d sdca function %s found\n", dai_type, slave->sdca_data.function[i].name); - return 1; + ret = 1; + goto put_device; } } @@ -1311,7 +1334,11 @@ static int is_sdca_endpoint_present(struct device *dev, "SDCA device function for DAI type %d not supported, skip endpoint\n", dai_info->dai_type); - return 0; + ret = 0; + +put_device: + put_device(sdw_dev); + return ret; } int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card, diff --git a/sound/usb/card.c b/sound/usb/card.c index 1d5a65eac933..270dad84d825 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -891,10 +891,16 @@ get_alias_quirk(struct usb_device *dev, unsigned int id) */ static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) { + struct usb_interface *iface; + if (check_delayed_register_option(chip) == ifnum || - chip->last_iface == ifnum || - usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) + chip->last_iface == ifnum) + return snd_card_register(chip->card); + + iface = usb_ifnum_to_if(chip->dev, chip->last_iface); + if (iface && usb_interface_claimed(iface)) return snd_card_register(chip->card); + return 0; } diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 880f5afcce60..cc15624ecaff 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1362,6 +1362,11 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ep->sample_rem = ep->cur_rate % ep->pps; ep->packsize[0] = ep->cur_rate / ep->pps; ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps; + if (ep->packsize[1] > ep->maxpacksize) { + usb_audio_dbg(chip, "Too small maxpacksize %u for rate %u / pps %u\n", + ep->maxpacksize, ep->cur_rate, ep->pps); + return -EINVAL; + } /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 34bcbfd8b54e..72b900505d2c 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1147,6 +1147,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x045e, 0x070f): /* MS LifeChat LX-3000 Headset */ + if (!strcmp(kctl->id.name, "Speaker Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for MS LifeChat LX-3000\n"); + cval->res = 192; + } + break; + case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): @@ -1189,6 +1197,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->min = -14208; /* Mute under it */ } break; + case USB_ID(0x12d1, 0x3a07): /* Huawei Technologies Co., Ltd. CM-Q3 */ + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for Huawei Technologies Co., Ltd. CM-Q3\n"); + cval->min = -11264; /* Mute under it */ + } + break; } } @@ -3071,6 +3086,8 @@ static int snd_usb_mixer_controls_badd(struct usb_mixer_interface *mixer, int i; assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; + if (!assoc) + return -EINVAL; /* Detect BADD capture/playback channels from AS EP descriptors */ for (i = 0; i < assoc->bInterfaceCount; i++) { diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c index 15960d25e748..6e09e074c0e7 100644 --- a/sound/usb/mixer_s1810c.c +++ b/sound/usb/mixer_s1810c.c @@ -178,7 +178,7 @@ snd_sc1810c_get_status_field(struct usb_device *dev, pkt_out.fields[SC1810C_STATE_F1_IDX] = SC1810C_SET_STATE_F1; pkt_out.fields[SC1810C_STATE_F2_IDX] = SC1810C_SET_STATE_F2; - ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + ret = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), SC1810C_SET_STATE_REQ, SC1810C_SET_STATE_REQTYPE, (*seqnum), 0, &pkt_out, sizeof(pkt_out)); @@ -597,15 +597,6 @@ int snd_sc1810_init_mixer(struct usb_mixer_interface *mixer) if (!list_empty(&chip->mixer_list)) return 0; - dev_info(&dev->dev, - "Presonus Studio 1810c, device_setup: %u\n", chip->setup); - if (chip->setup == 1) - dev_info(&dev->dev, "(8out/18in @ 48kHz)\n"); - else if (chip->setup == 2) - dev_info(&dev->dev, "(6out/8in @ 192kHz)\n"); - else - dev_info(&dev->dev, "(8out/14in @ 96kHz)\n"); - ret = snd_s1810c_init_mixer_maps(chip); if (ret < 0) return ret; @@ -634,16 +625,28 @@ int snd_sc1810_init_mixer(struct usb_mixer_interface *mixer) if (ret < 0) return ret; - // The 1824c has a Mono Main switch instead of a - // A/B select switch. - if (mixer->chip->usb_id == USB_ID(0x194f, 0x010d)) { - ret = snd_s1810c_switch_init(mixer, &snd_s1824c_mono_sw); + switch (chip->usb_id) { + case USB_ID(0x194f, 0x010c): /* Presonus Studio 1810c */ + dev_info(&dev->dev, + "Presonus Studio 1810c, device_setup: %u\n", chip->setup); + if (chip->setup == 1) + dev_info(&dev->dev, "(8out/18in @ 48kHz)\n"); + else if (chip->setup == 2) + dev_info(&dev->dev, "(6out/8in @ 192kHz)\n"); + else + dev_info(&dev->dev, "(8out/14in @ 96kHz)\n"); + + ret = snd_s1810c_switch_init(mixer, &snd_s1810c_ab_sw); if (ret < 0) return ret; - } else if (mixer->chip->usb_id == USB_ID(0x194f, 0x010c)) { - ret = snd_s1810c_switch_init(mixer, &snd_s1810c_ab_sw); + + break; + case USB_ID(0x194f, 0x010d): /* Presonus Studio 1824c */ + ret = snd_s1810c_switch_init(mixer, &snd_s1824c_mono_sw); if (ret < 0) return ret; + + break; } return ret; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 634cb4fb586f..5e30bff69f82 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2022,6 +2022,8 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x16d0, 0x09d8): /* NuPrime IDA-8 */ case USB_ID(0x16d0, 0x09db): /* NuPrime Audio DAC-9 */ case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */ + case USB_ID(0x16d0, 0x0ab1): /* PureAudio APA DAC */ + case USB_ID(0x16d0, 0xeca1): /* PureAudio Lotus DAC5, DAC5 SE, DAC5 Pro */ case USB_ID(0x1db5, 0x0003): /* Bryston BDA3 */ case USB_ID(0x20a0, 0x4143): /* WaveIO USB Audio 2.0 */ case USB_ID(0x22e1, 0xca01): /* HDTA Serenade DSD */ @@ -2153,6 +2155,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { DEVICE_FLG(0x045e, 0x083c, /* MS USB Link headset */ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY | QUIRK_FLAG_DISABLE_AUTOSUSPEND), + DEVICE_FLG(0x045e, 0x070f, /* MS LifeChat LX-3000 Headset */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x046d, 0x0807, /* Logitech Webcam C500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIC_RES_384), DEVICE_FLG(0x046d, 0x0808, /* Logitech Webcam C600 */ @@ -2180,6 +2184,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIC_RES_384), DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x046d, 0x0a8f, /* Logitech H390 headset */ + QUIRK_FLAG_CTL_MSG_DELAY_1M | + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x0499, 0x1506, /* Yamaha THR5 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ @@ -2262,6 +2269,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1038, 0x1294, /* SteelSeries Arctis Pro Wireless */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x12d1, 0x3a07, /* Huawei Technologies Co., Ltd. */ @@ -2292,6 +2301,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CLOCK_SOURCE), DEVICE_FLG(0x1686, 0x00dd, /* Zoom R16/24 */ QUIRK_FLAG_TX_LENGTH | QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x16d0, 0x0ab1, /* PureAudio APA DAC */ + QUIRK_FLAG_DSD_RAW), + DEVICE_FLG(0x16d0, 0xeca1, /* PureAudio Lotus DAC5, DAC5 SE and DAC5 Pro */ + QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x17aa, 0x1046, /* Lenovo ThinkStation P620 Rear Line-in, Line-out and Microphone */ QUIRK_FLAG_DISABLE_AUTOSUSPEND), DEVICE_FLG(0x17aa, 0x104d, /* Lenovo ThinkStation P620 Internal Speaker + Front Headset */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 06fc0479a23f..4091a776e37a 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -444,6 +444,7 @@ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ #define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ @@ -495,6 +496,9 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ /* * BUG word(s) @@ -551,4 +555,5 @@ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index f627196eb796..9e1720d73244 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -315,9 +315,12 @@ #define PERF_CAP_PT_IDX 16 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define PERF_CAP_LBR_FMT 0x3f #define PERF_CAP_PEBS_TRAP BIT_ULL(6) #define PERF_CAP_ARCH_REG BIT_ULL(7) #define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_FW_WRITES BIT_ULL(13) #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ @@ -633,6 +636,11 @@ #define MSR_AMD_PPIN 0xc00102f1 #define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 + +#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) + #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_TW_CFG 0xc0011023 @@ -701,8 +709,15 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SAVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SAVIC_EN_BIT 0 +#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) +#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 #define MSR_AMD64_RMP_CFG 0xc0010136 @@ -735,6 +750,7 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 /* AMD Hardware Feedback Support MSRs */ #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 @@ -1225,6 +1241,8 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff #define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* AMD-V MSRs */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0f15d683817d..d420c9c066d4 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -35,6 +35,11 @@ #define MC_VECTOR 18 #define XM_VECTOR 19 #define VE_VECTOR 20 +#define CP_VECTOR 21 + +#define HV_VECTOR 28 +#define VC_VECTOR 29 +#define SX_VECTOR 30 /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT @@ -411,6 +416,35 @@ struct kvm_xcrs { __u64 padding[16]; }; +#define KVM_X86_REG_TYPE_MSR 2 +#define KVM_X86_REG_TYPE_KVM 3 + +#define KVM_X86_KVM_REG_SIZE(reg) \ +({ \ + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ +}) + +#define KVM_X86_REG_TYPE_SIZE(type, reg) \ +({ \ + __u64 type_size = (__u64)type << 32; \ + \ + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ + type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \ + 0; \ + type_size; \ +}) + +#define KVM_X86_REG_ID(type, index) \ + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) + +#define KVM_X86_REG_MSR(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) +#define KVM_X86_REG_KVM(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) + +/* KVM-defined registers starting from 0 */ +#define KVM_REG_GUEST_SSP 0 + #define KVM_SYNC_X86_REGS (1UL << 0) #define KVM_SYNC_X86_SREGS (1UL << 1) #define KVM_SYNC_X86_EVENTS (1UL << 2) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 9c640a521a67..650e3256ea7d 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -118,6 +118,10 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 +#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 +#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f0f4a4cf84a7..1baa86dfe029 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -93,7 +93,10 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 +#define EXIT_REASON_MSR_READ_IMM 84 +#define EXIT_REASON_MSR_WRITE_IMM 85 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -158,7 +161,9 @@ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ { EXIT_REASON_NOTIFY, "NOTIFY" }, \ - { EXIT_REASON_TDCALL, "TDCALL" } + { EXIT_REASON_TDCALL, "TDCALL" }, \ + { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \ + { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 009633294b09..35aeeaf5f711 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -182,7 +182,7 @@ bpftool prog tracelog bpftool prog tracelog { stdout | stderr } *PROG* Dump the BPF stream of the program. BPF programs can write to these streams - at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write + at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write error messages to the standard error stream. This facility should be used only for debugging purposes. diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 49b0add392b1..95646290cb89 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -107,7 +107,7 @@ all: $(FILES) __BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl - BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd + BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -ldl -lz -llzma -lzstd __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 @@ -115,7 +115,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty + $(BUILD_ALL) $(OUTPUT)test-hello.bin: $(BUILD) diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index e63a71d3c607..3cd5cf15e3c9 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -597,35 +597,66 @@ struct drm_set_version { int drm_dd_minor; }; -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ struct drm_gem_close { - /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ struct drm_gem_flink { - /** Handle for the object being named */ __u32 handle; - - /** Returned global name */ __u32 name; }; -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. + * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ struct drm_gem_open { - /** Name of object being opened */ __u32 name; - - /** Returned handle for the object */ __u32 handle; - - /** Returned size of the object */ __u64 size; }; /** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + +/** * DRM_CAP_DUMB_BUFFER * * If set to 1, the driver supports creating dumb buffers via the @@ -1309,6 +1340,14 @@ extern "C" { */ #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f0f0d49d2544..52f6000ab020 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -962,6 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1598,6 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 80c028540656..d4e4e388e625 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -315,20 +315,20 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) -extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, - __u32 len__sz, void *aux__prog) __weak __ksym; - -#define bpf_stream_printk(stream_id, fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \ }) /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index a8f6cd4841b0..dbe32a5d02cd 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -311,7 +311,7 @@ struct pt_regs___arm64 { #define __PT_RET_REG regs[31] #define __PT_FP_REG __unsupported__ #define __PT_RC_REG gpr[3] -#define __PT_SP_REG sp +#define __PT_SP_REG gpr[1] #define __PT_IP_REG nip #elif defined(bpf_target_sparc) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 29481989ea76..ced7dce44efb 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -313,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) struct nlattr *attr; size_t len; - len = strlen(str); + len = strlen(str) + 1; if (__ynl_attr_put_overflow(nlh, len)) return; @@ -321,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) attr->nla_type = attr_type; strcpy((char *)ynl_attr_data(attr), str); - attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); + attr->nla_len = NLA_HDRLEN + len; nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index 9b523cbb3568..fd0f6b8d54d1 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -44,6 +44,9 @@ def print_field(reply, *desc): Pretty-print a set of fields from the reply. desc specifies the fields and the optional type (bool/yn). """ + if not reply: + return + if len(desc) == 0: return print_field(reply, *zip(reply.keys(), reply.keys())) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 58086b101057..aadeb3abcad8 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -861,6 +861,18 @@ class TypeIndexedArray(Type): return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] + def free_needs_iter(self): + return self.sub_type == 'nest' + + def _free_lines(self, ri, var, ref): + lines = [] + if self.sub_type == 'nest': + lines += [ + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + ] + lines += f"free({var}->{ref}{self.c_name});", + return lines class TypeNestTypeValue(Type): def _complex_member_type(self, ri): diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a5770570b106..9004fbc06769 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -217,6 +217,7 @@ static bool is_rust_noreturn(const struct symbol *func) * these come from the Rust standard library). */ return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || + str_ends_with(func->name, "_4core6option13expect_failed") || str_ends_with(func->name, "_4core6option13unwrap_failed") || str_ends_with(func->name, "_4core6result13unwrap_failed") || str_ends_with(func->name, "_4core9panicking5panic") || @@ -3515,8 +3516,11 @@ static bool skip_alt_group(struct instruction *insn) { struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + if (!insn->alt_group) + return false; + /* ANNOTATE_IGNORE_ALTERNATIVE */ - if (insn->alt_group && insn->alt_group->ignore) + if (insn->alt_group->ignore) return true; /* @@ -4710,8 +4714,8 @@ static int check_abs_references(struct objtool_file *file) for_each_reloc(sec->rsec, reloc) { if (arch_absolute_reloc(file->elf, reloc)) { - WARN("section %s has absolute relocation at offset 0x%lx", - sec->name, reloc_offset(reloc)); + WARN("section %s has absolute relocation at offset 0x%llx", + sec->name, (unsigned long long)reloc_offset(reloc)); ret++; } } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5700516aa84a..2dd5f5a60568 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -354,9 +354,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt -FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl -FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl - CORE_CFLAGS += -fno-omit-frame-pointer CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra @@ -930,6 +927,8 @@ ifdef BUILD_NONDISTRO ifeq ($(feature-libbfd), 1) EXTLIBS += -lbfd -lopcodes + FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl + FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl else # we are on a system that requires -liberty and (maybe) -lz # to link against -lbfd; test each case individually here diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 92cf0fe2291e..ced2a1deecd7 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,7 @@ 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq 335 common uretprobe sys_uretprobe +336 common uprobe sys_uprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 078634461df2..e8962c985d34 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1867,6 +1867,7 @@ static int __cmd_report(bool display_info) eops.sample = process_sample_event; eops.comm = perf_event__process_comm; eops.mmap = perf_event__process_mmap; + eops.mmap2 = perf_event__process_mmap2; eops.namespaces = perf_event__process_namespaces; eops.tracing_data = perf_event__process_tracing_data; session = perf_session__new(&data, &eops); @@ -2023,6 +2024,7 @@ static int __cmd_contention(int argc, const char **argv) eops.sample = process_sample_event; eops.comm = perf_event__process_comm; eops.mmap = perf_event__process_mmap; + eops.mmap2 = perf_event__process_mmap2; eops.tracing_data = perf_event__process_tracing_data; perf_env__init(&host_env); diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 7248a74ca2a3..6dd90519f45c 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -13,15 +13,18 @@ cleanup() { rm -f ${perfdata} rm -f ${result} rm -f ${errout} - trap - EXIT TERM INT + trap - EXIT TERM INT ERR } trap_cleanup() { + if (( $? == 139 )); then #SIGSEGV + err=1 + fi echo "Unexpected signal in ${FUNCNAME[1]}" cleanup exit ${err} } -trap trap_cleanup EXIT TERM INT +trap trap_cleanup EXIT TERM INT ERR check() { if [ "$(id -u)" != 0 ]; then @@ -145,7 +148,7 @@ test_aggr_cgroup() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -g -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -271,7 +274,7 @@ test_cgroup_filter() return fi - perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a cgroup result:" "$(cat "${result}")" err=1 @@ -279,7 +282,7 @@ test_cgroup_filter() fi cgroup=$(cat "${result}" | awk '{ print $3 }') - perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a result with cgroup filter:" "$(cat "${cgroup}")" err=1 @@ -338,4 +341,5 @@ test_aggr_task_stack_filter test_cgroup_filter test_csv_output +cleanup exit ${err} diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index f291ab4f94eb..3741ea1b73d8 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -111,6 +111,7 @@ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 0bd678a4a10e..beb4c2d1e41c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t; /* buffered IO that drops the cache after reading or writing data */ #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) +/* prevent pipe and socket writes from raising SIGPIPE */ +#define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ - RWF_DONTCACHE) + RWF_DONTCACHE | RWF_NOSIGNAL) #define PROCFS_IOCTL_MAGIC 'f' diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index ed3aed264aeb..51c4e8c82b1e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -177,7 +177,17 @@ struct prctl_mm_map { #define PR_GET_TID_ADDRESS 40 +/* + * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0 + * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively + * return "1" when no flags were specified for PR_SET_THP_DISABLE. + */ #define PR_SET_THP_DISABLE 41 +/* + * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / + * VM_HUGEPAGE, MADV_COLLAPSE). + */ +# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42 /* diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4f2a6e10ed5c..4e12be579140 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1022,12 +1022,9 @@ static int write_bpf_prog_info(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.infos_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.infos_cnt == 0) goto out; root = &env->bpf_progs.infos; @@ -1067,13 +1064,10 @@ static int write_bpf_btf(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.btfs_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.btfs_cnt == 0) goto out; root = &env->bpf_progs.btfs; diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c index 01147fbf73b3..6434c2dccd4a 100644 --- a/tools/perf/util/libbfd.c +++ b/tools/perf/util/libbfd.c @@ -38,6 +38,39 @@ struct a2l_data { asymbol **syms; }; +static bool perf_bfd_lock(void *bfd_mutex) +{ + mutex_lock(bfd_mutex); + return true; +} + +static bool perf_bfd_unlock(void *bfd_mutex) +{ + mutex_unlock(bfd_mutex); + return true; +} + +static void perf_bfd_init(void) +{ + static struct mutex bfd_mutex; + + mutex_init_recursive(&bfd_mutex); + + if (bfd_init() != BFD_INIT_MAGIC) { + pr_err("Error initializing libbfd\n"); + return; + } + if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex)) + pr_err("Error initializing libbfd threading\n"); +} + +static void ensure_bfd_init(void) +{ + static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT; + + pthread_once(&bfd_init_once, perf_bfd_init); +} + static int bfd_error(const char *string) { const char *errmsg; @@ -132,6 +165,7 @@ static struct a2l_data *addr2line_init(const char *path) bfd *abfd; struct a2l_data *a2l = NULL; + ensure_bfd_init(); abfd = bfd_openr(path, NULL); if (abfd == NULL) return NULL; @@ -288,6 +322,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) bfd *abfd; u64 start, len; + ensure_bfd_init(); abfd = bfd_openr(debugfile, NULL); if (!abfd) return -1; @@ -393,6 +428,7 @@ int libbfd__read_build_id(const char *filename, struct build_id *bid, bool block if (fd < 0) return -1; + ensure_bfd_init(); abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); if (!abfd) return -1; @@ -421,6 +457,7 @@ int libbfd_filename__read_debuglink(const char *filename, char *debuglink, asection *section; bfd *abfd; + ensure_bfd_init(); abfd = bfd_openr(filename, NULL); if (!abfd) return -1; @@ -480,6 +517,7 @@ int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused, memset(tpath, 0, sizeof(tpath)); perf_exe(tpath, sizeof(tpath)); + ensure_bfd_init(); bfdf = bfd_openr(tpath, NULL); if (bfdf == NULL) abort(); diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c index bca7f0717f35..7aa1f3f55a7d 100644 --- a/tools/perf/util/mutex.c +++ b/tools/perf/util/mutex.c @@ -17,7 +17,7 @@ static void check_err(const char *fn, int err) #define CHECK_ERR(err) check_err(__func__, err) -static void __mutex_init(struct mutex *mtx, bool pshared) +static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive) { pthread_mutexattr_t attr; @@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared) /* In normal builds enable error checking, such as recursive usage. */ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); #endif + if (recursive) + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)); if (pshared) CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); - CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); CHECK_ERR(pthread_mutexattr_destroy(&attr)); } void mutex_init(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/false); + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false); } void mutex_init_pshared(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/true); + __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false); +} + +void mutex_init_recursive(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true); } void mutex_destroy(struct mutex *mtx) diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 38458f00846f..70232d8d094f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -104,6 +104,8 @@ void mutex_init(struct mutex *mtx); * process-private attribute. */ void mutex_init_pshared(struct mutex *mtx); +/* Initializes a mutex that may be recursively held on the same thread. */ +void mutex_init_recursive(struct mutex *mtx); void mutex_destroy(struct mutex *mtx); void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc26b7bf302b..948d3e8ad782 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -112,9 +112,13 @@ static bool symbol_type__filter(char __symbol_type) // 'N' first seen in: // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ // a seemingly Rust mangled name + // Ditto for '1': + // root@x1:~# grep ' 1 ' /proc/kallsyms + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ char symbol_type = toupper(__symbol_type); return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || - __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N'; + __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1'; } static int prefix_underscores_count(const char *str) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653e..f2a2fd236ca8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y CONFIG_KEYS=y CONFIG_LIRC=y +CONFIG_LIVEPATCH=y CONFIG_LWTUNNEL=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SRCVERSION_ALL=y @@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_PACKET=y CONFIG_RC_CORE=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_LIVEPATCH=m CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index bb143de68875..e27d66b75fb1 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -144,11 +144,17 @@ static void test_parse_test_list_file(void) if (!ASSERT_OK(ferror(fp), "prepare tmp")) goto out_fclose; + if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp")) + goto out_fclose; + init_test_filter_set(&set); - ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) + goto out_fclose; + + if (!ASSERT_EQ(set.cnt, 4, "test count")) + goto out_free_set; - ASSERT_EQ(set.cnt, 4, "test count"); ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); @@ -158,8 +164,8 @@ static void test_parse_test_list_file(void) ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); +out_free_set: free_test_filter_set(&set); - out_fclose: fclose(fp); out_remove: diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c new file mode 100644 index 000000000000..72aa5376c30e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "testing_helpers.h" +#include "livepatch_trampoline.skel.h" + +static int load_livepatch(void) +{ + char path[4096]; + + /* CI will set KBUILD_OUTPUT */ + snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko", + getenv("KBUILD_OUTPUT") ? : "../../../.."); + + return load_module(path, env_verbosity > VERBOSE_NONE); +} + +static void unload_livepatch(void) +{ + /* Disable the livepatch before unloading the module */ + system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled"); + + unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE); +} + +static void read_proc_cmdline(void) +{ + char buf[4096]; + int fd, ret; + + fd = open("/proc/cmdline", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open /proc/cmdline")) + return; + + ret = read(fd, buf, sizeof(buf)); + if (!ASSERT_GT(ret, 0, "read /proc/cmdline")) + goto out; + + ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp"); + +out: + close(fd); +} + +static void __test_livepatch_trampoline(bool fexit_first) +{ + struct livepatch_trampoline *skel = NULL; + int err; + + skel = livepatch_trampoline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + skel->bss->my_pid = getpid(); + + if (!fexit_first) { + /* fentry program is loaded first by default */ + err = livepatch_trampoline__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + } else { + /* Manually load fexit program first. */ + skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline); + if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit")) + goto out; + + skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline); + if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry")) + goto out; + } + + read_proc_cmdline(); + + ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit"); + ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit"); +out: + livepatch_trampoline__destroy(skel); +} + +void test_livepatch_trampoline(void) +{ + int retry_cnt = 0; + +retry: + if (load_livepatch()) { + if (retry_cnt) { + ASSERT_OK(1, "load_livepatch"); + goto out; + } + /* + * Something else (previous run of the same test?) loaded + * the KLP module. Unload the KLP module and retry. + */ + unload_livepatch(); + retry_cnt++; + goto retry; + } + + if (test__start_subtest("fentry_first")) + __test_livepatch_trampoline(false); + + if (test__start_subtest("fexit_first")) + __test_livepatch_trampoline(true); +out: + unload_livepatch(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd2..8fade8bdc451 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,13 @@ #include <netinet/in.h> #include <test_progs.h> #include <unistd.h> +#include <errno.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +438,142 @@ close_cgroup: close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. */ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = accept(listen_fd, NULL, 0); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = accept(listen_fd, NULL, 0); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = send(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv(client_fd2, rcv, sizeof(rcv), 0); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = accept(listen_fd, NULL, 0); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +582,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c new file mode 100644 index 000000000000..c9efdd2a5b18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "stacktrace_ips.skel.h" + +#ifdef __x86_64__ +static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...) +{ + __u64 ips[PERF_MAX_STACK_DEPTH]; + struct ksyms *ksyms = NULL; + int i, err = 0; + va_list args; + + /* sorted by addr */ + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) + return -1; + + /* unlikely, but... */ + if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max")) + return -1; + + err = bpf_map_lookup_elem(fd, &key, ips); + if (err) + goto out; + + /* + * Compare all symbols provided via arguments with stacktrace ips, + * and their related symbol addresses.t + */ + va_start(args, cnt); + + for (i = 0; i < cnt; i++) { + unsigned long val; + struct ksym *ksym; + + val = va_arg(args, unsigned long); + ksym = ksym_search_local(ksyms, ips[i]); + if (!ASSERT_OK_PTR(ksym, "ksym_search_local")) + break; + ASSERT_EQ(ksym->addr, val, "stack_cmp"); + } + + va_end(args); + +out: + free_kallsyms_local(ksyms); + return err; +} + +static void test_stacktrace_ips_kprobe_multi(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts( + skel->progs.kprobe_multi_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void test_stacktrace_ips_raw_tp(void) +{ + __u32 info_len = sizeof(struct bpf_prog_info); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_info info = {}; + struct stacktrace_ips *skel; + __u64 bpf_prog_ksym = 0; + int err; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.rawtp_test = bpf_program__attach_raw_tracepoint( + skel->progs.rawtp_test, + "bpf_testmod_test_read"); + if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint")) + goto cleanup; + + /* get bpf program address */ + info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym); + info.nr_jited_ksyms = 1; + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test), + &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2, + bpf_prog_ksym, + ksym_get_addr("bpf_trace_run2")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void __test_stacktrace_ips(void) +{ + if (test__start_subtest("kprobe_multi")) + test_stacktrace_ips_kprobe_multi(false); + if (test__start_subtest("kretprobe_multi")) + test_stacktrace_ips_kprobe_multi(true); + if (test__start_subtest("raw_tp")) + test_stacktrace_ips_raw_tp(); +} +#else +static void __test_stacktrace_ips(void) +{ + test__skip(); +} +#endif + +void test_stacktrace_ips(void) +{ + __test_stacktrace_ips(); +} diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c index 05fa5ce7fc59..d00fd570255a 100644 --- a/tools/testing/selftests/bpf/progs/iters_looping.c +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -161,3 +161,56 @@ int simplest_loop(void *ctx) return 0; } + +__used +static void iterator_with_diff_stack_depth(int x) +{ + struct bpf_iter_num iter; + + asm volatile ( + "if r1 == 42 goto 0f;" + "*(u64 *)(r10 - 128) = 0;" + "0:" + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "goto 1b;" + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); +} + +SEC("socket") +__success +__naked int widening_stack_size_bug(void *ctx) +{ + /* + * Depending on iterator_with_diff_stack_depth() parameter value, + * subprogram stack depth is either 8 or 128 bytes. Arrange values so + * that it is 128 on a first call and 8 on a second. This triggered a + * bug in verifier's widen_imprecise_scalars() logic. + */ + asm volatile ( + "r6 = 0;" + "r1 = 0;" + "1:" + "call iterator_with_diff_stack_depth;" + "r1 = 42;" + "r6 += 1;" + "if r6 < 2 goto 1b;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c new file mode 100644 index 000000000000..15579d5bcd91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int fentry_hit; +int fexit_hit; +int my_pid; + +SEC("fentry/cmdline_proc_show") +int BPF_PROG(fentry_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fentry_hit = 1; + return 0; +} + +SEC("fexit/cmdline_proc_show") +int BPF_PROG(fexit_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fexit_hit = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c new file mode 100644 index 000000000000..d4eef0cbadb9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +int sk_index; +int redirect_idx; +int trace_port; +int helper_ret; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} sock_map SEC(".maps"); + +SEC("sockops") +int mptcp_sockmap_inject(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + + /* only accept specified connection */ + if (skops->local_port != trace_port || + skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + /* update sk handler */ + helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST); + + return 1; +} + +SEC("sk_skb/stream_verdict") +int mptcp_sockmap_redirect(struct __sk_buff *skb) +{ + /* redirect skb to the sk under sock_map[redirect_idx] */ + return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0); +} diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c new file mode 100644 index 000000000000..a96c8150d7f5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, stack_trace_t); +} stackmap SEC(".maps"); + +extern bool CONFIG_UNWINDER_ORC __kconfig __weak; + +/* + * This function is here to have CONFIG_UNWINDER_ORC + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_UNWINDER_ORC ? 0 : 1; +} + +__u32 stack_key; + +SEC("kprobe.multi") +int kprobe_multi_test(struct pt_regs *ctx) +{ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +SEC("raw_tp/bpf_testmod_test_read") +int rawtp_test(void *ctx) +{ + /* Skip ebpf program entry in the stack. */ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c index b4a0d0cc8ec8..3662515f0107 100644 --- a/tools/testing/selftests/bpf/progs/stream_fail.c +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -10,7 +10,7 @@ SEC("syscall") __failure __msg("Possibly NULL pointer passed") int stream_vprintk_null_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL); return 0; } @@ -18,7 +18,7 @@ SEC("syscall") __failure __msg("R3 type=scalar expected=") int stream_vprintk_scalar_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL); return 0; } @@ -26,7 +26,7 @@ SEC("syscall") __failure __msg("arg#1 doesn't point to a const string") int stream_vprintk_string_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c index 23217f06a3ec..663a80990f8f 100644 --- a/tools/testing/selftests/bpf/progs/task_work.c +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args) if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL); return 0; } @@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args) work = bpf_map_lookup_elem(&lrumap, &key); if (!work || work->data[0]) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c index 77fe8f28facd..1270953fd092 100644 --- a/tools/testing/selftests/bpf/progs/task_work_fail.c +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args) struct bpf_task_work tw; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL); return 0; } @@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args) struct task_struct *task; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL); return 0; } @@ -91,6 +91,6 @@ int map_null(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c index 90fca06fff56..55e555f7f41b 100644 --- a/tools/testing/selftests/bpf/progs/task_work_stress.c +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -51,8 +51,8 @@ int schedule_task_work(void *ctx) if (!work) return 0; } - err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, - process_work, NULL); + err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); if (err) __sync_fetch_and_add(&schedule_error, 1); else diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 6630a92b1b47..1204fbc58178 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -225,7 +225,7 @@ int trusted_to_untrusted(void *ctx) } char mem[16]; -u32 off; +u32 offset; SEC("tp_btf/sys_enter") __success @@ -240,9 +240,9 @@ int anything_to_untrusted(void *ctx) /* scalar to untrusted */ subprog_untrusted(0); /* variable offset to untrusted (map) */ - subprog_untrusted((void *)mem + off); + subprog_untrusted((void *)mem + offset); /* variable offset to untrusted (trusted) */ - subprog_untrusted((void *)bpf_get_current_task_btf() + off); + subprog_untrusted((void *)bpf_get_current_task_btf() + offset); return 0; } @@ -298,12 +298,12 @@ int anything_to_untrusted_mem(void *ctx) /* scalar to untrusted mem */ subprog_void_untrusted(0); /* variable offset to untrusted mem (map) */ - subprog_void_untrusted((void *)mem + off); + subprog_void_untrusted((void *)mem + offset); /* variable offset to untrusted mem (trusted) */ - subprog_void_untrusted(bpf_get_current_task_btf() + off); + subprog_void_untrusted(bpf_get_current_task_btf() + offset); /* variable offset to untrusted char/enum (map) */ - subprog_char_untrusted(mem + off); - subprog_enum_untrusted((void *)mem + off); + subprog_char_untrusted(mem + offset); + subprog_enum_untrusted((void *)mem + offset); return 0; } diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 8074bc5f6f20..ed0a4721d8fd 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, return a + (long)b + c + d + (long)e + f + g + h + i + j + k; } +noinline void bpf_testmod_stacktrace_test(void) +{ + /* used for stacktrace test as attach function */ + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_3(void) +{ + bpf_testmod_stacktrace_test(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_2(void) +{ + bpf_testmod_stacktrace_test_3(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_1(void) +{ + bpf_testmod_stacktrace_test_2(); + asm volatile (""); +} + int bpf_testmod_fentry_ok; noinline ssize_t @@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, 21, 22, 23, 24, 25, 26) != 231) goto out; + bpf_testmod_stacktrace_test_1(); + bpf_testmod_fentry_ok = 1; out: return -EIO; /* always fail */ diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore index d6c30b43a4bb..abbb13b6e96b 100644 --- a/tools/testing/selftests/cachestat/.gitignore +++ b/tools/testing/selftests/cachestat/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only test_cachestat +tmpshmcstat diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index c952640f163b..ab838bcb9ec5 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -226,7 +226,7 @@ bool run_cachestat_test(enum file_type type) int syscall_ret; size_t compute_len = PS * 512; struct cachestat_range cs_range = { PS, compute_len }; - char *filename = "tmpshmcstat"; + char *filename = "tmpshmcstat", *map; struct cachestat cs; bool ret = true; int fd; @@ -257,7 +257,7 @@ bool run_cachestat_test(enum file_type type) } break; case FILE_MMAP: - char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 9dc90a1b386d..7ab2824ed7b5 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -25,6 +25,26 @@ static inline int values_close(long a, long b, int err) return labs(a - b) <= (a + b) / 100 * err; } +/* + * Checks if two given values differ by less than err% of their sum and assert + * with detailed debug info if not. + */ +static inline int values_close_report(long a, long b, int err) +{ + long diff = labs(a - b); + long limit = (a + b) / 100 * err; + double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0; + int close = diff <= limit; + + if (!close) + fprintf(stderr, + "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | " + "tolerance=%d%% | actual_error=%.2f%%\n", + a, b, diff, limit, err, actual_err); + + return close; +} + extern ssize_t read_text(const char *path, char *buf, size_t max_len); extern ssize_t write_text(const char *path, char *buf, ssize_t len); diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 2a60e6c41940..d54e2317efff 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -219,7 +219,7 @@ static int test_cpucg_stats(const char *root) if (user_usec <= 0) goto cleanup; - if (!values_close(usage_usec, expected_usage_usec, 1)) + if (!values_close_report(usage_usec, expected_usage_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -291,7 +291,7 @@ static int test_cpucg_nice(const char *root) user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); - if (!values_close(nice_usec, expected_nice_usec, 1)) + if (!values_close_report(nice_usec, expected_nice_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -404,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children) goto cleanup; delta = children[i + 1].usage - children[i].usage; - if (!values_close(delta, children[0].usage, 35)) + if (!values_close_report(delta, children[0].usage, 35)) goto cleanup; } @@ -444,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children) int ret = KSFT_FAIL, i; for (i = 0; i < num_children - 1; i++) { - if (!values_close(children[i + 1].usage, children[0].usage, 15)) + if (!values_close_report(children[i + 1].usage, children[0].usage, 15)) goto cleanup; } @@ -573,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned) nested_leaf_usage = leaf[1].usage + leaf[2].usage; if (overprovisioned) { - if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15)) goto cleanup; - } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15)) + } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15)) goto cleanup; child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); if (child_usage <= 0) goto cleanup; - if (!values_close(child_usage, nested_leaf_usage, 1)) + if (!values_close_report(child_usage, nested_leaf_usage, 1)) goto cleanup; ret = KSFT_PASS; @@ -691,7 +691,7 @@ static int test_cpucg_max(const char *root) expected_usage_usec = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (!values_close(usage_usec, expected_usage_usec, 10)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -762,7 +762,7 @@ static int test_cpucg_max_nested(const char *root) expected_usage_usec = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (!values_close(usage_usec, expected_usage_usec, 10)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a..71ee69e524d7 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -18,6 +18,7 @@ TEST_PROGS := \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netcons_torture.sh \ netpoll_basic.py \ ping.py \ psp.py \ diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 402d4ee84f2e..6c5c60adb5e8 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -14,6 +14,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ + netcons_over_bonding.sh \ # end of TEST_PROGS TEST_FILES := \ @@ -24,6 +25,7 @@ TEST_FILES := \ TEST_INCLUDES := \ ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ ../../../net/forwarding/lib.sh \ # end of TEST_INCLUDES diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 6bb290abd48b..991494376223 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,5 +1,6 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y CONFIG_DUMMY=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y @@ -9,6 +10,9 @@ CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 000000000000..477cc9379500 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD}</var/run/netns/"${TXNS}" + exec {RXNS_FD}</var/run/netns/"${RXNS}" + + # Linking TX ifaces to the RX ones (on the other namespace) + echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." >&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. +function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create two interfaces. veth interfaces it known to have + # IFF_DISABLE_NETPOLL set + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? + set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface could not be enslaved" + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create an bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. +# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 8e1085e89647..87f89fd92f8c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later +SRCIP="" # to be populated later SRCIP4="192.0.2.1" SRCIP6="fc00::1" DSTIF="" # to be populated later +DSTIP="" # to be populated later DSTIP4="192.0.2.2" DSTIP6="fc00::2" @@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" -# IDs for netdevsim +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh # Create netdevsim interfaces create_ifaces() { - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" udevadm settle 2> /dev/null || true @@ -113,31 +121,38 @@ function set_network() { configure_ip } -function create_dynamic_target() { - local FORMAT=${1:-"extended"} +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') # Create a dynamic target - mkdir "${NETCONS_PATH}" + mkdir "${NCPATH}" - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name if [ "${FORMAT}" == "basic" ] then # Basic target does not support release - echo 0 > "${NETCONS_PATH}"/release - echo 0 > "${NETCONS_PATH}"/extended + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended elif [ "${FORMAT}" == "extended" ] then - echo 1 > "${NETCONS_PATH}"/extended + echo 1 > "${NCPATH}"/extended fi +} - echo 1 > "${NETCONS_PATH}"/enabled +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" + + echo 1 > "${NCPATH}"/enabled # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message @@ -185,14 +200,26 @@ function do_cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } -function cleanup() { +function cleanup_netcons() { # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled + # do not fail if the target is already disabled + if [[ ! -d "${NETCONS_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + then + echo 0 > "${NETCONS_PATH}"/enabled || true + fi # Remove all the keys that got created during the selftest find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete # Remove the configfs entry rmdir "${NETCONS_PATH}" +} +function cleanup() { + cleanup_netcons do_cleanup } @@ -369,3 +396,24 @@ function wait_for_port() { # more frequently on IPv6 sleep 1 } + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh new file mode 100755 index 000000000000..2ce9ee3719d1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! -d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c827..df10c7243511 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -20,4 +20,8 @@ TEST_PROGS := \ udp_tunnel_nic.sh \ # end of TEST_PROGS +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + include ../../../lib.mk diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index c62165fabd0c..cfa16aa1f39a 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -20,6 +20,10 @@ sample_events() { echo 0 > tracing_on echo 0 > events/enable +# Clear functions caused by page cache; run sample_events twice +sample_events +sample_events + echo "Get the most frequently calling function" echo > trace sample_events diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 5d2ffa3d5977..ece0ba8e7d34 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch): assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Confidence" not in uhdev.fields, + "Device not compatible, missing Confidence usage", + ) + def test_mt_confidence_bad_multi_release(self): + """Check for the sticky finger being properly detected. + + We first inject 3 fingers, then release only the second. + After 100 ms, we should receive a generated event about the + 2 missing fingers being released. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + # send 3 touches + t0 = Touch(1, 50, 10) + t1 = Touch(2, 150, 100) + t2 = Touch(3, 250, 200) + r = uhdev.event([t0, t1, t2]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # release the second + t1.tipswitch = False + r = uhdev.event([t1]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # only the second is released + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + + # wait for the timer to kick in + time.sleep(0.2) + + events = uhdev.next_sync_events() + self.debug_reports([], uhdev, events) + + # now all 3 fingers are released + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + + class TestElanXPS9360(BaseTest.TestWin8Multitouch): def create_device(self): return Digitizer( @@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP): input_info=(BusType.I2C, 0x06CB, 0xCE08), rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", ) + +class Testsynaptics_06cb_ce26(TestWin8TSConfidence): + def create_device(self): + return PTP( + "uhid test synaptics_06cb_ce26", + max_contacts=5, + input_info=(BusType.I2C, 0x06CB, 0xCE26), + rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", + ) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 3eebf5e3b974..bb4d33dde3c8 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map) ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + /* Unmap of empty is success */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); /* UNMAP_FLAG_ALL requires 0 iova/size */ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 772ca1db6e59..9f472c20c190 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1044,8 +1044,8 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) }; while (nvevents--) { - if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), - &trigger_vevent_cmd)) + if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd)) return -1; } return 0; diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 91906414a474..993c9e38e729 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -1020,7 +1020,7 @@ static void set_counter_defaults(void) { const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; uint64_t freq = read_sysreg(CNTFRQ_EL0); - uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + int width = ilog2(MIN_ROLLOVER_SECS * freq); width = clamp(width, 56, 64); CVAL_MAX = GENMASK_ULL(width - 1, 0); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 592b26ded779..d8fe17a6cc59 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -359,6 +359,44 @@ static void test_mmio_ease(void) kvm_vm_free(vm); } +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. + */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + int main(void) { test_mmio_abort(); @@ -369,4 +407,9 @@ int main(void) test_serror_emulated(); test_mmio_ease(); test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); } diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 011fad95dd02..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -63,8 +63,13 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) @@ -345,9 +350,20 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ @@ -704,6 +720,7 @@ static __u64 el2_regs[] = { SYS_REG(VMPIDR_EL2), SYS_REG(SCTLR_EL2), SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), SYS_REG(HCR_EL2), SYS_REG(MDCR_EL2), SYS_REG(CPTR_EL2), @@ -755,6 +772,10 @@ static __u64 el2_regs[] = { SYS_REG(VSESR_EL2), }; +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -789,6 +810,15 @@ static __u64 el2_regs[] = { .regs = el2_regs, \ .regs_n = ARRAY_SIZE(el2_regs), \ } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -897,6 +927,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -911,5 +1000,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &el2_sve_pmu_config, &el2_pauth_config, &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8ff1e853f7f8..5e24f77868b5 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -249,11 +249,14 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); GUEST_REG_SYNC(SYS_REVIDR_EL1); diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index 87922a89b134..687d04463983 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -123,6 +123,7 @@ static void guest_setup_gic(void) static void guest_code(size_t nr_lpis) { guest_setup_gic(); + local_irq_enable(); GUEST_SYNC(0); @@ -331,7 +332,7 @@ static void setup_vm(void) { int i; - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index b3ca6737f304..e7d9aeb418d3 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -14,8 +14,6 @@ #include <linux/bitmap.h> #include <linux/falloc.h> #include <linux/sizes.h> -#include <setjmp.h> -#include <signal.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -24,7 +22,9 @@ #include "test_util.h" #include "ucall_common.h" -static void test_file_read_write(int fd) +static size_t page_size; + +static void test_file_read_write(int fd, size_t total_size) { char buf[64]; @@ -38,18 +38,22 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + +static void test_mmap_supported(int fd, size_t total_size) { const char val = 0xaa; char *mem; size_t i; int ret; - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); - - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, val, total_size); for (i = 0; i < total_size; i++) @@ -68,45 +72,37 @@ static void test_mmap_supported(int fd, size_t page_size, size_t total_size) for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, total_size); - TEST_ASSERT(!ret, "munmap() should succeed."); -} - -static sigjmp_buf jmpbuf; -void fault_sigbus_handler(int signum) -{ - siglongjmp(jmpbuf, 1); + kvm_munmap(mem, total_size); } -static void test_fault_overflow(int fd, size_t page_size, size_t total_size) +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) { - struct sigaction sa_old, sa_new = { - .sa_handler = fault_sigbus_handler, - }; - size_t map_size = total_size * 4; const char val = 0xaa; char *mem; size_t i; - int ret; - mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); - sigaction(SIGBUS, &sa_new, &sa_old); - if (sigsetjmp(jmpbuf, 1) == 0) { - memset(mem, 0xaa, map_size); - TEST_ASSERT(false, "memset() should have triggered SIGBUS."); - } - sigaction(SIGBUS, &sa_old, NULL); + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); - for (i = 0; i < total_size; i++) + for (i = 0; i < accessible_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, map_size); - TEST_ASSERT(!ret, "munmap() should succeed."); + kvm_munmap(mem, map_size); +} + +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); } -static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; @@ -117,7 +113,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(mem, MAP_FAILED); } -static void test_file_size(int fd, size_t page_size, size_t total_size) +static void test_file_size(int fd, size_t total_size) { struct stat sb; int ret; @@ -128,7 +124,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(sb.st_blksize, page_size); } -static void test_fallocate(int fd, size_t page_size, size_t total_size) +static void test_fallocate(int fd, size_t total_size) { int ret; @@ -165,7 +161,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } -static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +static void test_invalid_punch_hole(int fd, size_t total_size) { struct { off_t offset; @@ -196,8 +192,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, - uint64_t guest_memfd_flags, - size_t page_size) + uint64_t guest_memfd_flags) { size_t size; int fd; @@ -214,7 +209,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) { int fd1, fd2, ret; struct stat st1, st2; - size_t page_size = getpagesize(); fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); @@ -239,9 +233,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) close(fd1); } -static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) +static void test_guest_memfd_flags(struct kvm_vm *vm) { - size_t page_size = getpagesize(); + uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); uint64_t flag; int fd; @@ -260,43 +254,57 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) } } -static void test_guest_memfd(unsigned long vm_type) +#define gmem_test(__test, __vm, __flags) \ +do { \ + int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + \ + test_##__test(fd, page_size * 4); \ + close(fd); \ +} while (0) + +static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) { - uint64_t flags = 0; - struct kvm_vm *vm; - size_t total_size; - size_t page_size; - int fd; + test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags); - page_size = getpagesize(); - total_size = page_size * 4; + gmem_test(file_read_write, vm, flags); - vm = vm_create_barebones_type(vm_type); + if (flags & GUEST_MEMFD_FLAG_MMAP) { + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + } else { + gmem_test(fault_private, vm, flags); + } - if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP)) - flags |= GUEST_MEMFD_FLAG_MMAP; + gmem_test(mmap_cow, vm, flags); + } else { + gmem_test(mmap_not_supported, vm, flags); + } - test_create_guest_memfd_multiple(vm); - test_create_guest_memfd_invalid_sizes(vm, flags, page_size); + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); +} - fd = vm_create_guest_memfd(vm, total_size, flags); +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + uint64_t flags; - test_file_read_write(fd); + test_guest_memfd_flags(vm); - if (flags & GUEST_MEMFD_FLAG_MMAP) { - test_mmap_supported(fd, page_size, total_size); - test_fault_overflow(fd, page_size, total_size); - } else { - test_mmap_not_supported(fd, page_size, total_size); - } + __test_guest_memfd(vm, 0); - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); - test_guest_memfd_flags(vm, flags); + /* MMAP should always be supported if INIT_SHARED is supported. */ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); - close(fd); kvm_vm_free(vm); } @@ -328,22 +336,26 @@ static void test_guest_memfd_guest(void) size_t size; int fd, i; - if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP)) + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) return; vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); - TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP), - "Default VM type should always support guest_memfd mmap()"); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); size = vm->page_size; - fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, 0xaa, size); - munmap(mem, size); + kvm_munmap(mem, size); virt_pg_map(vm, gpa, gpa); vcpu_args_set(vcpu, 2, gpa, size); @@ -351,8 +363,7 @@ static void test_guest_memfd_guest(void) TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); for (i = 0; i < size; i++) TEST_ASSERT_EQ(mem[i], 0xff); @@ -366,6 +377,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + page_size = getpagesize(); + /* * Not all architectures support KVM_CAP_VM_TYPES. However, those that * support guest_memfd have that support for the default VM type. diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 6f481475c135..ff928716574d 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -305,7 +305,17 @@ void test_wants_mte(void); void test_disable_default_vgic(void); bool vm_supports_el2(struct kvm_vm *vm); -static bool vcpu_has_el2(struct kvm_vcpu *vcpu) + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} + +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) { return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 26cc30290e76..d3f3e455c031 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap) #define __KVM_SYSCALL_ERROR(_name, _ret) \ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline void kvm_munmap(void *mem, size_t size) +{ + int ret; + + ret = munmap(mem, size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); +} + /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value @@ -1273,4 +1298,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); uint32_t guest_get_vcpuid(void); +bool kvm_arch_has_default_irqchip(void); + #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index c6ef895fbd9a..b4872ba8ed12 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include <setjmp.h> +#include <signal.h> #include <stdlib.h> #include <stdarg.h> #include <stdbool.h> @@ -78,6 +80,23 @@ do { \ __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c index 7c301b4c7005..5d7590d01868 100644 --- a/tools/testing/selftests/kvm/irqfd_test.c +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) int main(int argc, char *argv[]) { pthread_t racing_thread; + struct kvm_vcpu *unused; int r, i; - /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ - vm1 = vm_create(1); - vm2 = vm_create(1); + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. + */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); WRITE_ONCE(__eventfd, kvm_new_eventfd()); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..0e2f8ed90f30 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 369a4c87dd8f..54f6d17c78f7 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm) if (vm->arch.has_gic) close(vm->arch.gic_fd); } + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6743fbd9bd67..1a93d6361671 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int ret; if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); ret = close(vcpu->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); @@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp) static void __vm_mem_region_delete(struct kvm_vm *vm, struct userspace_mem_region *region) { - int ret; - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. */ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) vcpu->stats.fd = vcpu_get_stats_fd(vcpu); @@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); @@ -2344,3 +2330,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) pg = paddr >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..8ceeb17c819a 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 03eb99af9b8d..8a1848586a85 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,13 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index c748cd9b2eef..b418502c5ecc 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 6a437d2be9fa..37b7e6524533 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -339,8 +339,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); @@ -413,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. */ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index 0350a8896a2f..f04768c1d2e4 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -10,6 +10,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <pthread.h> /* Arbitrarily chosen values */ #define TEST_SIZE (SZ_2M + PAGE_SIZE) @@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa) GUEST_DONE(); } -static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, - u64 left) +struct slot_worker_data { + struct kvm_vm *vm; + u64 gpa; + uint32_t flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) { struct kvm_pre_fault_memory range = { - .gpa = gpa, + .gpa = base_gpa + offset, .size = size, .flags = 0, }; - u64 prev; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; int ret, save_errno; + u64 prev; + + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. + */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); - do { + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); + + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { prev = range.size; ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); save_errno = errno; @@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, "%sexpecting range.size to change on %s", ret < 0 ? "not " : "", ret < 0 ? "failure" : "success"); - } while (ret >= 0 ? range.size : save_errno == EINTR); - TEST_ASSERT(range.size == left, - "Completed with %lld bytes left, expected %" PRId64, - range.size, left); + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. + */ + if (ret < 0 && save_errno == EINTR) + continue; + + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. + */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } - if (left == 0) - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); else - /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */ - __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, - "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); } static void __test_pre_fault_memory(unsigned long vm_type, bool private) @@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private) if (private) vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); - pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + + pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); vcpu_args_set(vcpu, 1, guest_test_virt_mem); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index d265b34c54be..50bc1c38225a 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm) self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); - self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, - PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - ASSERT_NE(self->run, MAP_FAILED); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); /** * For virtual cpus that have been created with S390 user controlled * virtual machines, the resulting vcpu fd can be memory mapped at page * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of * the virtual cpu's hardware control block. */ - self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, - self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); - ASSERT_NE(self->sie_block, MAP_FAILED); + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); TH_LOG("VM created %p %p", self->run, self->sie_block); @@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm) FIXTURE_TEARDOWN(uc_kvm) { - munmap(self->sie_block, PAGE_SIZE); - munmap(self->run, self->kvm_run_size); + kvm_munmap(self->sie_block, PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); close(self->vcpu_fd); close(self->vm_fd); close(self->kvm_fd); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index ce3ac0fd6dfb..7fe427ff9b38 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) @@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void) mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); + mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, (uint64_t)max_mem_slots * MEM_REGION_SIZE, @@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index 4046131e7888..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index ecd34f364125..892895659c7e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -176,6 +176,8 @@ run_test() local rcv_dmac=$(mac_get $rcv_if_name) local should_receive + setup_wait + tcpdump_start $rcv_if_name mc_route_prepare $send_if_name diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 2b1d9f2b3e9e..cfc39f70635d 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } @@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -1032,15 +1033,22 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "tcp") == 0) { send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip") == 0) { send_changed_ECN(txfd, &daddr); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index b148cadb96d0..fc7e22b503d3 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -1433,7 +1441,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 47ecb5b3836e..9b7b93f8eb0c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -492,7 +492,7 @@ do_transfer() "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then if [ ${stat_ooo_now} -eq 0 ]; then mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ "than expected (${expect_ackrx})" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c90d8e8b95cb..41503c241989 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2324,7 +2324,7 @@ laminar_endp_tests() { # no laminar endpoints: routing rules are used if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2336,7 +2336,7 @@ laminar_endp_tests() # laminar endpoints: this endpoint is used if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2348,7 +2348,7 @@ laminar_endp_tests() # laminar endpoints: these endpoints are used if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2363,7 +2363,7 @@ laminar_endp_tests() # laminar endpoints: only one endpoint is used if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2376,7 +2376,7 @@ laminar_endp_tests() # laminar endpoints: subflow and laminar flags if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2532,7 +2532,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2545,8 +2545,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2557,7 +2557,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2570,9 +2570,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2584,10 +2584,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2599,9 +2599,9 @@ remove_tests() # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2614,10 +2614,10 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2631,10 +2631,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2647,9 +2647,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2666,9 +2666,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2681,9 +2681,9 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -3806,7 +3806,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3831,7 +3831,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -3839,7 +3839,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3859,7 +3859,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3867,7 +3867,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3880,7 +3880,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -3888,7 +3888,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3904,7 +3904,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3912,7 +3912,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3930,7 +3930,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi } @@ -3939,11 +3939,11 @@ endpoint_tests() # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr if reset "implicit EP" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { speed=slow \ + { test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -3960,17 +3960,17 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then start_events pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4015,7 +4015,7 @@ endpoint_tests() chk_mptcp_info subflows 3 subflows 3 done - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4040,7 +4040,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 @@ -4048,7 +4048,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4089,7 +4089,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4115,13 +4115,13 @@ endpoint_tests() # flush and re-add if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 1 2 # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=4 speed=20 \ + { test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4137,7 +4137,7 @@ endpoint_tests() wait_mpj $ns2 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ chk_join_nr 2 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d62e653d48b0..f4388900016a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -350,6 +350,27 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c index f02f1f95d227..a04dac0b8027 100644 --- a/tools/testing/selftests/net/sctp_hello.c +++ b/tools/testing/selftests/net/sctp_hello.c @@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len static int do_client(int argc, char *argv[]) { struct sockaddr_storage ss; - char buf[] = "hello"; int csk, ret, len; if (argc < 5) { @@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[]) set_addr(&ss, argv[3], argv[4], &len); ret = connect(csk, (struct sockaddr *)&ss, len); - if (ret < 0) { - printf("failed to connect to peer\n"); + if (ret < 0) return -1; - } - ret = send(csk, buf, strlen(buf) + 1, 0); - if (ret < 0) { - printf("failed to send msg %d\n", ret); - return -1; - } + recv(csk, NULL, 0, 0); close(csk); return 0; @@ -75,7 +68,6 @@ int main(int argc, char *argv[]) { struct sockaddr_storage ss; int lsk, csk, ret, len; - char buf[20]; if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s server|client ...\n", argv[0]); @@ -125,11 +117,6 @@ int main(int argc, char *argv[]) return -1; } - ret = recv(csk, buf, sizeof(buf), 0); - if (ret <= 0) { - printf("failed to recv msg %d\n", ret); - return -1; - } close(csk); close(lsk); diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c854034b6aa1..667b211aa8a1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -20,9 +20,9 @@ setup() { modprobe sctp_diag setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS - ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 @@ -62,17 +62,40 @@ setup() { } cleanup() { - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } -wait_server() { +start_server() { local IFACE=$1 local CNT=0 - until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ - grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do - [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE & + disown + until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do + [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +stop_server() { + local CNT=0 + + ip netns exec $SERVER_NS pkill sctp_hello + while ip netns exec $SERVER_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break + sleep 0.1 + done +} + +wait_client() { + local CLIENT_NS=$1 + local CNT=0 + + while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break sleep 0.1 done } @@ -81,14 +104,12 @@ do_test() { local CLIENT_NS=$1 local IFACE=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE 2>&1 >/dev/null & - disown - wait_server $IFACE || return $RET + start_server $IFACE || return $RET timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS + stop_server return $RET } @@ -96,25 +117,21 @@ do_testx() { local IFACE1=$1 local IFACE2=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE1 2>&1 >/dev/null & - disown - wait_server $IFACE1 || return $RET - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE2 2>&1 >/dev/null & - disown - wait_server $IFACE2 || return $RET + start_server $IFACE1 || return $RET + start_server $IFACE2 || return $RET timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \ timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server return $RET } testup() { - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1 echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -123,7 +140,7 @@ testup() { do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } echo "[PASS]" - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0 echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -160,7 +177,7 @@ testup() { do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } echo "[PASS]" - echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y " do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" } diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 998e5a2f4579..0091bcd91c2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -961,5 +961,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root" ] + }, + { + "id": "4989", + "name": "Try to add an fq child to an ingress qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 ingress" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress" + ] + }, + { + "id": "c2b0", + "name": "Try to add an fq child to a clsact qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 clsact" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] } ] diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 5288e768b207..68625362add2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) { ASSERT_EQ(1 << reg.enable_bit, self->check); /* Ensure write shows up at correct offset */ - ASSERT_NE(-1, write(self->data_fd, ®.write_index, + ASSERT_NE(-1, write(self->data_fd, (void *)®.write_index, sizeof(reg.write_index))); val = (void *)(((char *)perf_page) + perf_page->data_offset); ASSERT_EQ(PERF_RECORD_SAMPLE, *val); diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index ed31606e01b7..69ec0c856481 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -4,9 +4,12 @@ #include <fcntl.h> #include <string.h> -#include <linux/vfio.h> + +#include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/list.h> #include <linux/pci_regs.h> +#include <linux/vfio.h> #include "../../../kselftest.h" @@ -185,6 +188,13 @@ struct vfio_pci_device { struct vfio_pci_driver driver; }; +struct iova_allocator { + struct iommu_iova_range *ranges; + u32 nranges; + u32 range_idx; + u64 range_offset; +}; + /* * Return the BDF string of the device that the test should use. * @@ -206,10 +216,36 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_ void vfio_pci_device_cleanup(struct vfio_pci_device *device); void vfio_pci_device_reset(struct vfio_pci_device *device); -void vfio_pci_dma_map(struct vfio_pci_device *device, - struct vfio_dma_region *region); -void vfio_pci_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region); +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges); + +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device); +void iova_allocator_cleanup(struct iova_allocator *allocator); +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size); + +int __vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region); +int __vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region, + u64 *unmapped); +int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped); + +static inline void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, region), 0); +} + +static inline void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, region, NULL), 0); +} + +static inline void vfio_pci_dma_unmap_all(struct vfio_pci_device *device) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_unmap_all(device, NULL), 0); +} void vfio_pci_config_access(struct vfio_pci_device *device, bool write, size_t config, size_t size, void *data); diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index 0921b2451ba5..b479a359da12 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -2,6 +2,7 @@ #include <dirent.h> #include <fcntl.h> #include <libgen.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -11,11 +12,12 @@ #include <sys/mman.h> #include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/limits.h> #include <linux/mman.h> +#include <linux/overflow.h> #include <linux/types.h> #include <linux/vfio.h> -#include <linux/iommufd.h> #include "../../../kselftest.h" #include <vfio_util.h> @@ -28,6 +30,249 @@ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ } while (0) +static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz, + u32 *cap_offset) +{ + struct vfio_info_cap_header *hdr; + + if (!*cap_offset) + return NULL; + + VFIO_ASSERT_LT(*cap_offset, bufsz); + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr)); + + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset); + *cap_offset = hdr->next; + + return hdr; +} + +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info, + u16 cap_id) +{ + struct vfio_info_cap_header *hdr; + u32 cap_offset = info->cap_offset; + u32 max_depth; + u32 depth = 0; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) + return NULL; + + if (cap_offset) + VFIO_ASSERT_GE(cap_offset, sizeof(*info)); + + max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr); + + while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) { + depth++; + VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n"); + + if (hdr->id == cap_id) + return hdr; + } + + return NULL; +} + +/* Return buffer including capability chain, if present. Free with free() */ +static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device) +{ + struct vfio_iommu_type1_info *info; + + info = malloc(sizeof(*info)); + VFIO_ASSERT_NOT_NULL(info); + + *info = (struct vfio_iommu_type1_info) { + .argsz = sizeof(*info), + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + info = realloc(info, info->argsz); + VFIO_ASSERT_NOT_NULL(info); + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + return info; +} + +/* + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to + * report iommufd's iommu_iova_range. Free with free(). + */ +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_range; + struct vfio_iommu_type1_info *info; + struct vfio_info_cap_header *hdr; + struct iommu_iova_range *ranges = NULL; + + info = vfio_iommu_get_info(device); + hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + VFIO_ASSERT_NOT_NULL(hdr); + + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header); + VFIO_ASSERT_GT(cap_range->nr_iovas, 0); + + ranges = calloc(cap_range->nr_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + for (u32 i = 0; i < cap_range->nr_iovas; i++) { + ranges[i] = (struct iommu_iova_range){ + .start = cap_range->iova_ranges[i].start, + .last = cap_range->iova_ranges[i].end, + }; + } + + *nranges = cap_range->nr_iovas; + + free(info); + return ranges; +} + +/* Return iova ranges of the device's IOAS. Free with free() */ +static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + int ret; + + struct iommu_ioas_iova_ranges query = { + .size = sizeof(query), + .ioas_id = device->ioas_id, + }; + + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + VFIO_ASSERT_EQ(ret, -1); + VFIO_ASSERT_EQ(errno, EMSGSIZE); + VFIO_ASSERT_GT(query.num_iovas, 0); + + ranges = calloc(query.num_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + query.allowed_iovas = (uintptr_t)ranges; + + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + *nranges = query.num_iovas; + + return ranges; +} + +static int iova_range_comp(const void *a, const void *b) +{ + const struct iommu_iova_range *ra = a, *rb = b; + + if (ra->start < rb->start) + return -1; + + if (ra->start > rb->start) + return 1; + + return 0; +} + +/* Return sorted IOVA ranges of the device. Free with free(). */ +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + + if (device->iommufd) + ranges = iommufd_iova_ranges(device, nranges); + else + ranges = vfio_iommu_iova_ranges(device, nranges); + + if (!ranges) + return NULL; + + VFIO_ASSERT_GT(*nranges, 0); + + /* Sort and check that ranges are sane and non-overlapping */ + qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp); + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last); + + for (u32 i = 1; i < *nranges; i++) { + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last); + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start); + } + + return ranges; +} + +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device) +{ + struct iova_allocator *allocator; + struct iommu_iova_range *ranges; + u32 nranges; + + ranges = vfio_pci_iova_ranges(device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + + allocator = malloc(sizeof(*allocator)); + VFIO_ASSERT_NOT_NULL(allocator); + + *allocator = (struct iova_allocator){ + .ranges = ranges, + .nranges = nranges, + .range_idx = 0, + .range_offset = 0, + }; + + return allocator; +} + +void iova_allocator_cleanup(struct iova_allocator *allocator) +{ + free(allocator->ranges); + free(allocator); +} + +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size) +{ + VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n"); + VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n"); + + for (;;) { + struct iommu_iova_range *range; + iova_t iova, last; + + VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges, + "IOVA allocator out of space\n"); + + range = &allocator->ranges[allocator->range_idx]; + iova = range->start + allocator->range_offset; + + /* Check for sufficient space at the current offset */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + /* Align iova to size */ + iova = last & ~(size - 1); + + /* Check for sufficient space at the aligned iova */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + if (last == range->last) { + allocator->range_idx++; + allocator->range_offset = 0; + } else { + allocator->range_offset = last - range->start + 1; + } + + return iova; + +next_range: + allocator->range_idx++; + allocator->range_offset = 0; + } +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; @@ -141,7 +386,7 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index, ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); } -static void vfio_iommu_dma_map(struct vfio_pci_device *device, +static int vfio_iommu_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { struct vfio_iommu_type1_dma_map args = { @@ -152,10 +397,13 @@ static void vfio_iommu_dma_map(struct vfio_pci_device *device, .size = region->size, }; - ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args); + if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args)) + return -errno; + + return 0; } -static void iommufd_dma_map(struct vfio_pci_device *device, +static int iommufd_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { struct iommu_ioas_map args = { @@ -169,54 +417,108 @@ static void iommufd_dma_map(struct vfio_pci_device *device, .ioas_id = device->ioas_id, }; - ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args); + if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args)) + return -errno; + + return 0; } -void vfio_pci_dma_map(struct vfio_pci_device *device, +int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { + int ret; + if (device->iommufd) - iommufd_dma_map(device, region); + ret = iommufd_dma_map(device, region); else - vfio_iommu_dma_map(device, region); + ret = vfio_iommu_dma_map(device, region); + + if (ret) + return ret; list_add(®ion->link, &device->dma_regions); + + return 0; } -static void vfio_iommu_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags, + u64 *unmapped) { struct vfio_iommu_type1_dma_unmap args = { .argsz = sizeof(args), - .iova = region->iova, - .size = region->size, + .iova = iova, + .size = size, + .flags = flags, }; - ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args); + if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args)) + return -errno; + + if (unmapped) + *unmapped = args.size; + + return 0; } -static void iommufd_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id, + u64 *unmapped) { struct iommu_ioas_unmap args = { .size = sizeof(args), - .iova = region->iova, - .length = region->size, - .ioas_id = device->ioas_id, + .iova = iova, + .length = length, + .ioas_id = ioas_id, }; - ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args); + if (ioctl(fd, IOMMU_IOAS_UNMAP, &args)) + return -errno; + + if (unmapped) + *unmapped = args.length; + + return 0; } -void vfio_pci_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +int __vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region, u64 *unmapped) { + int ret; + if (device->iommufd) - iommufd_dma_unmap(device, region); + ret = iommufd_dma_unmap(device->iommufd, region->iova, + region->size, device->ioas_id, + unmapped); else - vfio_iommu_dma_unmap(device, region); + ret = vfio_iommu_dma_unmap(device->container_fd, region->iova, + region->size, 0, unmapped); + + if (ret) + return ret; + + list_del_init(®ion->link); + + return 0; +} + +int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped) +{ + int ret; + struct vfio_dma_region *curr, *next; + + if (device->iommufd) + ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX, + device->ioas_id, unmapped); + else + ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0, + VFIO_DMA_UNMAP_FLAG_ALL, unmapped); + + if (ret) + return ret; + + list_for_each_entry_safe(curr, next, &device->dma_regions, link) + list_del_init(&curr->link); - list_del(®ion->link); + return 0; } static void vfio_pci_region_get(struct vfio_pci_device *device, int index, diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index ab19c54a774d..102603d4407d 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -3,6 +3,8 @@ #include <sys/mman.h> #include <unistd.h> +#include <uapi/linux/types.h> +#include <linux/iommufd.h> #include <linux/limits.h> #include <linux/mman.h> #include <linux/sizes.h> @@ -93,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova, FIXTURE(vfio_dma_mapping_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; }; FIXTURE_VARIANT(vfio_dma_mapping_test) { @@ -112,13 +115,17 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0); FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB); FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB); +#undef FIXTURE_VARIANT_ADD_IOMMU_MODE + FIXTURE_SETUP(vfio_dma_mapping_test) { self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); } FIXTURE_TEARDOWN(vfio_dma_mapping_test) { + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } @@ -129,6 +136,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) struct vfio_dma_region region; struct iommu_mapping mapping; u64 mapping_size = size; + u64 unmapped; int rc; region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); @@ -139,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) else ASSERT_NE(region.vaddr, MAP_FAILED); - region.iova = (u64)region.vaddr; + region.iova = iova_allocator_alloc(self->iova_allocator, size); region.size = size; vfio_pci_dma_map(self->device, ®ion); @@ -184,7 +192,9 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) } unmap: - vfio_pci_dma_unmap(self->device, ®ion); + rc = __vfio_pci_dma_unmap(self->device, ®ion, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region.size); printf("Unmapped IOVA 0x%lx\n", region.iova); ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr)); ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping)); @@ -192,6 +202,103 @@ unmap: ASSERT_TRUE(!munmap(region.vaddr, size)); } +FIXTURE(vfio_dma_map_limit_test) { + struct vfio_pci_device *device; + struct vfio_dma_region region; + size_t mmap_size; +}; + +FIXTURE_VARIANT(vfio_dma_map_limit_test) { + const char *iommu_mode; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \ +FIXTURE_VARIANT_ADD(vfio_dma_map_limit_test, _iommu_mode) { \ + .iommu_mode = #_iommu_mode, \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); + +#undef FIXTURE_VARIANT_ADD_IOMMU_MODE + +FIXTURE_SETUP(vfio_dma_map_limit_test) +{ + struct vfio_dma_region *region = &self->region; + struct iommu_iova_range *ranges; + u64 region_size = getpagesize(); + iova_t last_iova; + u32 nranges; + + /* + * Over-allocate mmap by double the size to provide enough backing vaddr + * for overflow tests + */ + self->mmap_size = 2 * region_size; + + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(region->vaddr, MAP_FAILED); + + ranges = vfio_pci_iova_ranges(self->device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + last_iova = ranges[nranges - 1].last; + free(ranges); + + /* One page prior to the last iova */ + region->iova = last_iova & ~(region_size - 1); + region->size = region_size; +} + +FIXTURE_TEARDOWN(vfio_dma_map_limit_test) +{ + vfio_pci_device_cleanup(self->device); + ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0); +} + +TEST_F(vfio_dma_map_limit_test, unmap_range) +{ + struct vfio_dma_region *region = &self->region; + u64 unmapped; + int rc; + + vfio_pci_dma_map(self->device, region); + ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr)); + + rc = __vfio_pci_dma_unmap(self->device, region, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region->size); +} + +TEST_F(vfio_dma_map_limit_test, unmap_all) +{ + struct vfio_dma_region *region = &self->region; + u64 unmapped; + int rc; + + vfio_pci_dma_map(self->device, region); + ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr)); + + rc = __vfio_pci_dma_unmap_all(self->device, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region->size); +} + +TEST_F(vfio_dma_map_limit_test, overflow) +{ + struct vfio_dma_region *region = &self->region; + int rc; + + region->iova = ~(iova_t)0 & ~(region->size - 1); + region->size = self->mmap_size; + + rc = __vfio_pci_dma_map(self->device, region); + ASSERT_EQ(rc, -EOVERFLOW); + + rc = __vfio_pci_dma_unmap(self->device, region, NULL); + ASSERT_EQ(rc, -EOVERFLOW); +} + int main(int argc, char *argv[]) { device_bdf = vfio_selftests_get_bdf(&argc, argv); diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c index 2dbd70b7db62..f69eec8b928d 100644 --- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -19,6 +19,7 @@ static const char *device_bdf; } while (0) static void region_setup(struct vfio_pci_device *device, + struct iova_allocator *iova_allocator, struct vfio_dma_region *region, u64 size) { const int flags = MAP_SHARED | MAP_ANONYMOUS; @@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device, VFIO_ASSERT_NE(vaddr, MAP_FAILED); region->vaddr = vaddr; - region->iova = (u64)vaddr; + region->iova = iova_allocator_alloc(iova_allocator, size); region->size = size; vfio_pci_dma_map(device, region); @@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device, FIXTURE(vfio_pci_driver_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; struct vfio_dma_region memcpy_region; void *vaddr; int msi_fd; @@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test) struct vfio_pci_driver *driver; self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); driver = &self->device->driver; - region_setup(self->device, &self->memcpy_region, SZ_1G); - region_setup(self->device, &driver->region, SZ_2M); + region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G); + region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M); /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ - self->unmapped_iova = 8UL * SZ_1G; + self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G); vfio_pci_driver_init(self->device); self->msi_fd = self->device->msi_eventfds[driver->msi]; @@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test) region_teardown(self->device, &self->memcpy_region); region_teardown(self->device, &driver->region); + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index edacebfc1632..8ceeb8a7894f 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -389,9 +389,9 @@ run_test() { local rc host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') - host_warn_cnt_before=$(dmesg --level=warn | wc -l) + host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') name=$(echo "${1}" | awk '{ print $1 }') eval test_"${name}" @@ -403,7 +403,7 @@ run_test() { rc=$KSFT_FAIL fi - host_warn_cnt_after=$(dmesg --level=warn | wc -l) + host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on host" | log_host "${name}" rc=$KSFT_FAIL @@ -415,7 +415,7 @@ run_test() { rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host "${name}" rc=$KSFT_FAIL diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index cf263fe9deaf..ef97916e3873 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1725,7 +1725,7 @@ static void show_usage(void) "-n, --notrace\t\tIf latency is detected, do not print out the content of\n" "\t\t\tthe trace file to standard output\n\n" -"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" +"-e, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" "\t\t\t%ld ms, before reading the trace file. The\n" diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 0227e13cd8dd..5f0015c5dd95 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,6 +113,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES bool config KVM_GUEST_MEMFD + depends on KVM_GENERIC_MMU_NOTIFIER select XARRAY_MULTI bool diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 94bafd6c558c..ffadc5ee8e04 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -102,8 +102,17 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } -static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode) +{ + if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED) + return KVM_FILTER_SHARED; + + return KVM_FILTER_PRIVATE; +} + +static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end, + enum kvm_gfn_range_filter attr_filter) { bool flush = false, found_memslot = false; struct kvm_memory_slot *slot; @@ -118,8 +127,7 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, .end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff, .slot = slot, .may_block = true, - /* guest memfd is relevant to only private mappings. */ - .attr_filter = KVM_FILTER_PRIVATE, + .attr_filter = attr_filter, }; if (!found_memslot) { @@ -139,8 +147,21 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, KVM_MMU_UNLOCK(kvm); } -static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, + pgoff_t end) +{ + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + enum kvm_gfn_range_filter attr_filter; + struct kvm_gmem *gmem; + + attr_filter = kvm_gmem_get_invalidate_filter(inode); + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter); +} + +static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end) { struct kvm *kvm = gmem->kvm; @@ -151,12 +172,20 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, } } -static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, + pgoff_t end) { struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_end(gmem, start, end); +} + +static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +{ pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; - struct kvm_gmem *gmem; /* * Bindings must be stable across invalidation to ensure the start+end @@ -164,13 +193,11 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) */ filemap_invalidate_lock(inode->i_mapping); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(inode, start, end); truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(inode, start, end); filemap_invalidate_unlock(inode->i_mapping); @@ -280,8 +307,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * Zap all SPTEs pointed at by this file. Do not free the backing * memory, as its lifetime is associated with the inode, not the file. */ - kvm_gmem_invalidate_begin(gmem, 0, -1ul); - kvm_gmem_invalidate_end(gmem, 0, -1ul); + __kvm_gmem_invalidate_begin(gmem, 0, -1ul, + kvm_gmem_get_invalidate_filter(inode)); + __kvm_gmem_invalidate_end(gmem, 0, -1ul); list_del(&gmem->entry); @@ -328,6 +356,9 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; + if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)) + return VM_FAULT_SIGBUS; + folio = kvm_gmem_get_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { int err = PTR_ERR(folio); @@ -400,8 +431,6 @@ static int kvm_gmem_migrate_folio(struct address_space *mapping, static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio) { - struct list_head *gmem_list = &mapping->i_private_list; - struct kvm_gmem *gmem; pgoff_t start, end; filemap_invalidate_lock_shared(mapping); @@ -409,8 +438,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol start = folio->index; end = start + folio_nr_pages(folio); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(mapping->host, start, end); /* * Do not truncate the range, what action is taken in response to the @@ -421,8 +449,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol * error to userspace. */ - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(mapping->host, start, end); filemap_invalidate_unlock_shared(mapping); @@ -458,7 +485,7 @@ static const struct inode_operations kvm_gmem_iops = { .setattr = kvm_gmem_setattr, }; -bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return true; } @@ -522,12 +549,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) { loff_t size = args->size; u64 flags = args->flags; - u64 valid_flags = 0; - if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP; - - if (flags & ~valid_flags) + if (flags & ~kvm_gmem_get_supported_flags(kvm)) return -EINVAL; if (size <= 0 || !PAGE_ALIGNED(size)) @@ -600,31 +623,50 @@ err: return r; } -void kvm_gmem_unbind(struct kvm_memory_slot *slot) +static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem) { unsigned long start = slot->gmem.pgoff; unsigned long end = start + slot->npages; - struct kvm_gmem *gmem; + + xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL); + + /* + * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn() + * cannot see this memslot. + */ + WRITE_ONCE(slot->gmem.file, NULL); +} + +void kvm_gmem_unbind(struct kvm_memory_slot *slot) +{ struct file *file; /* - * Nothing to do if the underlying file was already closed (or is being - * closed right now), kvm_gmem_release() invalidates all bindings. + * Nothing to do if the underlying file was _already_ closed, as + * kvm_gmem_release() invalidates and nullifies all bindings. */ - file = kvm_gmem_get_file(slot); - if (!file) + if (!slot->gmem.file) return; - gmem = file->private_data; - - filemap_invalidate_lock(file->f_mapping); - xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL); + file = kvm_gmem_get_file(slot); /* - * synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn() - * cannot see this memslot. + * However, if the file is _being_ closed, then the bindings need to be + * removed as kvm_gmem_release() might not run until after the memslot + * is freed. Note, modifying the bindings is safe even though the file + * is dying as kvm_gmem_release() nullifies slot->gmem.file under + * slots_lock, and only puts its reference to KVM after destroying all + * bindings. I.e. reaching this point means kvm_gmem_release() hasn't + * yet destroyed the bindings or freed the gmem_file, and can't do so + * until the caller drops slots_lock. */ - WRITE_ONCE(slot->gmem.file, NULL); + if (!file) { + __kvm_gmem_unbind(slot, slot->gmem.file->private_data); + return; + } + + filemap_invalidate_lock(file->f_mapping); + __kvm_gmem_unbind(slot, file->private_data); filemap_invalidate_unlock(file->f_mapping); fput(file); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 226faeaa8e56..b7a0ae2a7b20 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4928,8 +4928,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: return 1; - case KVM_CAP_GUEST_MEMFD_MMAP: - return !kvm || kvm_arch_supports_gmem_mmap(kvm); + case KVM_CAP_GUEST_MEMFD_FLAGS: + return kvm_gmem_get_supported_flags(kvm); #endif default: break; |
