962 files changed, 25314 insertions(+), 7504 deletions(-)
@@ -223,6 +223,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com> Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com> Domen Puncer <domen@coderock.org> Douglas Gilbert <dougg@torque.net> +Drew Fustini <fustini@kernel.org> <drew@pdp7.com> +<duje@dujemihanovic.xyz> <duje.mihanovic@skole.hr> Ed L. Cashin <ecashin@coraid.com> Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org> Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com> @@ -830,3 +832,6 @@ Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com> Yusuke Goda <goda.yusuke@renesas.com> Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com> Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <quic_zijuhu@quicinc.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <zijuhu@codeaurora.org> +Zijun Hu <zijun_hu@htc.com> diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 54195530e97a..d3da88b26a53 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -56,7 +56,7 @@ Date: January 2009 Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../async attribute allows the user space to - enable or diasble the device's suspend and resume callbacks to + enable or disable the device's suspend and resume callbacks to be executed asynchronously (ie. in separate threads, in parallel with the main suspend/resume thread) during system-wide power transitions (eg. suspend to RAM, hibernation). diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bf85f4de6862..ab8cd337f43a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index d4140dc6c5ba..615453fcc9ff 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of The file is read only. -What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count +What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count Date: February 2018 Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com> Description: This file shows the total physical memory resources. This is diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub index c43be90deab4..ab6014743da5 100644 --- a/Documentation/ABI/testing/sysfs-edac-scrub +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -49,6 +49,12 @@ Description: (RO) Supported minimum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the minimum scrub cycle + supported by the memory device. + + Region-based scrub: returns the max of minimum scrub cycles + supported by individual memory devices that back the region. 
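As a rough illustration of how user space might consume these scrub attributes, the sketch below reads the minimum scrub cycle through sysfs. It is a minimal sketch only: the device name ``cxl_mem0`` and instance ``scrub0`` are hypothetical placeholders standing in for the real ``<dev-name>`` and ``scrubX`` names, which depend on the platform.

.. code-block:: c

   /*
    * Illustrative sketch only: query an EDAC scrub control via sysfs.
    * "cxl_mem0" and "scrub0" are hypothetical placeholders; the real
    * <dev-name> and scrubX instances are platform dependent.
    */
   #include <stdio.h>

   static long read_scrub_attr(const char *dev, const char *attr)
   {
           char path[256];
           long val = -1;
           FILE *f;

           snprintf(path, sizeof(path),
                    "/sys/bus/edac/devices/%s/scrub0/%s", dev, attr);
           f = fopen(path, "r");
           if (!f)
                   return -1;
           if (fscanf(f, "%ld", &val) != 1)
                   val = -1;
           fclose(f);
           return val;
   }

   int main(void)
   {
           long min = read_scrub_attr("cxl_mem0", "min_cycle_duration");

           if (min >= 0)
                   printf("minimum scrub cycle: %ld seconds\n", min);
           return 0;
   }

For a region-based scrubber the same read applies; per the description above, the reported minimum is then the max of the minimum cycles of the backing memory devices.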
+ What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration Date: March 2025 KernelVersion: 6.15 @@ -57,6 +63,16 @@ Description: (RO) Supported maximum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the maximum scrub cycle supported + by the memory device. + + Region-based scrub: returns the min of maximum scrub cycles + supported by individual memory devices that back the region. + + If the memory device does not provide maximum scrub cycle + information, return the maximum supported value of the scrub + cycle field. + What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration Date: March 2025 KernelVersion: 6.15 diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 1302fd1b55e8..6dba18dbb9ab 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +Kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..07e22ba5bfe3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7488,6 +7488,19 @@ having this key zero'ed is acceptable. E.g. in testing scenarios. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index 5a2e6c0ef04a..3518671e1a85 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -93,7 +93,7 @@ enters a C-state. The kernel provides a function to invoke the buffer clearing: - mds_clear_cpu_buffers() + x86_clear_cpu_buffers() Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. Other than CFLAGS.ZF, this macro doesn't clobber any registers. @@ -185,9 +185,9 @@ Mitigation points idle clearing would be a window dressing exercise and is therefore not activated. - The invocation is controlled by the static key mds_idle_clear which is - switched depending on the chosen mitigation mode and the SMT state of - the system. + The invocation is controlled by the static key cpu_buf_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of the + system. 
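For reference, the buffer clearing named above boils down to executing VERW on a valid data-segment selector. The sketch below follows the shape of the kernel's former mds_clear_cpu_buffers() helper, which this series renames to x86_clear_cpu_buffers(); it is illustrative kernel-context code, not the current implementation.

.. code-block:: c

   /*
    * Sketch modeled on the former mds_clear_cpu_buffers() helper.
    * Kernel context assumed: u16 comes from <linux/types.h> and
    * __KERNEL_DS from <asm/segment.h>.
    */
   static __always_inline void clear_cpu_buffers_sketch(void)
   {
           static const u16 ds = __KERNEL_DS;

           /*
            * The microcode update repurposes VERW: executing it with a
            * valid, writable data-segment selector flushes the affected
            * CPU buffers.  Only CFLAGS.ZF is clobbered.
            */
           asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
   }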
The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU from spilling to the Hyper-Thread diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml index 9b5f3f3eab19..e69b6343a8eb 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml @@ -118,15 +118,11 @@ $defs: ti,lvds-vod-swing-clock-microvolt: description: LVDS diferential output voltage <min max> for clock lanes in microvolts. - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 ti,lvds-vod-swing-data-microvolt: description: LVDS diferential output voltage <min max> for data lanes in microvolts. - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 allOf: diff --git a/Documentation/devicetree/bindings/dpll/dpll-device.yaml b/Documentation/devicetree/bindings/dpll/dpll-device.yaml new file mode 100644 index 000000000000..fb8d7a9a3693 --- /dev/null +++ b/Documentation/devicetree/bindings/dpll/dpll-device.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dpll/dpll-device.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Digital Phase-Locked Loop (DPLL) Device + +maintainers: + - Ivan Vecera <ivecera@redhat.com> + +description: + Digital Phase-Locked Loop (DPLL) device is used for precise clock + synchronization in networking and telecom hardware. The device can + have one or more channels (DPLLs) and one or more physical input and + output pins. Each DPLL channel can either produce pulse-per-clock signal + or drive ethernet equipment clock. The type of each channel can be + indicated by dpll-types property. + +properties: + $nodename: + pattern: "^dpll(@.*)?$" + + "#address-cells": + const: 0 + + "#size-cells": + const: 0 + + dpll-types: + description: List of DPLL channel types, one per DPLL instance. + $ref: /schemas/types.yaml#/definitions/non-unique-string-array + items: + enum: [pps, eec] + + input-pins: + type: object + description: DPLL input pins + unevaluatedProperties: false + + properties: + "#address-cells": + const: 1 + "#size-cells": + const: 0 + + patternProperties: + "^pin@[0-9a-f]+$": + $ref: /schemas/dpll/dpll-pin.yaml + unevaluatedProperties: false + + required: + - "#address-cells" + - "#size-cells" + + output-pins: + type: object + description: DPLL output pins + unevaluatedProperties: false + + properties: + "#address-cells": + const: 1 + "#size-cells": + const: 0 + + patternProperties: + "^pin@[0-9]+$": + $ref: /schemas/dpll/dpll-pin.yaml + unevaluatedProperties: false + + required: + - "#address-cells" + - "#size-cells" + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/dpll/dpll-pin.yaml b/Documentation/devicetree/bindings/dpll/dpll-pin.yaml new file mode 100644 index 000000000000..51db93b77306 --- /dev/null +++ b/Documentation/devicetree/bindings/dpll/dpll-pin.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dpll/dpll-pin.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: DPLL Pin + +maintainers: + - Ivan Vecera <ivecera@redhat.com> + +description: | + The DPLL pin is either a physical input or output pin that is provided + by a DPLL( Digital Phase-Locked Loop) device. 
The pin is identified by + its physical order number that is stored in reg property and can have + an additional set of properties like supported (allowed) frequencies, + label, type and may support embedded sync. + + Note that the pin in this context has nothing to do with pinctrl. + +properties: + reg: + description: Hardware index of the DPLL pin. + maxItems: 1 + + connection-type: + description: Connection type of the pin + $ref: /schemas/types.yaml#/definitions/string + enum: [ext, gnss, int, mux, synce] + + esync-control: + description: Indicates whether the pin supports embedded sync functionality. + type: boolean + + label: + description: String exposed as the pin board label + $ref: /schemas/types.yaml#/definitions/string + + supported-frequencies-hz: + description: List of supported frequencies for this pin, expressed in Hz. + +required: + - reg + +additionalProperties: false diff --git a/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml b/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml new file mode 100644 index 000000000000..17747f754b84 --- /dev/null +++ b/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dpll/microchip,zl30731.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip Azurite DPLL device + +maintainers: + - Ivan Vecera <ivecera@redhat.com> + +description: + Microchip Azurite DPLL (ZL3073x) is a family of DPLL devices that + provides up to 5 independent DPLL channels, up to 10 differential or + single-ended inputs and 10 differential or 20 single-ended outputs. + These devices support both I2C and SPI interfaces. + +properties: + compatible: + enum: + - microchip,zl30731 + - microchip,zl30732 + - microchip,zl30733 + - microchip,zl30734 + - microchip,zl30735 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/dpll/dpll-device.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + dpll@70 { + compatible = "microchip,zl30732"; + reg = <0x70>; + dpll-types = "pps", "eec"; + + input-pins { + #address-cells = <1>; + #size-cells = <0>; + + pin@0 { /* REF0P */ + reg = <0>; + connection-type = "ext"; + label = "Input 0"; + supported-frequencies-hz = /bits/ 64 <1 1000>; + }; + }; + + output-pins { + #address-cells = <1>; + #size-cells = <0>; + + pin@3 { /* OUT1N */ + reg = <3>; + connection-type = "gnss"; + esync-control; + label = "Output 1"; + supported-frequencies-hz = /bits/ 64 <1 10000>; + }; + }; + }; + }; + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + dpll@70 { + compatible = "microchip,zl30731"; + reg = <0x70>; + spi-max-frequency = <12500000>; + + dpll-types = "pps"; + + input-pins { + #address-cells = <1>; + #size-cells = <0>; + + pin@0 { /* REF0P */ + reg = <0>; + connection-type = "ext"; + label = "Input 0"; + supported-frequencies-hz = /bits/ 64 <1 1000>; + }; + }; + + output-pins { + #address-cells = <1>; + #size-cells = <0>; + + pin@3 { /* OUT1N */ + reg = <3>; + connection-type = "gnss"; + esync-control; + label = "Output 1"; + supported-frequencies-hz = /bits/ 64 <1 10000>; + }; + }; + }; + }; +... 
diff --git a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml index eddfd329c67b..69ac5db8b914 100644 --- a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml @@ -26,7 +26,8 @@ properties: - const: realtek,rtl9301-i2c reg: - description: Register offset and size this I2C controller. + items: + - description: Register offset and size this I2C controller. "#address-cells": const: 1 diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml index cb3e1801b0d3..0840e4ab28b7 100644 --- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml +++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml @@ -4,14 +4,14 @@ $id: http://devicetree.org/schemas/input/elan,ekth6915.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Elan eKTH6915 touchscreen controller +title: Elan I2C-HID touchscreen controllers maintainers: - Douglas Anderson <dianders@chromium.org> description: - Supports the Elan eKTH6915 touchscreen controller. - This touchscreen controller uses the i2c-hid protocol with a reset GPIO. + Supports the Elan eKTH6915 and other I2C-HID touchscreen controllers. + These touchscreen controller use the i2c-hid protocol with a reset GPIO. allOf: - $ref: /schemas/input/touchscreen/touchscreen.yaml# @@ -23,12 +23,14 @@ properties: - enum: - elan,ekth5015m - const: elan,ekth6915 + - items: + - const: elan,ekth8d18 + - const: elan,ekth6a12nay - enum: - elan,ekth6915 - elan,ekth6a12nay - reg: - const: 0x10 + reg: true interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 0ae415f1e69c..2ac709a4c472 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -24,7 +24,7 @@ properties: - allwinner,sun50i-a100-emac - allwinner,sun50i-h6-emac - allwinner,sun50i-h616-emac0 - - allwinner,sun55i-a523-emac0 + - allwinner,sun55i-a523-gmac0 - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml index c5d8dfe5b801..ec34daff2aa0 100644 --- a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml +++ b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml @@ -59,6 +59,7 @@ properties: - const: ptp_ref iommus: + minItems: 1 maxItems: 2 phy-mode: diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml index 51205f9f2985..815a90808901 100644 --- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml +++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml @@ -136,6 +136,16 @@ properties: See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt for details for the regulator setup on these boards. + mdio: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + + properties: + mediatek,pio: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle pointing to the mediatek pinctrl node. 
+ mediatek,mcm: type: boolean description: @@ -190,6 +200,18 @@ required: - reg $defs: + builtin-dsa-port: + patternProperties: + "^(ethernet-)?ports$": + patternProperties: + "^(ethernet-)?port@[0-6]$": + if: + required: [ ethernet ] + then: + properties: + phy-mode: + const: internal + mt7530-dsa-port: patternProperties: "^(ethernet-)?ports$": @@ -297,7 +319,7 @@ allOf: - airoha,en7581-switch - airoha,an7583-switch then: - $ref: "#/$defs/mt7530-dsa-port" + $ref: "#/$defs/builtin-dsa-port" properties: gpio-controller: false mediatek,mcm: false diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 7cbf11bbe99c..66b1cfbbfe22 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -39,6 +39,7 @@ properties: # MAC. - internal - mii + - mii-lite - gmii - sgmii - psgmii diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml index 55d6a8379025..d14410018bcf 100644 --- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml +++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml @@ -6,9 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Faraday Technology FTGMAC100 gigabit ethernet controller -allOf: - - $ref: ethernet-controller.yaml# - maintainers: - Po-Yu Chuang <ratbert@faraday-tech.com> @@ -35,6 +32,9 @@ properties: - description: MAC IP clock - description: RMII RCLK gate for AST2500/2600 + resets: + maxItems: 1 + clock-names: minItems: 1 items: @@ -74,6 +74,21 @@ required: - reg - interrupts +allOf: + - $ref: ethernet-controller.yaml# + - if: + properties: + compatible: + contains: + enum: + - aspeed,ast2600-mac + then: + properties: + resets: true + else: + properties: + resets: false + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml index 4fdc5328826c..8689de1aaea1 100644 --- a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml +++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml @@ -47,6 +47,8 @@ properties: phy-handle: true + fixed-link: true + intel,npe-handle: $ref: /schemas/types.yaml#/definitions/phandle-array items: diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml index 9e02fd80af83..b45f67f92e80 100644 --- a/Documentation/devicetree/bindings/net/mediatek,net.yaml +++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml @@ -40,7 +40,19 @@ properties: interrupts: minItems: 1 - maxItems: 4 + maxItems: 8 + + interrupt-names: + minItems: 1 + items: + - const: fe0 + - const: fe1 + - const: fe2 + - const: fe3 + - const: pdma0 + - const: pdma1 + - const: pdma2 + - const: pdma3 power-domains: maxItems: 1 @@ -54,6 +66,10 @@ properties: - const: gmac - const: ppe + sram: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to mmio SRAM + mediatek,ethsys: $ref: /schemas/types.yaml#/definitions/phandle description: @@ -135,6 +151,10 @@ allOf: minItems: 3 maxItems: 3 + interrupt-names: + minItems: 3 + maxItems: 3 + clocks: minItems: 4 maxItems: 4 @@ -146,6 +166,8 @@ allOf: - const: gp1 - const: gp2 + sram: false + mediatek,infracfg: false mediatek,wed: false @@ -166,6 +188,9 @@ allOf: interrupts: maxItems: 1 + interrupt-names: + 
maxItems: 1 + clocks: minItems: 2 maxItems: 2 @@ -175,6 +200,8 @@ allOf: - const: ethif - const: fe + sram: false + mediatek,infracfg: false mediatek,wed: false @@ -192,6 +219,10 @@ allOf: minItems: 3 maxItems: 3 + interrupt-names: + minItems: 3 + maxItems: 3 + clocks: minItems: 11 maxItems: 11 @@ -210,6 +241,8 @@ allOf: - const: sgmii_ck - const: eth2pll + sram: false + mediatek,infracfg: false mediatek,sgmiisys: @@ -232,6 +265,10 @@ allOf: minItems: 3 maxItems: 3 + interrupt-names: + minItems: 3 + maxItems: 3 + clocks: minItems: 17 maxItems: 17 @@ -256,6 +293,8 @@ allOf: - const: sgmii_ck - const: eth2pll + sram: false + mediatek,sgmiisys: minItems: 2 maxItems: 2 @@ -272,7 +311,10 @@ allOf: then: properties: interrupts: - minItems: 4 + minItems: 8 + + interrupt-names: + minItems: 8 clocks: minItems: 15 @@ -310,7 +352,10 @@ allOf: then: properties: interrupts: - minItems: 4 + minItems: 8 + + interrupt-names: + minItems: 8 clocks: minItems: 15 @@ -348,7 +393,10 @@ allOf: then: properties: interrupts: - minItems: 4 + minItems: 8 + + interrupt-names: + minItems: 8 clocks: minItems: 24 @@ -382,7 +430,7 @@ allOf: - const: xgp3 patternProperties: - "^mac@[0-1]$": + "^mac@[0-2]$": type: object unevaluatedProperties: false allOf: @@ -507,7 +555,11 @@ examples: interrupts = <GIC_SPI 196 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 197 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>; + <GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>; clocks = <ðsys CLK_ETH_FE_EN>, <ðsys CLK_ETH_GP2_EN>, <ðsys CLK_ETH_GP1_EN>, diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 90b79283e228..4e3cbaa06229 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -30,6 +30,7 @@ select: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.00a - snps,dwmac-5.10a - snps,dwmac-5.20 - snps,dwmac-5.30a @@ -98,11 +99,13 @@ properties: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.00a - snps,dwmac-5.10a - snps,dwmac-5.20 - snps,dwmac-5.30a - snps,dwxgmac - snps,dwxgmac-2.10 + - sophgo,sg2042-dwmac - sophgo,sg2044-dwmac - starfive,jh7100-dwmac - starfive,jh7110-dwmac @@ -641,6 +644,7 @@ allOf: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.00a - snps,dwmac-5.10a - snps,dwmac-5.20 - snps,dwmac-5.30a diff --git a/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml new file mode 100644 index 000000000000..b89456f0ef83 --- /dev/null +++ b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/sophgo,cv1800b-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo CV1800B DWMAC glue layer + +maintainers: + - Inochi Amaoto <inochiama@gmail.com> + +select: + properties: + compatible: + contains: + enum: + - sophgo,cv1800b-dwmac + required: + - compatible + +properties: + compatible: + items: + - const: sophgo,cv1800b-dwmac + - const: snps,dwmac-3.70a + + reg: + maxItems: 1 + + clocks: + items: + - description: GMAC main clock + - description: PTP clock + + clock-names: + items: + - const: stmmaceth + - const: 
ptp_ref + + interrupts: + maxItems: 1 + + interrupt-names: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + const: stmmaceth + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - resets + - reset-names + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + + ethernet@4070000 { + compatible = "sophgo,cv1800b-dwmac", "snps,dwmac-3.70a"; + reg = <0x04070000 0x10000>; + clocks = <&clk 35>, <&clk 36>; + clock-names = "stmmaceth", "ptp_ref"; + interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + phy-handle = <&internal_ephy>; + phy-mode = "internal"; + resets = <&rst 12>; + reset-names = "stmmaceth"; + rx-fifo-depth = <8192>; + tx-fifo-depth = <8192>; + snps,multicast-filter-bins = <0>; + snps,perfect-filter-entries = <1>; + snps,aal; + snps,txpbl = <8>; + snps,rxpbl = <8>; + snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; + snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; + snps,axi-config = <&gmac0_stmmac_axi_setup>; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + }; + + gmac0_mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <1>; + queue0 {}; + }; + + gmac0_mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <1>; + queue0 {}; + }; + + gmac0_stmmac_axi_setup: stmmac-axi-config { + snps,blen = <16 8 4 0 0 0 0>; + snps,rd_osr_lmt = <2>; + snps,wr_osr_lmt = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml index 4dd2dc9c678b..ce21979a2d9a 100644 --- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -15,14 +15,19 @@ select: contains: enum: - sophgo,sg2044-dwmac + - sophgo,sg2042-dwmac required: - compatible properties: compatible: - items: - - const: sophgo,sg2044-dwmac - - const: snps,dwmac-5.30a + oneOf: + - items: + - const: sophgo,sg2042-dwmac + - const: snps,dwmac-5.00a + - items: + - const: sophgo,sg2044-dwmac + - const: snps,dwmac-5.30a reg: maxItems: 1 @@ -80,6 +85,8 @@ examples: interrupt-parent = <&intc>; interrupts = <296 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; + phy-handle = <&phy0>; + phy-mode = "rgmii-id"; resets = <&rst 30>; reset-names = "stmmaceth"; snps,multicast-filter-bins = <0>; @@ -91,7 +98,6 @@ examples: snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; snps,axi-config = <&gmac0_stmmac_axi_setup>; - status = "disabled"; gmac0_mtl_rx_setup: rx-queues-config { snps,rx-queues-to-use = <8>; diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 33d2016b6509..c6bc27709bf7 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -45,7 +45,7 @@ allOf: - ns16550 - ns16550a then: - anyOf: + oneOf: - required: [ clock-frequency ] - required: [ clocks ] diff --git a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt deleted file mode 100644 index 55a901051e8f..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt +++ /dev/null @@ -1,5 +0,0 @@ -Altera JTAG UART - -Required properties: -- compatible : should be "ALTR,juart-1.0" <DEPRECATED> -- compatible : should be "altr,juart-1.0" diff --git 
a/Documentation/devicetree/bindings/serial/altera_uart.txt b/Documentation/devicetree/bindings/serial/altera_uart.txt deleted file mode 100644 index 81bf7ffb1a81..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_uart.txt +++ /dev/null @@ -1,8 +0,0 @@ -Altera UART - -Required properties: -- compatible : should be "ALTR,uart-1.0" <DEPRECATED> -- compatible : should be "altr,uart-1.0" - -Optional properties: -- clock-frequency : frequency of the clock input to the UART diff --git a/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml new file mode 100644 index 000000000000..02e20fa591da --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,juart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera JTAG UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +properties: + compatible: + const: altr,juart-1.0 + +required: + - compatible + +additionalProperties: false diff --git a/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml new file mode 100644 index 000000000000..72d4972e1e22 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,uart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: altr,uart-1.0 + + clock-frequency: + description: Frequency of the clock input to the UART. 
+ +required: + - compatible + +unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml index 234089b5954d..b43df10c5ef4 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas//soc/fsl/fsl,ls1028a-reset.yaml# +$id: http://devicetree.org/schemas/soc/fsl/fsl,ls1028a-reset.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Layerscape Reset Registers Module diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 49e782a33eb6..c38c03c624f0 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -158,6 +158,35 @@ definitions: - name: pse-event-sw-pw-control-error doc: PSE faced an error managing the power control from software + - + name: rxfh-fields + name-prefix: rxh- + enum-name: + header: linux/ethtool.h + type: flags + entries: + - + name: l2da + value: 1 + - + name: vlan + - + name: l3-proto + - + name: ip-src + - + name: ip-dst + - + name: l4-b-0-1 + doc: src port in case of TCP/UDP/SCTP + - + name: l4-b-2-3 + doc: dst port in case of TCP/UDP/SCTP + - + name: gtp-teid + - + name: discard + value: 31 attribute-sets: - @@ -1448,6 +1477,123 @@ attribute-sets: type: u32 name-prefix: ethtool-a- - + name: flow + attr-cnt-name: --ethtool-a-flow-cnt + doc: | + Flow types, corresponding to those defined in the old + ethtool header for RXFH and RXNFC as ${PROTO}_FLOW. + The values are not matching the old ones to avoid carrying + into Netlink the IP_USER_FLOW vs IPV4_FLOW vs IPV4_USER_FLOW confusion. 
+ attributes: + - + name: ether + type: uint + enum: rxfh-fields + - + name: ip4 + type: uint + enum: rxfh-fields + - + name: ip6 + type: uint + enum: rxfh-fields + - + name: tcp4 + type: uint + enum: rxfh-fields + - + name: tcp6 + type: uint + enum: rxfh-fields + - + name: udp4 + type: uint + enum: rxfh-fields + - + name: udp6 + type: uint + enum: rxfh-fields + - + name: sctp4 + type: uint + enum: rxfh-fields + - + name: sctp6 + type: uint + enum: rxfh-fields + - + name: ah4 + type: uint + enum: rxfh-fields + - + name: ah6 + type: uint + enum: rxfh-fields + - + name: esp4 + type: uint + enum: rxfh-fields + - + name: esp6 + type: uint + enum: rxfh-fields + - + name: ah-esp4 + type: uint + enum: rxfh-fields + - + name: ah-esp6 + type: uint + enum: rxfh-fields + - + name: gtpu4 + type: uint + enum: rxfh-fields + - + name: gtpu6 + type: uint + enum: rxfh-fields + - + name: gtpc4 + type: uint + enum: rxfh-fields + - + name: gtpc6 + type: uint + enum: rxfh-fields + - + name: gtpc-teid4 + type: uint + enum: rxfh-fields + - + name: gtpc-teid6 + type: uint + enum: rxfh-fields + - + name: gtpu-eh4 + type: uint + enum: rxfh-fields + - + name: gtpu-eh6 + type: uint + enum: rxfh-fields + - + name: gtpu-ul4 + type: uint + enum: rxfh-fields + - + name: gtpu-ul6 + type: uint + enum: rxfh-fields + - + name: gtpu-dl4 + type: uint + enum: rxfh-fields + - + name: gtpu-dl6 + type: uint + enum: rxfh-fields + - name: rss attr-cnt-name: __ethtool-a-rss-cnt attributes: @@ -1478,6 +1624,10 @@ attribute-sets: - name: start-context type: u32 + - + name: flow-hash + type: nest + nested-attributes: flow - name: plca attr-cnt-name: __ethtool-a-plca-cnt @@ -2307,6 +2457,7 @@ operations: - indir - hkey - input-xfrm + - flow-hash dump: request: attributes: diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index 39ed1661c7f1..95c3fade7a8d 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -71,6 +71,9 @@ attribute-sets: - name: peername type: string + - + name: keyring + type: u32 - name: done attributes: @@ -109,6 +112,7 @@ operations: - peer-identity - certificate - peername + - keyring - name: done doc: Handler reports handshake completion diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index dceeb0d763aa..50d92084a49c 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -209,13 +209,10 @@ Libbpf Libbpf is a helper library for eBPF and XDP that makes using these technologies a lot simpler. It also contains specific helper functions -in tools/lib/bpf/xsk.h for facilitating the use of AF_XDP. It -contains two types of functions: those that can be used to make the -setup of AF_XDP socket easier and ones that can be used in the data -plane to access the rings safely and quickly. To see an example on how -to use this API, please take a look at the sample application in -samples/bpf/xdpsock_usr.c which uses libbpf for both setup and data -plane operations. +in tools/testing/selftests/bpf/xsk.h for facilitating the use of +AF_XDP. It contains two types of functions: those that can be used to +make the setup of AF_XDP socket easier and ones that can be used in the +data plane to access the rings safely and quickly. We recommend that you use this library unless you have become a power user. It will make your program a lot simpler. @@ -372,9 +369,8 @@ needs to explicitly notify the kernel to send any packets put on the TX ring. 
This can be accomplished either by a poll() call, as in the RX path, or by calling sendto(). -An example of how to use this flag can be found in -samples/bpf/xdpsock_user.c. An example with the use of libbpf helpers -would look like this for the TX path: +An example with the use of libbpf helpers would look like this for the +TX path: .. code-block:: c @@ -442,6 +438,15 @@ is created by a privileged process and passed to a non-privileged one. Once the option is set, kernel will refuse attempts to bind that socket to a different interface. Updating the value requires CAP_NET_RAW. +XDP_MAX_TX_SKB_BUDGET setsockopt +-------------------------------- + +This setsockopt sets the maximum number of descriptors that can be handled +and passed to the driver at one send syscall. It is applied in the copy +mode to allow application to tune the per-socket maximum iteration for +better throughput and less frequency of send syscall. +Allowed range is [32, xs->tx->nentries]. + XDP_STATISTICS getsockopt ------------------------- @@ -549,12 +554,12 @@ later in this document. Usage ----- -In order to use AF_XDP sockets two parts are needed. The -user-space application and the XDP program. For a complete setup and -usage example, please refer to the sample application. The user-space -side is xdpsock_user.c and the XDP side is part of libbpf. +In order to use AF_XDP sockets two parts are needed. The user-space +application and the XDP program. For a complete setup and usage example, +please refer to the xdp-project at +https://github.com/xdp-project/bpf-examples/tree/main/AF_XDP-example. -The XDP code sample included in tools/lib/bpf/xsk.c is the following: +The XDP code sample is the following: .. code-block:: c @@ -752,11 +757,12 @@ to facilitate extending a zero-copy driver with multi-buffer support. Sample application ================== - -There is a xdpsock benchmarking/test application included that -demonstrates how to use AF_XDP sockets with private UMEMs. Say that -you would like your UDP traffic from port 4242 to end up in queue 16, -that we will enable AF_XDP on. Here, we use ethtool for this:: +There is a xdpsock benchmarking/test application that can be found at +https://github.com/xdp-project/bpf-examples/tree/main/AF_XDP-example +that demonstrates how to use AF_XDP sockets with private +UMEMs. Say that you would like your UDP traffic from port 4242 to end +up in queue 16, that we will enable AF_XDP on. Here, we use ethtool +for this:: ethtool -N p3p2 rx-flow-hash udp4 fn ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ @@ -773,7 +779,7 @@ can be displayed with "-h", as usual. This sample application uses libbpf to make the setup and usage of AF_XDP simpler. If you want to know how the raw uapi of AF_XDP is really used to make something more advanced, take a look at the libbpf -code in tools/lib/bpf/xsk.[ch]. +code in tools/testing/selftests/bpf/xsk.[ch]. FAQ ======= diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index a4c1291d2561..f8f5766703d4 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -562,6 +562,12 @@ lacp_rate The default is slow. +broadcast_neighbor + + Option specifying whether to broadcast ARP/ND packets to all + active slaves. This option has no effect in modes other than + 802.3ad mode. The default is off (0). + max_bonds Specifies the number of bonding devices to create for this @@ -767,8 +773,9 @@ num_unsol_na greater than 1. The valid range is 0 - 255; the default value is 1. 
These options - affect only the active-backup mode. These options were added for - bonding versions 3.3.0 and 3.4.0 respectively. + affect the active-backup or 802.3ad (broadcast_neighbor enabled) mode. + These options were added for bonding versions 3.3.0 and 3.4.0 + respectively. From Linux 3.0 and bonding version 3.7.1, these notifications are generated by the ipv4 and ipv6 code and the numbers of diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 139b4c75a191..40ac552641a3 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -58,7 +58,9 @@ Contents: ti/tlan ti/icssg_prueth wangxun/txgbe + wangxun/txgbevf wangxun/ngbe + wangxun/ngbevf .. only:: subproject and html diff --git a/Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst b/Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst new file mode 100644 index 000000000000..a39e3d5a1038 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst @@ -0,0 +1,16 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +================================================================== +Linux Base Virtual Function Driver for Wangxun(R) Gigabit Ethernet +================================================================== + +WangXun Gigabit Virtual Function Linux driver. +Copyright(c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. + +Support +======= +For general information, go to the website at: +https://www.net-swift.com + +If you got any problem, contact Wangxun support team via nic-support@net-swift.com +and Cc: netdev. diff --git a/Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst b/Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst new file mode 100644 index 000000000000..b2f759b7b518 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst @@ -0,0 +1,16 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +=========================================================================== +Linux Base Virtual Function Driver for Wangxun(R) 10/25/40 Gigabit Ethernet +=========================================================================== + +WangXun 10/25/40 Gigabit Virtual Function Linux driver. +Copyright(c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. + +Support +======= +For general information, go to the website at: +https://www.net-swift.com + +If you got any problem, contact Wangxun support team via nic-support@net-swift.com +and Cc: netdev. diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 3da8f4ef2417..211b58177e12 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -140,3 +140,6 @@ own name. * - ``enable_phc`` - Boolean - Enable PHC (PTP Hardware Clock) functionality in the device. + * - ``clock_id`` + - u64 + - Clock ID used by the device for registering DPLL devices and pins. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 8319f43b5933..250ae71f4023 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -98,3 +98,4 @@ parameters, info versions, and other features it supports. 
iosm octeontx2 sfc + zl3073x diff --git a/Documentation/networking/devlink/zl3073x.rst b/Documentation/networking/devlink/zl3073x.rst new file mode 100644 index 000000000000..4b6cfaf38643 --- /dev/null +++ b/Documentation/networking/devlink/zl3073x.rst @@ -0,0 +1,51 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +zl3073x devlink support +======================= + +This document describes the devlink features implemented by the ``zl3073x`` +device driver. + +Parameters +========== + +.. list-table:: Generic parameters implemented + :widths: 5 5 90 + + * - Name + - Mode + - Notes + * - ``clock_id`` + - driverinit + - Set the clock ID that is used by the driver for registering DPLL devices + and pins. + +Info versions +============= + +The ``zl3073x`` driver reports the following versions + +.. list-table:: devlink info versions implemented + :widths: 5 5 5 90 + + * - Name + - Type + - Example + - Description + * - ``asic.id`` + - fixed + - 1E94 + - Chip identification number + * - ``asic.rev`` + - fixed + - 300 + - Chip revision number + * - ``fw`` + - running + - 7006 + - Firmware version number + * - ``custom_cfg`` + - running + - 1.3.0.1 + - Device configuration version customized by OEM diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 07e9808ebd2c..248bc3d93da9 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1969,14 +1969,15 @@ used to ignore context 0s and only dump additional contexts). Kernel response contents: -===================================== ====== ========================== +===================================== ====== =============================== ``ETHTOOL_A_RSS_HEADER`` nested reply header ``ETHTOOL_A_RSS_CONTEXT`` u32 context number ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes ``ETHTOOL_A_RSS_INPUT_XFRM`` u32 RSS input data transformation -===================================== ====== ========================== + ``ETHTOOL_A_RSS_FLOW_HASH`` nested Header fields included in hash +===================================== ====== =============================== ETHTOOL_A_RSS_HFUNC attribute is bitmap indicating the hash function being used. Current supported options are toeplitz, xor or crc32. @@ -1985,6 +1986,8 @@ indicates queue number. ETHTOOL_A_RSS_INPUT_XFRM attribute is a bitmap indicating the type of transformation applied to the input protocol fields before given to the RSS hfunc. Current supported options are symmetric-xor and symmetric-or-xor. +ETHTOOL_A_RSS_FLOW_HASH carries per-flow type bitmask of which header +fields are included in the hash calculation. PLCA_GET_CFG ============ @@ -2436,7 +2439,7 @@ are netlink only. 
``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` ``ETHTOOL_SPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_SET`` - ``ETHTOOL_GRXFH`` n/a + ``ETHTOOL_GRXFH`` ``ETHTOOL_MSG_RSS_GET`` ``ETHTOOL_SRXFH`` n/a ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SGRO`` ``ETHTOOL_MSG_FEATURES_SET`` diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 9af5a8935d57..14700ea77e75 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -431,7 +431,7 @@ tcp_dsack - BOOLEAN tcp_early_retrans - INTEGER Tail loss probe (TLP) converts RTOs occurring due to tail - losses into fast recovery (draft-ietf-tcpm-rack). Note that + losses into fast recovery (RFC8985). Note that TLP requires RACK to function properly (see tcp_recovery below) Possible values: @@ -1891,10 +1891,10 @@ proxy_arp_pvlan - BOOLEAN This technology is known by different names: - In RFC 3069 it is called VLAN Aggregation. - Cisco and Allied Telesyn call it Private VLAN. - Hewlett-Packard call it Source-Port filtering or port-isolation. - Ericsson call it MAC-Forced Forwarding (RFC Draft). + - In RFC 3069 it is called VLAN Aggregation. + - Cisco and Allied Telesyn call it Private VLAN. + - Hewlett-Packard call it Source-Port filtering or port-isolation. + - Ericsson call it MAC-Forced Forwarding (RFC Draft). proxy_delay - INTEGER Delay proxy response. @@ -2487,8 +2487,10 @@ fib_notify_on_flag_change - INTEGER ioam6_id - INTEGER Define the IOAM id of this node. Uses only 24 bits out of 32 in total. - Min: 0 - Max: 0xFFFFFF + Possible value range: + + - Min: 0 + - Max: 0xFFFFFF Default: 0xFFFFFF @@ -2496,8 +2498,10 @@ ioam6_id_wide - LONG INTEGER Define the wide IOAM id of this node. Uses only 56 bits out of 64 in total. Can be different from ioam6_id. - Min: 0 - Max: 0xFFFFFFFFFFFFFF + Possible value range: + + - Min: 0 + - Max: 0xFFFFFFFFFFFFFF Default: 0xFFFFFFFFFFFFFF @@ -3322,31 +3326,27 @@ pf_enable - INTEGER https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for details. - 1: Enable pf. + Possible values: - 0: Disable pf. + - 1: Enable pf. + - 0: Disable pf. Default: 1 pf_expose - INTEGER Unset or enable/disable pf (pf is short for potentially failed) state exposure. Applications can control the exposure of the PF path state - in the SCTP_PEER_ADDR_CHANGE event and the SCTP_GET_PEER_ADDR_INFO - sockopt. When it's unset, no SCTP_PEER_ADDR_CHANGE event with - SCTP_ADDR_PF state will be sent and a SCTP_PF-state transport info - can be got via SCTP_GET_PEER_ADDR_INFO sockopt; When it's enabled, - a SCTP_PEER_ADDR_CHANGE event will be sent for a transport becoming - SCTP_PF state and a SCTP_PF-state transport info can be got via - SCTP_GET_PEER_ADDR_INFO sockopt; When it's disabled, no - SCTP_PEER_ADDR_CHANGE event will be sent and it returns -EACCES when - trying to get a SCTP_PF-state transport info via SCTP_GET_PEER_ADDR_INFO - sockopt. - - 0: Unset pf state exposure, Compatible with old applications. + in the SCTP_PEER_ADDR_CHANGE event and access of SCTP_PF-state + transport info via SCTP_GET_PEER_ADDR_INFO sockopt. - 1: Disable pf state exposure. + Possible values: - 2: Enable pf state exposure. + - 0: Unset pf state exposure (compatible with old applications). No + event will be sent but the transport info can be queried. + - 1: Disable pf state exposure. No event will be sent and trying to + obtain transport info will return -EACCESS. + - 2: Enable pf state exposure. 
The event will be sent for a transport + becoming SCTP_PF state and transport info can be obtained. Default: 0 @@ -3542,13 +3542,11 @@ sndbuf_policy - INTEGER sctp_mem - vector of 3 INTEGERs: min, pressure, max Number of pages allowed for queueing by all SCTP sockets. - min: Below this number of pages SCTP is not bothered about its - memory appetite. When amount of memory allocated by SCTP exceeds - this number, SCTP starts to moderate memory usage. - - pressure: This value was introduced to follow format of tcp_mem. - - max: Number of pages allowed for queueing by all SCTP sockets. + * min: Below this number of pages SCTP is not bothered about its + memory usage. When amount of memory allocated by SCTP exceeds + this number, SCTP starts to moderate memory usage. + * pressure: This value was introduced to follow format of tcp_mem. + * max: Maximum number of allowed pages. Default is calculated at boot time from amount of available memory. @@ -3556,9 +3554,9 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Only the first value ("min") is used, "default" and "max" are ignored. - min: Minimal size of receive buffer used by SCTP socket. - It is guaranteed to each SCTP socket (but not association) even - under moderate memory pressure. + * min: Minimal size of receive buffer used by SCTP socket. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. Default: 4K @@ -3566,14 +3564,16 @@ sctp_wmem - vector of 3 INTEGERs: min, default, max Only the first value ("min") is used, "default" and "max" are ignored. - min: Minimum size of send buffer that can be used by SCTP sockets. - It is guaranteed to each SCTP socket (but not association) even - under moderate memory pressure. + * min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. Default: 4K addr_scope_policy - INTEGER - Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 + Control IPv4 address scoping (see + https://datatracker.ietf.org/doc/draft-stewart-tsvwg-sctp-ipv4/00/ + for details). - 0 - Disable IPv4 address scoping - 1 - Enable IPv4 address scoping diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index f64641417c54..7f159043ad5a 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -333,6 +333,13 @@ Some of the interface modes are described below: SerDes lane, each port having speeds of 2.5G / 1G / 100M / 10M achieved through symbol replication. The PCS expects the standard USXGMII code word. +``PHY_INTERFACE_MODE_MIILITE`` + Non-standard, simplified MII mode, without TXER, RXER, CRS and COL signals + as defined for the MII. The absence of COL signal makes half-duplex link + modes impossible but does not interfere with BroadR-Reach link modes on + Broadcom (and other two-wire Ethernet) PHYs, because they are full-duplex + only. + Pause frames / flow control =========================== diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index c7904a1bc167..36cc7afc2527 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -16,11 +16,13 @@ User interface Creating a TLS connection ------------------------- -First create a new TCP socket and set the TLS ULP. +First create a new TCP socket and once the connection is established set the +TLS ULP. .. 
code-block:: c sock = socket(AF_INET, SOCK_STREAM, 0); + connect(sock, addr, addrlen); setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")); Setting the TLS ULP allows us to set/get TLS socket options. Currently diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 1ac62dc3a66f..e1755610b4bc 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). Co-posting selftests --------------------- +~~~~~~~~~~~~~~~~~~~~ Selftests should be part of the same series as the code changes. Specifically for fixes both code change and related test should go into diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9abf93ee5f65..43ed57e048a8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7196,6 +7196,10 @@ The valid value for 'flags' is: u64 leaf; u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + u64 ret; + u64 vector; + } setup_event_notify; }; } tdx; @@ -7210,21 +7214,24 @@ number from register R11. The remaining field of the union provide the inputs and outputs of the TDVMCALL. Currently the following values of ``nr`` are defined: -* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote -signed by a service hosting TD-Quoting Enclave operating on the host. -Parameters and return value are in the ``get_quote`` field of the union. -The ``gpa`` field and ``size`` specify the guest physical address -(without the shared bit set) and the size of a shared-memory buffer, in -which the TDX guest passes a TD Report. The ``ret`` field represents -the return value of the GetQuote request. When the request has been -queued successfully, the TDX guest can poll the status field in the -shared-memory area to check whether the Quote generation is completed or -not. When completed, the generated Quote is returned via the same buffer. - -* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support -status of TDVMCALLs. The output values for the given leaf should be -placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` -field of the union. + * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote + signed by a service hosting TD-Quoting Enclave operating on the host. + Parameters and return value are in the ``get_quote`` field of the union. + The ``gpa`` field and ``size`` specify the guest physical address + (without the shared bit set) and the size of a shared-memory buffer, in + which the TDX guest passes a TD Report. The ``ret`` field represents + the return value of the GetQuote request. When the request has been + queued successfully, the TDX guest can poll the status field in the + shared-memory area to check whether the Quote generation is completed or + not. When completed, the generated Quote is returned via the same buffer. + + * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support + status of TDVMCALLs. The output values for the given leaf should be + placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` + field of the union. + +* ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to +set up a notification interrupt for vector ``vector``. KVM may add support for more values in the future that may cause a userspace exit, even without calls to ``KVM_ENABLE_CAP`` or similar. 
In this case, diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst index 76bdd95334d6..5efac62c92c7 100644 --- a/Documentation/virt/kvm/x86/intel-tdx.rst +++ b/Documentation/virt/kvm/x86/intel-tdx.rst @@ -79,7 +79,20 @@ to be configured to the TDX guest. struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to + * userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + + /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel + * and forwarded to userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/Documentation/wmi/acpi-interface.rst b/Documentation/wmi/acpi-interface.rst index f1b28835d23c..1ef003b033bf 100644 --- a/Documentation/wmi/acpi-interface.rst +++ b/Documentation/wmi/acpi-interface.rst @@ -36,7 +36,7 @@ Offset Size (in bytes) Content The WMI object flags control whether the method or notification ID is used: -- 0x1: Data block usage is expensive and must be explicitly enabled/disabled. +- 0x1: Data block is expensive to collect. - 0x2: Data block contains WMI methods. - 0x4: Data block contains ASCIZ string. - 0x8: Data block describes a WMI event, use notification ID instead @@ -83,14 +83,18 @@ event as hexadecimal value. Their first parameter is an integer with a value of 0 if the WMI event should be disabled, other values will enable the WMI event. +Those ACPI methods are always called even for WMI events not registered as +being expensive to collect to match the behavior of the Windows driver. + WCxx ACPI methods ----------------- -Similar to the ``WExx`` ACPI methods, except that it controls data collection -instead of events and thus the last two characters of the ACPI method name are -the method ID of the data block to enable/disable. +Similar to the ``WExx`` ACPI methods, except that instead of WMI events it controls +data collection of data blocks registered as being expensive to collect. Thus the +last two characters of the ACPI method name are the method ID of the data block +to enable/disable. Those ACPI methods are also called before setting data blocks to match the -behaviour of the Windows driver. +behavior of the Windows driver. 
_WED ACPI method ---------------- diff --git a/MAINTAINERS b/MAINTAINERS index 53486c178bd4..ed0e73feaa60 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4186,6 +4186,7 @@ F: include/linux/cpumask_types.h F: include/linux/find.h F: include/linux/nodemask.h F: include/linux/nodemask_types.h +F: include/uapi/linux/bits.h F: include/vdso/bits.h F: lib/bitmap-str.c F: lib/bitmap.c @@ -4198,6 +4199,7 @@ F: tools/include/linux/bitfield.h F: tools/include/linux/bitmap.h F: tools/include/linux/bits.h F: tools/include/linux/find.h +F: tools/include/uapi/linux/bits.h F: tools/include/vdso/bits.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c @@ -4858,6 +4860,12 @@ F: drivers/firmware/broadcom/tee_bnxt_fw.c F: drivers/net/ethernet/broadcom/bnxt/ F: include/linux/firmware/broadcom/tee_bnxt_fw.h +BROADCOM BNG_EN 800 GIGABIT ETHERNET DRIVER +M: Vikas Gupta <vikas.gupta@broadcom.com> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/broadcom/bnge/ + BROADCOM BRCM80211 IEEE802.11 WIRELESS DRIVERS M: Arend van Spriel <arend.vanspriel@broadcom.com> L: linux-wireless@vger.kernel.org @@ -7329,6 +7337,8 @@ M: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com> M: Jiri Pirko <jiri@resnulli.us> L: netdev@vger.kernel.org S: Supported +F: Documentation/devicetree/bindings/dpll/dpll-device.yaml +F: Documentation/devicetree/bindings/dpll/dpll-pin.yaml F: Documentation/driver-api/dpll.rst F: drivers/dpll/* F: include/linux/dpll.h @@ -15555,6 +15565,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed <saeedm@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org S: Maintained W: https://www.nvidia.com/networking/ @@ -15624,6 +15635,7 @@ MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@nvidia.com> M: Leon Romanovsky <leonro@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Maintained @@ -15681,6 +15693,8 @@ MEMBLOCK AND MEMORY MANAGEMENT INITIALIZATION M: Mike Rapoport <rppt@kernel.org> L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes F: Documentation/core-api/boot-time-mm.rst F: Documentation/core-api/kho/bindings/memblock/* F: include/linux/memblock.h @@ -15853,6 +15867,17 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - OOM KILLER +M: Michal Hocko <mhocko@suse.com> +R: David Rientjes <rientjes@google.com> +R: Shakeel Butt <shakeel.butt@linux.dev> +L: linux-mm@kvack.org +S: Maintained +F: include/linux/oom.h +F: include/trace/events/oom.h +F: include/uapi/linux/oom.h +F: mm/oom_kill.c + MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton <akpm@linux-foundation.org> M: Vlastimil Babka <vbabka@suse.cz> @@ -15867,8 +15892,17 @@ F: include/linux/compaction.h F: include/linux/gfp.h F: include/linux/page-isolation.h F: mm/compaction.c +F: mm/debug_page_alloc.c +F: mm/fail_page_alloc.c F: mm/page_alloc.c +F: mm/page_ext.c +F: mm/page_frag_cache.c F: mm/page_isolation.c +F: mm/page_owner.c +F: mm/page_poison.c +F: mm/page_reporting.c +F: mm/show_mem.c +F: mm/shuffle.c MEMORY MANAGEMENT - RECLAIM M: Andrew Morton <akpm@linux-foundation.org> @@ -15928,9 +15962,9 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@redhat.com> +M: Lorenzo Stoakes 
<lorenzo.stoakes@oracle.com> R: Zi Yan <ziy@nvidia.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Nico Pache <npache@redhat.com> R: Ryan Roberts <ryan.roberts@arm.com> @@ -16482,6 +16516,14 @@ L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/microchip/ +MICROCHIP ZL3073X DRIVER +M: Ivan Vecera <ivecera@redhat.com> +M: Prathosh Satish <Prathosh.Satish@microchip.com> +L: netdev@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml +F: drivers/dpll/zl3073x/ + MICROSEMI MIPS SOCS M: Alexandre Belloni <alexandre.belloni@bootlin.com> M: UNGLinuxDriver@microchip.com @@ -16803,8 +16845,8 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h MODULE SUPPORT M: Luis Chamberlain <mcgrof@kernel.org> M: Petr Pavlu <petr.pavlu@suse.com> +M: Daniel Gomez <da.gomez@kernel.org> R: Sami Tolvanen <samitolvanen@google.com> -R: Daniel Gomez <da.gomez@samsung.com> L: linux-modules@vger.kernel.org L: linux-kernel@vger.kernel.org S: Maintained @@ -17203,10 +17245,10 @@ F: drivers/rtc/rtc-ntxec.c F: include/linux/mfd/ntxec.h NETRONOME ETHERNET DRIVERS -M: Louis Peens <louis.peens@corigine.com> R: Jakub Kicinski <kuba@kernel.org> +R: Simon Horman <horms@kernel.org> L: oss-drivers@corigine.com -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) @@ -19582,8 +19624,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git F: drivers/pinctrl/intel/ PIN CONTROLLER - KEEMBAY -M: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com> -S: Supported +S: Orphan F: drivers/pinctrl/pinctrl-keembay* PIN CONTROLLER - MEDIATEK @@ -20136,21 +20177,15 @@ S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* F: drivers/soc/qcom/apr.c -F: include/dt-bindings/sound/qcom,wcd9335.h -F: include/dt-bindings/sound/qcom,wcd934x.h -F: sound/soc/codecs/lpass-rx-macro.* -F: sound/soc/codecs/lpass-tx-macro.* -F: sound/soc/codecs/lpass-va-macro.c -F: sound/soc/codecs/lpass-wsa-macro.* +F: drivers/soundwire/qcom.c +F: include/dt-bindings/sound/qcom,wcd93* +F: sound/soc/codecs/lpass-*.* F: sound/soc/codecs/msm8916-wcd-analog.c F: sound/soc/codecs/msm8916-wcd-digital.c F: sound/soc/codecs/wcd-clsh-v2.* F: sound/soc/codecs/wcd-mbhc-v2.* -F: sound/soc/codecs/wcd9335.* -F: sound/soc/codecs/wcd934x.c -F: sound/soc/codecs/wsa881x.c -F: sound/soc/codecs/wsa883x.c -F: sound/soc/codecs/wsa884x.c +F: sound/soc/codecs/wcd93*.* +F: sound/soc/codecs/wsa88*.* F: sound/soc/qcom/ QCOM EMBEDDED USB DEBUGGER (EUD) @@ -21181,7 +21216,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml +F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER @@ -21393,7 +21428,7 @@ N: spacemit K: spacemit RISC-V THEAD SoC SUPPORT -M: Drew Fustini <drew@pdp7.com> +M: Drew Fustini <fustini@kernel.org> M: Guo Ren <guoren@kernel.org> M: Fu Wei <wefu@redhat.com> L: linux-riscv@lists.infradead.org @@ -22569,9 +22604,11 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS +M: D. 
Wythe <alibuda@linux.alibaba.com> +M: Dust Li <dust.li@linux.alibaba.com> +M: Sidraya Jayagond <sidraya@linux.ibm.com> M: Wenjia Zhang <wenjia@linux.ibm.com> -M: Jan Karcher <jaka@linux.ibm.com> -R: D. Wythe <alibuda@linux.alibaba.com> +R: Mahanta Jambigi <mjambigi@linux.ibm.com> R: Tony Lu <tonylu@linux.alibaba.com> R: Wen Gu <guwen@linux.alibaba.com> L: linux-rdma@vger.kernel.org @@ -24082,6 +24119,7 @@ M: Bin Du <bin.du@amd.com> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-amdisp.c +F: include/linux/soc/amd/isp4_misc.h SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung <jh80.chung@samsung.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 8f1f18adcdb5..5ef57f88df6b 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -152,6 +152,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi index 3df2fd3993b5..9740fbf200f0 100644 --- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi +++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi @@ -20,8 +20,6 @@ compatible = "jedec,spi-nor"; reg = <0x0>; spi-max-frequency = <25000000>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index e2d9439397f7..5b3c42e9f0e6 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -100,6 +100,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j293-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi index 8e82231acab5..0c8206156bfe 100644 --- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi +++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi @@ -71,7 +71,7 @@ */ &port00 { bus-range = <1 1>; - wifi0: network@0,0 { + wifi0: wifi@0,0 { compatible = "pci14e4,4425"; reg = <0x10000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 20faf0c0d809..3a204845b85b 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -405,8 +405,6 @@ compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index be86d34c6696..fb8ad7d4c65a 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -63,6 +63,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { 
compatible = "apple,j493-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi index e95711d8337f..f68354194355 100644 --- a/arch/arm64/boot/dts/apple/t8112.dtsi +++ b/arch/arm64/boot/dts/apple/t8112.dtsi @@ -420,8 +420,6 @@ compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 897fc686e6a9..7e04a2905ce4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1573,6 +1573,7 @@ CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_RESET_TI_SCI=y +CONFIG_PHY_SNPS_EUSB2=m CONFIG_PHY_XGENE=y CONFIG_PHY_CAN_TRANSCEIVER=m CONFIG_PHY_NXP_PTN3222=m @@ -1597,7 +1598,6 @@ CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m CONFIG_PHY_QCOM_QUSB2=m -CONFIG_PHY_QCOM_SNPS_EUSB2=m CONFIG_PHY_QCOM_EUSB2_REPEATER=m CONFIG_PHY_QCOM_M31_USB=m CONFIG_PHY_QCOM_USB_HS=m diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,17 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -319,7 +308,6 @@ __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -371,6 +359,13 @@ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 .Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d27079968341..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1480,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2920b0a51403..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o 
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, 
CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5954cec19660..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -673,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = 
read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. 
* diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. 
For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..679aafe77de2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. 
+ */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index fe198b473f84..e739dbc6329d 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -18,12 +18,12 @@ /* * This gives the physical RAM offset. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef PHYS_OFFSET #define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifndef IO_BASE #define IO_BASE CSR_DMW0_BASE @@ -66,7 +66,7 @@ extern unsigned long vm_map_base; #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ @@ -85,7 +85,7 @@ extern unsigned long vm_map_base; /* * 32/64-bit LoongArch address spaces */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ACAST32_ #define _ACAST64_ #else diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h index ff3d10ac393f..7dc29bd9b2f0 100644 --- a/arch/loongarch/include/asm/alternative-asm.h +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_ASM_H #define _ASM_ALTERNATIVE_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm.h> @@ -77,6 +77,6 @@ .previous .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h index cee7b29785ab..b5bae21fb3c8 100644 --- a/arch/loongarch/include/asm/alternative.h +++ b/arch/loongarch/include/asm/alternative.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_H #define _ASM_ALTERNATIVE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stddef.h> @@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index df05005f2b80..d60bdf2e6377 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -7,7 +7,7 @@ #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_BPF 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -22,7 +22,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm -#else /* __ASSEMBLY__ */ +#else /* 
__ASSEMBLER__ */ #include <linux/bits.h> #include <linux/stringify.h> @@ -60,6 +60,6 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h index f591b3245def..f018d26fc995 100644 --- a/arch/loongarch/include/asm/asm.h +++ b/arch/loongarch/include/asm/asm.h @@ -110,7 +110,7 @@ #define LONG_SRA srai.w #define LONG_SRAV sra.w -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .word #endif #define LONGSIZE 4 @@ -131,7 +131,7 @@ #define LONG_SRA srai.d #define LONG_SRAV sra.d -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .dword #endif #define LONGSIZE 8 @@ -158,7 +158,7 @@ #define PTR_SCALESHIFT 2 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .word #endif #define PTRSIZE 4 @@ -181,7 +181,7 @@ #define PTR_SCALESHIFT 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .dword #endif #define PTRSIZE 8 diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 98cf4d7b4b0a..dfb982fe8701 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_PRODUCT_MASK 0x0fff -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) enum cpu_type_enum { CPU_UNKNOWN, @@ -55,7 +55,7 @@ enum cpu_type_enum { CPU_LAST }; -#endif /* !__ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * ISA Level encodings diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 6e0a99763a9a..f4caaf764f9e 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -14,7 +14,7 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_DYNAMIC_FTRACE @@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h index 996038da806d..af95b941f48b 100644 --- a/arch/loongarch/include/asm/gpr-num.h +++ b/arch/loongarch/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .equ .L__gpr_num_zero, 0 .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -25,7 +25,7 @@ .equ .L__gpr_num_$s\num, 23 + \num .endr -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .equ .L__gpr_num_zero, 0\n" \ @@ -47,6 +47,6 @@ " .equ .L__gpr_num_$s\\num, 23 + \\num\n" \ " .endr\n" \ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 003172b8406b..620163628a7f 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -5,7 +5,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/stringify.h> @@ -80,6 +80,6 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* #ifndef __ASSEMBLY__ */ +#endif /* #ifndef __ASSEMBLER__ */ #endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h 
index 8a924bd69d19..4000c7603d8e 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -7,7 +7,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -50,5 +50,5 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index 7f52bd31b9d4..62f139a9c87d 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <linux/mmzone.h> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index d84dac88a584..a0994d226eff 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -9,15 +9,15 @@ #include <linux/linkage.h> #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <larchintrin.h> /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* LoongArch Registers */ #define REG_ZERO 0x0 @@ -53,7 +53,7 @@ #define REG_S7 0x1e #define REG_S8 0x1f -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Bit fields for CPUCFG registers */ #define LOONGARCH_CPUCFG0 0x0 @@ -171,7 +171,7 @@ * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* CSR */ #define csr_read32(reg) __csrrd_w(reg) @@ -187,7 +187,7 @@ #define iocsr_write32(val, reg) __iocsrwr_w(val, reg) #define iocsr_write64(val, reg) __iocsrwr_d(val, reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* CSR register number */ @@ -1195,7 +1195,7 @@ #define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 #define IOCSR_EXTIOI_VECTOR_NUM 256 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline u64 drdtime(void) { @@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx) #define clear_csr_estat(val) \ csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Generic EntryLo bit definitions */ #define ENTRYLO_V (_ULCAST_(1) << 0) diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/arch/loongarch/include/asm/orc_types.h +++ b/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. 
It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 7368f12b7cb1..a3aaf34fba16 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -15,7 +15,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <linux/pfn.h> @@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr); #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 45bfc65a0c9f..7bbfb04a54cc 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -92,7 +92,7 @@ #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) @@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index b30185302c07..f2aeff544cee 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -55,7 +55,7 @@ #define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmzone.h> @@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd) #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h index 1672262a5e2e..0b168cdaae9a 100644 --- a/arch/loongarch/include/asm/prefetch.h +++ b/arch/loongarch/include/asm/prefetch.h @@ -8,7 +8,7 @@ #define Pref_Load 0 #define Pref_Store 8 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro __pref hint addr #ifdef CONFIG_CPU_HAS_PREFETCH diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index ad0bd234a0f1..3a47f52959a8 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -39,7 +39,7 @@ int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); #endif -static inline void plat_smp_setup(void) +static inline void __init plat_smp_setup(void) { loongson_smp_setup(); } diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 4f5a9441754e..9dfa2ef00816 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -10,7 +10,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> @@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void) register unsigned long current_stack_pointer __asm__("$sp"); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* thread information allocation */ #define THREAD_SIZE SZ_16K diff --git 
a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h index baf15a0dcf8b..0edd731f3d6a 100644 --- a/arch/loongarch/include/asm/types.h +++ b/arch/loongarch/include/asm/types.h @@ -8,7 +8,7 @@ #include <asm-generic/int-ll64.h> #include <uapi/asm/types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ULCAST_ #define _U64CAST_ #else diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index 2c68bc72736c..16c7f7e465a0 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include <linux/objtool.h> #include <asm/orc_types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_UNDEFINED UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED @@ -23,7 +23,7 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define UNWIND_HINT_SAVE \ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) @@ -31,6 +31,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h index 322d0a5f1c84..395ec223bcbe 100644 --- a/arch/loongarch/include/asm/vdso/arch_data.h +++ b/arch/loongarch/include/asm/vdso/arch_data.h @@ -7,7 +7,7 @@ #ifndef _VDSO_ARCH_DATA_H #define _VDSO_ARCH_DATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/vdso.h> @@ -20,6 +20,6 @@ struct vdso_arch_data { struct vdso_pcpu_data pdata[NR_CPUS]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h index a81724b69f29..2ff05003c6e7 100644 --- a/arch/loongarch/include/asm/vdso/getrandom.h +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index f15503e3336c..dcafabca9bb6 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void) } #define __arch_vdso_hres_capable loongarch_vdso_hres_capable -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h index ef5770b343a0..1e255373b0b8 100644 --- a/arch/loongarch/include/asm/vdso/processor.h +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -5,10 +5,10 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h 
b/arch/loongarch/include/asm/vdso/vdso.h index 50c65fb29daf..04bd2d452876 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -7,7 +7,7 @@ #ifndef _ASM_VDSO_VDSO_H #define _ASM_VDSO_VDSO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/page.h> @@ -16,6 +16,6 @@ #define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 1140b54b4bc8..558eb9dfda52 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -2,13 +2,13 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index a54cd6fd3796..1367ca759468 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/efi-bgrt.h> +#include <linux/export.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/memblock.h> diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index 4ad13847e962..0e0c766df1e3 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/export.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/alternative.h> diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index de21e72759ee..860a3bc030e0 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -144,6 +144,18 @@ void __init efi_init(void) if (efi_memmap_init_early(&data) < 0) panic("Unable to map EFI memory map.\n"); + /* + * Reserve the physical memory region occupied by the EFI + * memory map table (header + descriptors). This is crucial + * for kdump, as the kdump kernel relies on this original + * memmap passed by the bootloader. Without reservation, + * this region could be overwritten by the primary kernel. + * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that + * critical boot services code/data regions like this are preserved. 
+ */ + memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); + early_memunmap(tbl, sizeof(*tbl)); } diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c index 0fa81ced28dc..3d98c6aa00db 100644 --- a/arch/loongarch/kernel/elf.c +++ b/arch/loongarch/kernel/elf.c @@ -6,7 +6,6 @@ #include <linux/binfmts.h> #include <linux/elf.h> -#include <linux/export.h> #include <linux/sched.h> #include <asm/cpu-features.h> diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 4c476904227f..141b49bd989c 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -4,6 +4,7 @@ */ #include <linux/cpu.h> +#include <linux/export.h> #include <linux/init.h> #include <asm/fpu.h> #include <asm/smp.h> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index e5a39bbad078..b1b51f920b23 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/export.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/irq_work.h> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index bc75a3a69fc8..367906b10f81 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } -static unsigned long __init get_loops_per_jiffy(void) +static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 47fc2de6d150..3d9be6ca7ec5 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/mm.h> #include <linux/sched/mm.h> diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 98379b7d4147..08d7951b2f60 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <asm/unwind.h> +#include <linux/export.h> unsigned long unwind_get_return_address(struct unwind_state *state) { diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index d623935a7547..0005be49b056 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/objtool.h> +#include <linux/export.h> #include <linux/module.h> +#include <linux/objtool.h> #include <linux/sort.h> #include <asm/exception.h> #include <asm/orc_header.h> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 929ae240280a..729e775bd40d 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <linux/cpumask.h> +#include <linux/export.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index f39929d7bf8a..a75f865d6fb9 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -9,7 +9,8 @@ static void eiointc_set_sw_coreisr(struct 
loongarch_eiointc *s) { - int ipnum, cpu, irq_index, irq_mask, irq; + int ipnum, cpu, cpuid, irq_index, irq_mask, irq; + struct kvm_vcpu *vcpu; for (irq = 0; irq < EIOINTC_IRQS; irq++) { ipnum = s->ipmap.reg_u8[irq / 32]; @@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) irq_index = irq / 32; irq_mask = BIT(irq & 0x1f); - cpu = s->coremap.reg_u8[irq]; + cpuid = s->coremap.reg_u8[irq]; + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) set_bit(irq, s->sw_coreisr[cpu][ipnum]); else @@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, - int irq, void *pvalue, u32 len, bool notify) + int irq, u64 val, u32 len, bool notify) { - int i, cpu; - u64 val = *(u64 *)pvalue; + int i, cpu, cpuid; + struct kvm_vcpu *vcpu; for (i = 0; i < len; i++) { - cpu = val & 0xff; + cpuid = val & 0xff; val = val >> 8; if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { - cpu = ffs(cpu) - 1; - cpu = (cpu >= 4) ? 0 : cpu; + cpuid = ffs(cpuid) - 1; + cpuid = (cpuid >= 4) ? 0 : cpuid; } + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (s->sw_coremap[irq + i] == cpu) continue; @@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_read_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq; s->coremap.reg_u8[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: index = (offset - EIOINTC_ENABLE_START) >> 1; - old_data = s->enable.reg_u32[index]; + old_data = s->enable.reg_u16[index]; s->enable.reg_u16[index] = data; /* * 1: enable irq. * update irq when isr is set. */ data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index]; - index = index << 1; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1); } /* * 0: disable irq. @@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 1; s->coremap.reg_u16[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, * update irq when isr is set. 
*/ data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; - index = index << 2; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1); } /* * 0: disable irq. @@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 2; s->coremap.reg_u32[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; - index = index << 3; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1); } /* * 0: disable irq. @@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... 
EIOINTC_BOUNCE_END: @@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 3; s->coremap.reg_u64[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_write_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, int ret = 0; unsigned long flags; unsigned long type = (unsigned long)attr->attr; - u32 i, start_irq; + u32 i, start_irq, val; void __user *data; struct loongarch_eiointc *s = dev->kvm->arch.eiointc; @@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (copy_from_user(&s->num_cpu, data, 4)) + if (copy_from_user(&val, data, 4)) ret = -EFAULT; + else { + if (val >= EIOINTC_ROUTE_MAX_VCPUS) + ret = -EINVAL; + else + s->num_cpu = val; + } break; case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: if (copy_from_user(&s->features, data, 4)) @@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, for (i = 0; i < (EIOINTC_IRQS / 4); i++) { start_irq = i * 4; eiointc_update_sw_coremap(s, start_irq, - (void *)&s->coremap.reg_u32[i], sizeof(u32), false); + s->coremap.reg_u32[i], sizeof(u32), false); } break; default: @@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { - int addr, cpuid, offset, ret = 0; + int addr, cpu, offset, ret = 0; unsigned long flags; void *p = NULL; void __user *data; @@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, s = dev->kvm->arch.eiointc; addr = attr->attr; - cpuid = addr >> 16; + cpu = addr >> 16; addr &= 0xffff; data = (void __user *)attr->addr; switch (addr) { @@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, p = &s->isr.reg_u32[offset]; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + if (cpu >= s->num_cpu) + return -EINVAL; + offset = (addr - EIOINTC_COREISR_START) / 4; - p = &s->coreisr.reg_u32[cpuid][offset]; + p = &s->coreisr.reg_u32[cpu][offset]; break; case EIOINTC_COREMAP_START ... 
EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; @@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, data = (void __user *)attr->addr; switch (addr) { case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + if (is_write) + return ret; + p = &s->num_cpu; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + if (is_write) + return ret; + p = &s->features; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index b37cd8537b45..db22c2ec55e2 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -11,6 +11,7 @@ #include <asm/cpu-features.h> #include <linux/crc32.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/unaligned.h> diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index df309ae4045d..bcc9d01d8c41 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -2,6 +2,7 @@ // Copyright (C) 2019-2020 Arm Ltd. #include <linux/compiler.h> +#include <linux/export.h> #include <linux/kasan-checks.h> #include <linux/kernel.h> diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 70ca73019811..df949a3d0f34 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } -void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size) { return early_memremap(phys_addr, size); } -void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, +void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { return early_memremap(phys_addr, size); diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 2726639150bc..5bc9627a6cf9 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -3,7 +3,6 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/kernel.h> -#include <linux/export.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/types.h> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 31ac655b7837..72fb1b006da9 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -163,6 +163,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 1f2d5b7a7f5d..c16ec36dfee6 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -144,6 +144,9 @@ #define SO_PASSRIGHTS 0x4051 +#define SO_INQ 0x4052 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 36061f4732b7..d71ea0f4466f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -63,7 +63,8 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_CFI_CLANG + # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2 + select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000 select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU diff 
--git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 438ce7df24c3..5bd5aae60d53 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1075,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) -#define TASK_SIZE_MAX LONG_MAX #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index 451fd76b8811..d766e2b9e6df 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u addi_insn_mask &= 0x07fff; } - if (lower_immediate & 0x00000fff) { + if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) { /* replace upper 12 bits of addi with lower 12 bits of val */ addi_insn &= addi_insn_mask; addi_insn |= (lower_immediate & 0x00000fff) << 20; diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index d472da4450e6..525e50db24f7 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -127,6 +127,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_8(x, ptr, label) \ +do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ asm_goto_output( \ @@ -141,7 +142,7 @@ do { \ : : label); \ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ (((u64)__hi << 32) | __lo))); \ - +} while (0) #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #define __get_user_8(x, ptr, label) \ do { \ diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h index 8dc92441702a..c6d66895c1f5 100644 --- a/arch/riscv/include/asm/vdso/getrandom.h +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -18,7 +18,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns register unsigned int flags asm("a2") = _flags; asm volatile ("ecall\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (buffer), "r" (len), "r" (flags) : "memory"); diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 45c9b426fcc5..b61786d43c20 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -205,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, THEAD_VSETVLI_T4X0E8M8D1 THEAD_VSB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( @@ -241,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ THEAD_VSETVLI_T4X0E8M8D1 THEAD_VLB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index e6fbaaf54956..87d655944803 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi; /* * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can - * be invoked from multiple threads in parallel. 
Define a per cpu data + * be invoked from multiple threads in parallel. Define an array of boot data * to handle that. */ -static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static struct sbi_hart_boot_data boot_data[NR_CPUS]; static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) @@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) unsigned long boot_addr = __pa_symbol(secondary_start_sbi); unsigned long hartid = cpuid_to_hartid_map(cpuid); unsigned long hsm_data; - struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + struct sbi_hart_boot_data *bdata = &boot_data[cpuid]; /* Make sure tidle is updated */ smp_mb(); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f7c9a1caa83e..14888e5ea19a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; +EXPORT_SYMBOL_GPL(boot_cpu_hartid); /* * Place kernel memory regions on the resource tree so that diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index dd8e4af6583f..93043924fe6c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -454,7 +454,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) + if (copy_from_user(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -555,7 +555,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user_nofault((u8 __user *)addr, &val, len)) + if (copy_to_user((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 7c15b0f4ee3b..c29ef12a63bb 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -30,7 +30,7 @@ SECTIONS *(.data .data.* .gnu.linkonce.d.*) *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) - } + } :text .note : { *(.note.*) } :text :note diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c index 1411337dc1e6..8fcf67e8c07f 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive.c +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -8,7 +8,7 @@ #include <linux/types.h> /* All SiFive vendor extensions supported in Linux */ -const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } 
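
The sha1_s390 hunks just above add an explicit `first_message_part = 0` on both the init and the import paths, closing a class of bug where one field of a reused hash context silently keeps its previous value. A minimal sketch of that pattern follows; the struct and function names (digest_ctx, digest_init, digest_import) are hypothetical stand-ins, not the CPACF or shash API:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Hypothetical digest context; stands in for the s390 sha context. */
struct digest_ctx {
	uint32_t state[5];
	uint64_t count;
	int first_message_part;	/* mode flag that must never go stale */
};

/* Serialized form accepted by import(); mirrors a shash export blob. */
struct digest_export {
	uint32_t state[5];
	uint64_t count;
};

static void digest_init(struct digest_ctx *ctx)
{
	static const uint32_t iv[5] = {
		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
	};

	memcpy(ctx->state, iv, sizeof(iv));
	ctx->count = 0;
	/*
	 * Reset every field, including flags that only some operations
	 * set. Dropping this line reproduces the bug pattern fixed
	 * above: a context recycled after an operation that set the
	 * flag would run the next digest in the wrong mode.
	 */
	ctx->first_message_part = 0;
}

static void digest_import(struct digest_ctx *ctx, const struct digest_export *in)
{
	memcpy(ctx->state, in->state, sizeof(in->state));
	ctx->count = in->count;
	/* The exported blob does not carry the flag; re-derive it here. */
	ctx->first_message_part = 0;
}

int main(void)
{
	struct digest_ctx ctx;

	digest_init(&ctx);
	ctx.first_message_part = 1;	/* some special first-block operation */

	digest_init(&ctx);		/* reuse the context for a plain digest */
	printf("flag after re-init: %d (expect 0)\n", ctx.first_message_part);
	return 0;
}

The import path matters as much as init: import() rebuilds a context from serialized state, so any field absent from the serialized form must be re-derived there too, exactly as the sha512_s390.c hunks below do.
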
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62c0ab4a4b9d..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r addr = kernel_stack_pointer(regs) + n * sizeof(long); if (!regs_within_kernel_stack(regs, addr)) return 0; - return READ_ONCE_NOCHECK(addr); + return READ_ONCE_NOCHECK(*(unsigned long *)addr); } /** diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2fbee3887d13..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = 
driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index adcba7329386..71befa109e1c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -145,6 +145,9 @@ #define SO_PASSRIGHTS 0x005c +#define SO_INQ 0x005d +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 71019b3b54ea..5b3362af7d65 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. 
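
Whether this mitigation ended up active on a given machine is reported through the cpu_show_tsa() handler added in bugs.c further down in this series, which binds the tsa_strings[] values to the standard per-CPU vulnerabilities directory. A minimal user-space sketch for checking it, assuming the conventional sysfs path for an X86_BUG named "tsa" (error handling trimmed):

#include <stdio.h>

/* Conventional sysfs location for CPU vulnerability state files. */
#define TSA_SYSFS "/sys/devices/system/cpu/vulnerabilities/tsa"

int main(void)
{
	char line[128];
	FILE *f = fopen(TSA_SYSFS, "r");

	if (!f) {
		/* Kernels without this series (or non-x86) lack the file. */
		perror(TSA_SYSFS);
		return 1;
	}

	/*
	 * Prints one of the tsa_strings[] values from bugs.c, e.g.
	 * "Mitigation: Clear CPU buffers" or "Vulnerable: No microcode".
	 */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}
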
endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0b936b..7543a8b52c67 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init; DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void) void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem; if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return; + mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. 
*/ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 363110e6b2e3..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -9,6 +9,14 @@ #include <asm/cpufeature.h> #include <asm/msr.h> +/* + * Define bits that are always set to 1 in DR7, only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. 
+ */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void) return 0; get_debugreg(dr7, 7); - dr7 &= ~0x400; /* architecturally set bit */ + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; if (dr7) - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); + /* * Ensure the compiler doesn't lower the above statements into * the critical section; disabling breakpoints late would not diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4a391929cdb..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,6 +31,7 @@ #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> @@ -249,7 +250,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). 
+ */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). 
*/ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d42e41..a631f7d7c0c0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed; +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3]; + /* The percentage decrease from nominal to mean TSC frequency. 
*/ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed; struct snp_msg_desc { diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index d8525e6ef50a..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index 0007ba077c0c..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. + * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. + */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b2ad8d13211a..b750ac008b78 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + 
ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. 
*/ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. 
+ */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) +static void initialize_debug_regs(void) { - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } + /* Control register first -- to make sure everything is disabled. 
*/ + set_debugreg(DR7_FIXED_1, 7); + set_debugreg(DR6_RESERVED, 6); + /* dr5 and dr4 don't exist */ + set_debugreg(0, 3); + set_debugreg(0, 2); + set_debugreg(0, 1); + set_debugreg(0, 0); } #ifdef CONFIG_KGDB @@ -2417,7 +2425,7 @@ void cpu_init(void) load_mm_ldt(&init_mm); - clear_all_debug_regs(); + initialize_debug_regs(); dbg_restore_debug_regs(); doublefault_init_cpu_tss(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = 
mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 
0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 
0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 102641fd2172..8b1a9733d13e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) struct perf_event *bp; /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); + set_debugreg(DR7_FIXED_1, 7); for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a10e180cbf23..3ef15c2f152f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400)) + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1)) return; printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8d6cf25127aa..b972bf72fb8b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. 
*/ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400))) { + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5c897a86418..36354b470590 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline unsigned long debug_read_clear_dr6(void) +static __always_inline unsigned long debug_read_reset_dr6(void) { unsigned long dr6; + get_debugreg(dr6, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* * The Intel SDM says: * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. + * Certain debug exceptions may clear bits 0-3 of DR6. + * + * BLD induced #DB clears DR6.BLD and any other debug + * exception doesn't modify DR6.BLD. * - * Keep it simple: clear DR6 immediately. + * RTM induced #DB clears DR6.RTM and any other debug + * exception sets DR6.RTM. + * + * To avoid confusion in identifying debug exceptions, + * debug handlers should set DR6.BLD and DR6.RTM, and + * clear other DR6 bits before returning. + * + * Keep it simple: write DR6 with its architectural reset + * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. */ - get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); - dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } @@ -1239,13 +1247,13 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - exc_debug_kernel(regs, debug_read_clear_dr6()); + exc_debug_kernel(regs, debug_read_reset_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - exc_debug_user(regs, debug_read_clear_dr6()); + exc_debug_user(regs, debug_read_reset_dr6()); } #ifdef CONFIG_X86_FRED @@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which - * debug_read_clear_dr6() returns for the IDT entry points. + * debug_read_reset_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); @@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) /* 32 bit does not have separate entry points. 
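
The polarity flip in debug_read_reset_dr6() above is easier to see with concrete values. DR6's RTM and BLD bits read as 1 when the condition did not occur, so XORing the raw register with the architectural reset value DR6_RESERVED (0xFFFF0FF0) yields a value where every set bit means "this condition fired". A self-contained userspace illustration with hypothetical values:

#include <assert.h>
#include <stdint.h>

#define DR6_RESERVED 0xFFFF0FF0u	/* architectural DR6 reset value */

int main(void)
{
	/* Raw DR6 after a breakpoint-0 #DB: B0 (bit 0) set, the
	 * inverted-polarity bits (e.g. RTM, bit 16) still read 1. */
	uint32_t raw = DR6_RESERVED | 0x1;
	uint32_t dr6 = raw ^ DR6_RESERVED;	/* flip to positive polarity */

	assert(dr6 == 0x1);			/* only B0 is reported */

	/* An RTM-induced #DB instead *clears* bit 16 in the raw value: */
	raw = (DR6_RESERVED | 0x1) & ~(1u << 16);
	assert((raw ^ DR6_RESERVED) == (0x1 | 1u << 16));
	return 0;
}
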
*/ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6 = debug_read_clear_dr6(); + unsigned long dr6 = debug_read_reset_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. @@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; 
unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. */ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..f31ccdeb905b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,9 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_GET_QUOTE BIT(0) +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +191,10 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_GET_QUOTE | + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1530,6 +1537,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1569,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if 
(static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b58a74c1722d..357b9e3a6cef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -11035,7 +11037,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs && !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); @@ -11044,7 +11046,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); } vcpu->arch.host_debugctl = get_debugctlmsr(); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..5fa2cca43653 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure whatever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allows, on Live Migration and Live Update, the IRQ routing table + * to be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels.
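
For context on the 1024 vs. 4096 figure: with 2-level event channels, a first-level word of pending bits selects a second-level word, so the port space is the square of the guest's word size in bits. A sketch of that relationship; an illustrative helper, not the kernel's max_evtchn_port():

/* 64-bit guests ('long mode'): 64 * 64 = 4096 ports.
 * 32-bit guests:               32 * 32 = 1024 ports. */
static unsigned int max_evtchn_port_sketch(int xen_long_mode)
{
	unsigned int bits_per_word = xen_long_mode ? 64 : 32;

	return bits_per_word * bits_per_word;
}
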
+ */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) diff --git a/block/genhd.c b/block/genhd.c index 8171a6bc3210..c26733f6324b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -128,23 +128,27 @@ static void part_stat_read_all(struct block_device *part, static void bdev_count_inflight_rw(struct block_device *part, unsigned int inflight[2], bool mq_driver) { + int write = 0; + int read = 0; int cpu; if (mq_driver) { blk_mq_in_driver_rw(part, inflight); - } else { - for_each_possible_cpu(cpu) { - inflight[READ] += part_stat_local_read_cpu( - part, in_flight[READ], cpu); - inflight[WRITE] += part_stat_local_read_cpu( - part, in_flight[WRITE], cpu); - } + return; + } + + for_each_possible_cpu(cpu) { + read += part_stat_local_read_cpu(part, in_flight[READ], cpu); + write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu); } - if (WARN_ON_ONCE((int)inflight[READ] < 0)) - inflight[READ] = 0; - if (WARN_ON_ONCE((int)inflight[WRITE] < 0)) - inflight[WRITE] = 0; + /* + * While iterating all CPUs, some IOs may be issued from a CPU already + * traversed and complete on a CPU that has not yet been traversed, + * causing the inflight number to be negative. + */ + inflight[READ] = read > 0 ? read : 0; + inflight[WRITE] = write > 0 ? write : 0; } /** diff --git a/crypto/wp512.c b/crypto/wp512.c index 41f13d490333..229b189a7988 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -21,10 +21,10 @@ */ #include <crypto/internal/hash.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/module.h> -#include <linux/mm.h> -#include <asm/byteorder.h> -#include <linux/types.h> +#include <linux/string.h> +#include <linux/unaligned.h> #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 @@ -37,9 +37,6 @@ struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; - u8 buffer[WP512_BLOCK_SIZE]; - int bufferBits; - int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; @@ -779,16 +776,16 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { * The core Whirlpool transform. 
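
Back in the bdev_count_inflight_rw() change above, the clamp deserves a worked example. Per-CPU in_flight counters are incremented on the issuing CPU and decremented on the completing CPU, so summing them mid-flight can transiently produce a negative total. A standalone illustration with made-up per-CPU snapshots:

#include <stdio.h>

int main(void)
{
	/* We read cpu0's counter as 0; afterwards an I/O issues on cpu0
	 * (+1, missed by us) and completes on cpu1 (-1) before we read
	 * cpu1. The snapshot sums to -1 even though true inflight is 0. */
	int observed[2] = { 0, -1 };
	int sum = observed[0] + observed[1];

	printf("inflight = %d\n", sum > 0 ? sum : 0);	/* clamped to 0 */
	return 0;
}
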
*/ -static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { +static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx, + const u8 *buffer) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; - const __be64 *buffer = (const __be64 *)wctx->buffer; for (i = 0; i < 8; i++) - block[i] = be64_to_cpu(buffer[i]); + block[i] = get_unaligned_be64(buffer + i * 8); state[0] = block[0] ^ (K[0] = wctx->hash[0]); state[1] = block[1] ^ (K[1] = wctx->hash[1]); @@ -991,8 +988,6 @@ static int wp512_init(struct shash_desc *desc) { int i; memset(wctx->bitLength, 0, 32); - wctx->bufferBits = wctx->bufferPos = 0; - wctx->buffer[0] = 0; for (i = 0; i < 8; i++) { wctx->hash[i] = 0L; } @@ -1000,84 +995,54 @@ static int wp512_init(struct shash_desc *desc) { return 0; } -static int wp512_update(struct shash_desc *desc, const u8 *source, - unsigned int len) +static void wp512_add_length(u8 *bitLength, u64 value) { - struct wp512_ctx *wctx = shash_desc_ctx(desc); - int sourcePos = 0; - unsigned int bits_len = len * 8; // convert to number of bits - int sourceGap = (8 - ((int)bits_len & 7)) & 7; - int bufferRem = wctx->bufferBits & 7; + u32 carry; int i; - u32 b, carry; - u8 *buffer = wctx->buffer; - u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; - u64 value = bits_len; for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { carry += bitLength[i] + ((u32)value & 0xff); bitLength[i] = (u8)carry; carry >>= 8; value >>= 8; } - while (bits_len > 8) { - b = ((source[sourcePos] << sourceGap) & 0xff) | - ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); - buffer[bufferPos++] |= (u8)(b >> bufferRem); - bufferBits += 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += bufferRem; - bits_len -= 8; - sourcePos++; - } - if (bits_len > 0) { - b = (source[sourcePos] << sourceGap) & 0xff; - buffer[bufferPos] |= b >> bufferRem; - } else { - b = 0; - } - if (bufferRem + bits_len < 8) { - bufferBits += bits_len; - } else { - bufferPos++; - bufferBits += 8 - bufferRem; - bits_len -= 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += (int)bits_len; - } +} - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; +static int wp512_update(struct shash_desc *desc, const u8 *source, + unsigned int len) +{ + struct wp512_ctx *wctx = shash_desc_ctx(desc); + unsigned int remain = len % WP512_BLOCK_SIZE; + u64 bits_len = (len - remain) * 8ull; + u8 *bitLength = wctx->bitLength; - return 0; + wp512_add_length(bitLength, bits_len); + do { + wp512_process_buffer(wctx, source); + source += WP512_BLOCK_SIZE; + bits_len -= WP512_BLOCK_SIZE * 8; + } while (bits_len); + + return remain; } -static int wp512_final(struct shash_desc *desc, u8 *out) +static int wp512_finup(struct shash_desc *desc, const u8 *src, + unsigned int bufferPos, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; - u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; + u8 buffer[WP512_BLOCK_SIZE]; - buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + wp512_add_length(bitLength, bufferPos * 8); + memcpy(buffer, src, 
bufferPos); + buffer[bufferPos] = 0x80U; bufferPos++; if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { if (bufferPos < WP512_BLOCK_SIZE) memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); bufferPos = 0; } if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) @@ -1086,31 +1051,32 @@ static int wp512_final(struct shash_desc *desc, u8 *out) bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); + memzero_explicit(buffer, sizeof(buffer)); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; return 0; } -static int wp384_final(struct shash_desc *desc, u8 *out) +static int wp384_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP384_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } -static int wp256_final(struct shash_desc *desc, u8 *out) +static int wp256_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP256_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); @@ -1121,11 +1087,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP512_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp512_final, + .finup = wp512_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp512", .cra_driver_name = "wp512-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1133,11 +1100,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP384_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp384_final, + .finup = wp384_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp384", .cra_driver_name = "wp384-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1145,11 +1113,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP256_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp256_final, + .finup = wp256_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp256", .cra_driver_name = "wp256-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/drivers/Kconfig b/drivers/Kconfig index 7c556c5ac4fd..3d8c902c2561 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -77,6 +77,8 @@ source "drivers/pps/Kconfig" source "drivers/ptp/Kconfig" +source "drivers/dpll/Kconfig" + source "drivers/pinctrl/Kconfig" source "drivers/gpio/Kconfig" @@ -245,6 +247,4 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" -source "drivers/dpll/Kconfig" - endmenu diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 45593612a4db..6905b56bf3e4 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -243,23 +243,10 @@ static int acpi_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_POWER_NOW: - if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) { + if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) ret = -ENODEV; - break; - } - - val->intval = 
battery->rate_now * 1000; - /* - * When discharging, the current should be reported as a - * negative number as per the power supply class interface - * definition. - */ - if (psp == POWER_SUPPLY_PROP_CURRENT_NOW && - (battery->state & ACPI_BATTERY_STATE_DISCHARGING) && - acpi_battery_handle_discharging(battery) - == POWER_SUPPLY_STATUS_DISCHARGING) - val->intval = -val->intval; - + else + val->intval = battery->rate_now * 1000; break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e5e5c2e81d09..aa93b0ecbbc6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1450,7 +1450,7 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_VERSION, "ASUSPRO D840MB_M840SA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUSPRO D840MB_M840SA"), }, /* 320 is broken, there is no known good version. */ }, diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 2a1fe3080712..0dfa2cdc897c 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -755,7 +755,7 @@ static void lanai_shutdown_rx_vci(const struct lanai_vcc *lvcc) /* Shutdown transmitting on card. * Unfortunately the lanai needs us to wait until all the data * drains out of the buffer before we can dealloc it, so this - * can take awhile -- up to 370ms for a full 128KB buffer + * can take a while -- up to 370ms for a full 128KB buffer * assuming everone else is quiet. In theory the time is * boundless if there's a CBR VCC holding things up. */ diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7779ab0ca7ce..efc575a00edd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_ghostwrite.attr, &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index eebe699fdf4f..bf77d28e959f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1236,6 +1236,7 @@ void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { + pm_restore_gfp_mask(); dpm_resume(state); dpm_complete(state); } @@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state) error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); - else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } dpm_show_time(starttime, state, error, "start"); return error; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b1be6c510372..0c2eabe14af3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -64,13 
+64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - rcu_read_lock(); - if (!page) + if (!page) { + rcu_read_lock(); return ERR_PTR(-ENOMEM); + } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); + rcu_read_lock(); if (ret) { xa_unlock(&brd->brd_pages); __free_page(page); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d36f44f5ee80..9fd284fa76dc 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1148,8 +1148,8 @@ exit: blk_mq_end_request(req, res); } -static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, - int res, unsigned issue_flags) +static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io, + struct request *req) { /* read cmd first because req will overwrite it */ struct io_uring_cmd *cmd = io->cmd; @@ -1164,6 +1164,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, io->flags &= ~UBLK_IO_FLAG_ACTIVE; io->req = req; + return cmd; +} + +static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, + int res, unsigned issue_flags) +{ + struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ io_uring_cmd_done(cmd, res, 0, issue_flags); @@ -1416,6 +1423,14 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static inline bool ublk_belong_to_same_batch(const struct ublk_io *io, + const struct ublk_io *io2) +{ + return (io_uring_cmd_ctx_handle(io->cmd) == + io_uring_cmd_ctx_handle(io2->cmd)) && + (io->task == io2->task); +} + static void ublk_queue_rqs(struct rq_list *rqlist) { struct rq_list requeue_list = { }; @@ -1427,14 +1442,16 @@ static void ublk_queue_rqs(struct rq_list *rqlist) struct ublk_queue *this_q = req->mq_hctx->driver_data; struct ublk_io *this_io = &this_q->ios[req->tag]; - if (io && io->task != this_io->task && !rq_list_empty(&submit_list)) + if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) { + rq_list_add_tail(&requeue_list, req); + continue; + } + + if (io && !ublk_belong_to_same_batch(io, this_io) && + !rq_list_empty(&submit_list)) ublk_queue_cmd_list(io, &submit_list); io = this_io; - - if (ublk_prep_req(this_q, req, true) == BLK_STS_OK) - rq_list_add_tail(&submit_list, req); - else - rq_list_add_tail(&requeue_list, req); + rq_list_add_tail(&submit_list, req); } if (!rq_list_empty(&submit_list)) @@ -2148,10 +2165,9 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, return 0; } -static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io) +static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) { - struct request *req = io->req; - /* * We have handled UBLK_IO_NEED_GET_DATA command, * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just @@ -2178,6 +2194,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; + struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -2236,11 +2253,19 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, goto out; break; case UBLK_IO_NEED_GET_DATA: - io->addr = ub_cmd->addr; - if (!ublk_get_data(ubq, io)) - return -EIOCBQUEUED; - - return UBLK_IO_RES_OK; + /* + * ublk_get_data() may fail and fallback to requeue, so keep + * uring_cmd active first 
and prepare for handling new requeued + * request + */ + req = io->req; + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + if (likely(ublk_get_data(ubq, io, req))) { + __ublk_prep_compl_io_cmd(io, req); + return UBLK_IO_RES_OK; + } + break; default: goto out; } @@ -2825,7 +2850,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; - if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || info.nr_hw_queues > UBLK_MAX_NR_QUEUES) + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth || + info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues) return -EINVAL; if (capable(CAP_SYS_ADMIN)) diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 2cbc664e5d62..623aaa4439c4 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) { struct cxl_mailbox *cxl_mbox; - u8 min_scrub_cycle = U8_MAX; struct cxl_region_params *p; struct cxl_memdev *cxlmd; struct cxl_region *cxlr; + u8 min_scrub_cycle = 0; int i, ret; if (!cxl_ps_ctx->cxlr) { @@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, if (ret) return ret; + /* + * The min_scrub_cycle of a region is the max of minimum scrub + * cycles supported by memdevs that back the region. + */ if (min_cycle) - min_scrub_cycle = min(*min_cycle, min_scrub_cycle); + min_scrub_cycle = max(*min_cycle, min_scrub_cycle); } if (min_cycle) @@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_gen_media, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_dram, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd, attrbs.bank = ctx->bank; break; case EDAC_REPAIR_RANK_SPARING: - attrbs.repair_type = CXL_BANK_SPARING; + attrbs.repair_type = CXL_RANK_SPARING; break; default: return NULL; diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 6f2eae1eb126..7c750599ea69 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs, u32 flags; if (rpc_in->op_size < sizeof(uuid_t)) - return ERR_PTR(-EINVAL); + return false; feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid); if (IS_ERR(feat)) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 485a831695c7..2731ba3a0799 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, ras_cap.header_log); } -static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - struct cxl_dev_state *cxlds; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - 
return; - - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(cxlmd, status); } -static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void +cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; - struct cxl_dev_state *cxlds; u32 fe; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - if (hweight32(status) > 1) fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, ras_cap.cap_control)); else fe = status; - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, + trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, ras_cap.header_log); } +static int match_memdev_by_parent(struct device *dev, const void *uport) +{ + if (is_cxl_memdev(dev) && dev->parent == uport) + return 1; + return 0; +} + static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) { unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, @@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, data->prot_err.agent_addr.bus, devfn); + struct cxl_memdev *cxlmd; int port_type; if (!pdev) return; - guard(device)(&pdev->dev); - port_type = pci_pcie_type(pdev); if (port_type == PCI_EXP_TYPE_ROOT_PORT || port_type == PCI_EXP_TYPE_DOWNSTREAM || @@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) return; } + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + struct device *mem_dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, pdev, match_memdev_by_parent); + if (!mem_dev) + return; + + cxlmd = to_cxl_memdev(mem_dev); if (data->severity == AER_CORRECTABLE) - cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap); else - cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); } static void cxl_cper_prot_err_work_fn(struct work_struct *work) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b1ef4546346d..bea3e9858aca 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -685,11 +685,13 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { - ret = dma_fence_wait_timeout(fence, intr, ret); - if (ret <= 0) { - dma_resv_iter_end(&cursor); - return ret; - } + ret = dma_fence_wait_timeout(fence, intr, timeout); + if (ret <= 0) + break; + + /* Even for zero timeout the return value is 1 */ + if (timeout) + timeout = ret; } dma_resv_iter_end(&cursor); diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig index 20607ed54243..ade872c915ac 100644 --- a/drivers/dpll/Kconfig +++ b/drivers/dpll/Kconfig @@ -3,5 +3,11 @@ # Generic DPLL drivers configuration # +menu "DPLL device support" + config DPLL bool + +source "drivers/dpll/zl3073x/Kconfig" + +endmenu diff --git a/drivers/dpll/Makefile b/drivers/dpll/Makefile index 2e5b27850110..9e7a3a3e592e 100644 --- a/drivers/dpll/Makefile +++ b/drivers/dpll/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_DPLL) += dpll.o dpll-y += dpll_core.o dpll-y += dpll_netlink.o dpll-y += dpll_nl.o + +obj-$(CONFIG_ZL3073X) += zl3073x/ diff --git a/drivers/dpll/zl3073x/Kconfig b/drivers/dpll/zl3073x/Kconfig new file mode 100644 index 
000000000000..41fa6a8f96ab --- /dev/null +++ b/drivers/dpll/zl3073x/Kconfig @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config ZL3073X + tristate "Microchip Azurite DPLL/PTP/SyncE devices" + depends on NET + select DPLL + select NET_DEVLINK + help + This driver supports Microchip Azurite family DPLL/PTP/SyncE + devices that support up to 5 independent DPLL channels, + 10 input pins and up to 20 output pins. + + To compile this driver as a module, choose M here. The module + will be called zl3073x. + +config ZL3073X_I2C + tristate "I2C bus implementation for Microchip Azurite devices" + depends on I2C && ZL3073X + select REGMAP_I2C + default m + help + This is the I2C bus implementation for Microchip Azurite DPLL/PTP/SyncE + devices. + + To compile this driver as a module, choose M here: the module will + be called zl3073x_i2c. + +config ZL3073X_SPI + tristate "SPI bus implementation for Microchip Azurite devices" + depends on SPI && ZL3073X + select REGMAP_SPI + default m + help + This is the SPI bus implementation for Microchip Azurite DPLL/PTP/SyncE + devices. + + To compile this driver as a module, choose M here: the module will + be called zl3073x_spi. diff --git a/drivers/dpll/zl3073x/Makefile b/drivers/dpll/zl3073x/Makefile new file mode 100644 index 000000000000..c3e2f02f319d --- /dev/null +++ b/drivers/dpll/zl3073x/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_ZL3073X) += zl3073x.o +zl3073x-objs := core.o devlink.o dpll.o prop.o + +obj-$(CONFIG_ZL3073X_I2C) += zl3073x_i2c.o +zl3073x_i2c-objs := i2c.o + +obj-$(CONFIG_ZL3073X_SPI) += zl3073x_spi.o +zl3073x_spi-objs := spi.o diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c new file mode 100644 index 000000000000..f2d58e1a5672 --- /dev/null +++ b/drivers/dpll/zl3073x/core.c @@ -0,0 +1,859 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/array_size.h> +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/dev_printk.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string_choices.h> +#include <linux/unaligned.h> +#include <net/devlink.h> + +#include "core.h" +#include "devlink.h" +#include "dpll.h" +#include "regs.h" + +/* Chip IDs for zl30731 */ +static const u16 zl30731_ids[] = { + 0x0E93, + 0x1E93, + 0x2E93, +}; + +const struct zl3073x_chip_info zl30731_chip_info = { + .ids = zl30731_ids, + .num_ids = ARRAY_SIZE(zl30731_ids), + .num_channels = 1, +}; +EXPORT_SYMBOL_NS_GPL(zl30731_chip_info, "ZL3073X"); + +/* Chip IDs for zl30732 */ +static const u16 zl30732_ids[] = { + 0x0E30, + 0x0E94, + 0x1E94, + 0x1F60, + 0x2E94, + 0x3FC4, +}; + +const struct zl3073x_chip_info zl30732_chip_info = { + .ids = zl30732_ids, + .num_ids = ARRAY_SIZE(zl30732_ids), + .num_channels = 2, +}; +EXPORT_SYMBOL_NS_GPL(zl30732_chip_info, "ZL3073X"); + +/* Chip IDs for zl30733 */ +static const u16 zl30733_ids[] = { + 0x0E95, + 0x1E95, + 0x2E95, +}; + +const struct zl3073x_chip_info zl30733_chip_info = { + .ids = zl30733_ids, + .num_ids = ARRAY_SIZE(zl30733_ids), + .num_channels = 3, +}; +EXPORT_SYMBOL_NS_GPL(zl30733_chip_info, "ZL3073X"); + +/* Chip IDs for zl30734 */ +static const u16 zl30734_ids[] = { + 0x0E96, + 0x1E96, + 0x2E96, +}; + +const struct zl3073x_chip_info zl30734_chip_info = { + .ids = zl30734_ids, + .num_ids = ARRAY_SIZE(zl30734_ids), + .num_channels = 4, +};
+EXPORT_SYMBOL_NS_GPL(zl30734_chip_info, "ZL3073X"); + +/* Chip IDs for zl30735 */ +static const u16 zl30735_ids[] = { + 0x0E97, + 0x1E97, + 0x2E97, +}; + +const struct zl3073x_chip_info zl30735_chip_info = { + .ids = zl30735_ids, + .num_ids = ARRAY_SIZE(zl30735_ids), + .num_channels = 5, +}; +EXPORT_SYMBOL_NS_GPL(zl30735_chip_info, "ZL3073X"); + +#define ZL_RANGE_OFFSET 0x80 +#define ZL_PAGE_SIZE 0x80 +#define ZL_NUM_PAGES 15 +#define ZL_PAGE_SEL 0x7F +#define ZL_PAGE_SEL_MASK GENMASK(3, 0) +#define ZL_NUM_REGS (ZL_NUM_PAGES * ZL_PAGE_SIZE) + +/* Regmap range configuration */ +static const struct regmap_range_cfg zl3073x_regmap_range = { + .range_min = ZL_RANGE_OFFSET, + .range_max = ZL_RANGE_OFFSET + ZL_NUM_REGS - 1, + .selector_reg = ZL_PAGE_SEL, + .selector_mask = ZL_PAGE_SEL_MASK, + .selector_shift = 0, + .window_start = 0, + .window_len = ZL_PAGE_SIZE, +}; + +static bool +zl3073x_is_volatile_reg(struct device *dev __maybe_unused, unsigned int reg) +{ + /* Only page selector is non-volatile */ + return reg != ZL_PAGE_SEL; +} + +const struct regmap_config zl3073x_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = ZL_RANGE_OFFSET + ZL_NUM_REGS - 1, + .ranges = &zl3073x_regmap_range, + .num_ranges = 1, + .cache_type = REGCACHE_MAPLE, + .volatile_reg = zl3073x_is_volatile_reg, +}; +EXPORT_SYMBOL_NS_GPL(zl3073x_regmap_config, "ZL3073X"); + +/** + * zl3073x_ref_freq_factorize - factorize given frequency + * @freq: input frequency + * @base: base frequency + * @mult: multiplier + * + * Checks if the given frequency can be factorized using one of the + * supported base frequencies. If so the base frequency and multiplier + * are stored into appropriate parameters if they are not NULL. + * + * Return: 0 on success, -EINVAL if the frequency cannot be factorized + */ +int +zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult) +{ + static const u16 base_freqs[] = { + 1, 2, 4, 5, 8, 10, 16, 20, 25, 32, 40, 50, 64, 80, 100, 125, + 128, 160, 200, 250, 256, 320, 400, 500, 625, 640, 800, 1000, + 1250, 1280, 1600, 2000, 2500, 3125, 3200, 4000, 5000, 6250, + 6400, 8000, 10000, 12500, 15625, 16000, 20000, 25000, 31250, + 32000, 40000, 50000, 62500, + }; + u32 div; + int i; + + for (i = 0; i < ARRAY_SIZE(base_freqs); i++) { + div = freq / base_freqs[i]; + + if (div <= U16_MAX && (freq % base_freqs[i]) == 0) { + if (base) + *base = base_freqs[i]; + if (mult) + *mult = div; + + return 0; + } + } + + return -EINVAL; +} + +static bool +zl3073x_check_reg(struct zl3073x_dev *zldev, unsigned int reg, size_t size) +{ + /* Check that multiop lock is held when accessing registers + * from page 10 and above. 
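
The paged layout handled by the regmap range above can be checked with plain arithmetic: the device exposes 15 pages of 0x80 bytes each behind a page-selector register at offset 0x7F, and flat register addresses are shifted into a virtual window starting at ZL_RANGE_OFFSET. A small worked example (hypothetical register address, userspace arithmetic only); note it lands on page 10, exactly the region the lockdep assertion below guards:

#include <stdio.h>

#define ZL_PAGE_SIZE	0x80
#define ZL_RANGE_OFFSET	0x80

int main(void)
{
	unsigned int reg = 0x512;			/* flat (page * 0x80 + offset) address */
	unsigned int virt = reg + ZL_RANGE_OFFSET;	/* what regmap sees */

	/* regmap's range config undoes the offset and splits the address
	 * into a page-selector write plus an in-window access: */
	unsigned int page = (virt - ZL_RANGE_OFFSET) / ZL_PAGE_SIZE;
	unsigned int offset = (virt - ZL_RANGE_OFFSET) % ZL_PAGE_SIZE;

	printf("page %u, offset 0x%02x\n", page, offset);	/* page 10, 0x12 */
	return 0;
}
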
+ */ + if (ZL_REG_PAGE(reg) >= 10) + lockdep_assert_held(&zldev->multiop_lock); + + /* Check the index is in valid range for indexed register */ + if (ZL_REG_OFFSET(reg) > ZL_REG_MAX_OFFSET(reg)) { + dev_err(zldev->dev, "Index out of range for reg 0x%04lx\n", + ZL_REG_ADDR(reg)); + return false; + } + /* Check the requested size corresponds to register size */ + if (ZL_REG_SIZE(reg) != size) { + dev_err(zldev->dev, "Invalid size %zu for reg 0x%04lx\n", + size, ZL_REG_ADDR(reg)); + return false; + } + + return true; +} + +static int +zl3073x_read_reg(struct zl3073x_dev *zldev, unsigned int reg, void *val, + size_t size) +{ + int rc; + + if (!zl3073x_check_reg(zldev, reg, size)) + return -EINVAL; + + /* Map the register address to virtual range */ + reg = ZL_REG_ADDR(reg) + ZL_RANGE_OFFSET; + + rc = regmap_bulk_read(zldev->regmap, reg, val, size); + if (rc) { + dev_err(zldev->dev, "Failed to read reg 0x%04x: %pe\n", reg, + ERR_PTR(rc)); + return rc; + } + + return 0; +} + +static int +zl3073x_write_reg(struct zl3073x_dev *zldev, unsigned int reg, const void *val, + size_t size) +{ + int rc; + + if (!zl3073x_check_reg(zldev, reg, size)) + return -EINVAL; + + /* Map the register address to virtual range */ + reg = ZL_REG_ADDR(reg) + ZL_RANGE_OFFSET; + + rc = regmap_bulk_write(zldev->regmap, reg, val, size); + if (rc) { + dev_err(zldev->dev, "Failed to write reg 0x%04x: %pe\n", reg, + ERR_PTR(rc)); + return rc; + } + + return 0; +} + +/** + * zl3073x_read_u8 - read value from 8bit register + * @zldev: zl3073x device pointer + * @reg: register to read from + * @val: destination for the read value + * + * Reads value from given 8bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_read_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 *val) +{ + return zl3073x_read_reg(zldev, reg, val, sizeof(*val)); +} + +/** + * zl3073x_write_u8 - write value to 8bit register + * @zldev: zl3073x device pointer + * @reg: register to write to + * @val: value to write + * + * Writes value into given 8bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_write_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 val) +{ + return zl3073x_write_reg(zldev, reg, &val, sizeof(val)); +} + +/** + * zl3073x_read_u16 - read value from 16bit register + * @zldev: zl3073x device pointer + * @reg: register to read from + * @val: destination for the read value + * + * Reads value from given 16bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_read_u16(struct zl3073x_dev *zldev, unsigned int reg, u16 *val) +{ + int rc; + + rc = zl3073x_read_reg(zldev, reg, val, sizeof(*val)); + if (!rc) + be16_to_cpus(val); + + return rc; +} + +/** + * zl3073x_write_u16 - write value to 16bit register + * @zldev: zl3073x device pointer + * @reg: register to write to + * @val: value to write + * + * Writes value into given 16bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_write_u16(struct zl3073x_dev *zldev, unsigned int reg, u16 val) +{ + cpu_to_be16s(&val); + + return zl3073x_write_reg(zldev, reg, &val, sizeof(val)); +} + +/** + * zl3073x_read_u32 - read value from 32bit register + * @zldev: zl3073x device pointer + * @reg: register to read from + * @val: destination for the read value + * + * Reads value from given 32bit register.
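
All of the sized helpers above share one pattern: bulk-transfer the raw big-endian bytes, then swap in place with the be*_to_cpus()/cpu_to_be*s() family. A reduced userspace analogue of the 16-bit read path, with the conversion reimplemented here purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's be16_to_cpus() on a little-endian host */
static uint16_t be16_to_host(const uint8_t raw[2])
{
	return (uint16_t)((raw[0] << 8) | raw[1]);
}

int main(void)
{
	uint8_t raw[2] = { 0x0E, 0x93 };	/* bytes as read from the chip */

	printf("0x%04X\n", be16_to_host(raw));	/* 0x0E93, a zl30731 chip ID */
	return 0;
}
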
+ * + * Returns: 0 on success, <0 on error + */ +int zl3073x_read_u32(struct zl3073x_dev *zldev, unsigned int reg, u32 *val) +{ + int rc; + + rc = zl3073x_read_reg(zldev, reg, val, sizeof(*val)); + if (!rc) + be32_to_cpus(val); + + return rc; +} + +/** + * zl3073x_write_u32 - write value to 32bit register + * @zldev: zl3073x device pointer + * @reg: register to write to + * @val: value to write + * + * Writes value into given 32bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_write_u32(struct zl3073x_dev *zldev, unsigned int reg, u32 val) +{ + cpu_to_be32s(&val); + + return zl3073x_write_reg(zldev, reg, &val, sizeof(val)); +} + +/** + * zl3073x_read_u48 - read value from 48bit register + * @zldev: zl3073x device pointer + * @reg: register to read from + * @val: destination for the read value + * + * Reads value from given 48bit register. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_read_u48(struct zl3073x_dev *zldev, unsigned int reg, u64 *val) +{ + u8 buf[6]; + int rc; + + rc = zl3073x_read_reg(zldev, reg, buf, sizeof(buf)); + if (!rc) + *val = get_unaligned_be48(buf); + + return rc; +} + +/** + * zl3073x_write_u48 - write value to 48bit register + * @zldev: zl3073x device pointer + * @reg: register to write to + * @val: value to write + * + * Writes value into given 48bit register. + * The value must be from the interval S48_MIN to U48_MAX. + * + * Returns: 0 on success, <0 on error + */ +int zl3073x_write_u48(struct zl3073x_dev *zldev, unsigned int reg, u64 val) +{ + u8 buf[6]; + + /* Check the value belongs to <S48_MIN, U48_MAX> + * Any value >= S48_MIN has bits 47..63 set. + */ + if (val > GENMASK_ULL(47, 0) && val < GENMASK_ULL(63, 47)) { + dev_err(zldev->dev, "Value 0x%0llx out of range\n", val); + return -EINVAL; + } + + put_unaligned_be48(val, buf); + + return zl3073x_write_reg(zldev, reg, buf, sizeof(buf)); +} + +/** + * zl3073x_poll_zero_u8 - wait for register to be cleared by device + * @zldev: zl3073x device pointer + * @reg: register to poll (has to be 8bit register) + * @mask: bit mask for polling + * + * Waits for bits specified by @mask in register @reg value to be cleared + * by the device.
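
The <S48_MIN, U48_MAX> check in zl3073x_write_u48() above accepts two disjoint u64 ranges: ordinary 48-bit magnitudes, and sign-extended negative 48-bit values, whose bits 47..63 are all set. A standalone version of the same bounds, with the GENMASK values written out as constants:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define U48_MAX		0x0000FFFFFFFFFFFFull	/* GENMASK_ULL(47, 0) */
#define S48_MIN_AS_U64	0xFFFF800000000000ull	/* GENMASK_ULL(63, 47) */

static bool fits_48bit(uint64_t val)
{
	return val <= U48_MAX || val >= S48_MIN_AS_U64;
}

int main(void)
{
	assert(fits_48bit(0));
	assert(fits_48bit((uint64_t)-1));		/* -1, sign-extended */
	assert(!fits_48bit(0x0001000000000000ull));	/* 2^48 is too large */
	return 0;
}
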
+ * + * Returns: 0 on success, <0 on error + */ +int zl3073x_poll_zero_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 mask) +{ + /* Register polling sleep & timeout */ +#define ZL_POLL_SLEEP_US 10 +#define ZL_POLL_TIMEOUT_US 2000000 + unsigned int val; + + /* Check the register is 8bit */ + if (ZL_REG_SIZE(reg) != 1) { + dev_err(zldev->dev, "Invalid reg 0x%04lx size for polling\n", + ZL_REG_ADDR(reg)); + return -EINVAL; + } + + /* Map the register address to virtual range */ + reg = ZL_REG_ADDR(reg) + ZL_RANGE_OFFSET; + + return regmap_read_poll_timeout(zldev->regmap, reg, val, !(val & mask), + ZL_POLL_SLEEP_US, ZL_POLL_TIMEOUT_US); +} + +int zl3073x_mb_op(struct zl3073x_dev *zldev, unsigned int op_reg, u8 op_val, + unsigned int mask_reg, u16 mask_val) +{ + int rc; + + /* Set mask for the operation */ + rc = zl3073x_write_u16(zldev, mask_reg, mask_val); + if (rc) + return rc; + + /* Trigger the operation */ + rc = zl3073x_write_u8(zldev, op_reg, op_val); + if (rc) + return rc; + + /* Wait for the operation to actually finish */ + return zl3073x_poll_zero_u8(zldev, op_reg, op_val); +} + +/** + * zl3073x_ref_state_fetch - get input reference state + * @zldev: pointer to zl3073x_dev structure + * @index: input reference index to fetch state for + * + * Function fetches information for the given input reference that are + * invariant and stores them for later use. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_ref *input = &zldev->ref[index]; + u8 ref_config; + int rc; + + /* If the input is differential then the configuration for N-pin + * reference is ignored and P-pin config is used for both. + */ + if (zl3073x_is_n_pin(index) && + zl3073x_ref_is_diff(zldev, index - 1)) { + input->enabled = zl3073x_ref_is_enabled(zldev, index - 1); + input->diff = true; + + return 0; + } + + guard(mutex)(&zldev->multiop_lock); + + /* Read reference configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_RD, + ZL_REG_REF_MB_MASK, BIT(index)); + if (rc) + return rc; + + /* Read ref_config register */ + rc = zl3073x_read_u8(zldev, ZL_REG_REF_CONFIG, &ref_config); + if (rc) + return rc; + + input->enabled = FIELD_GET(ZL_REF_CONFIG_ENABLE, ref_config); + input->diff = FIELD_GET(ZL_REF_CONFIG_DIFF_EN, ref_config); + + dev_dbg(zldev->dev, "REF%u is %s and configured as %s\n", index, + str_enabled_disabled(input->enabled), + input->diff ? "differential" : "single-ended"); + + return rc; +} + +/** + * zl3073x_out_state_fetch - get output state + * @zldev: pointer to zl3073x_dev structure + * @index: output index to fetch state for + * + * Function fetches information for the given output (not output pin) + * that are invariant and stores them for later use. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_out_state_fetch(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_out *out = &zldev->out[index]; + u8 output_ctrl, output_mode; + int rc; + + /* Read output configuration */ + rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_CTRL(index), &output_ctrl); + if (rc) + return rc; + + /* Store info about output enablement and synthesizer the output + * is connected to. 
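
The early return for N-pins in zl3073x_ref_state_fetch() above leans on the pin-numbering convention defined later in core.h: even ids are P-pins, odd ids are N-pins, and a differential pair is configured entirely through its P-pin. A tiny sketch of that convention:

#include <stdio.h>

static int is_n_pin(unsigned int id)
{
	return id & 1;	/* P-pin ids are even, N-pin ids are odd */
}

int main(void)
{
	unsigned int id = 5;

	/* For a differential pair, the N-pin (REF5) inherits its state
	 * from the preceding P-pin (REF4, i.e. id - 1). */
	if (is_n_pin(id))
		printf("REF%u inherits config from REF%u\n", id, id - 1);
	return 0;
}
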
+ */ + out->enabled = FIELD_GET(ZL_OUTPUT_CTRL_EN, output_ctrl); + out->synth = FIELD_GET(ZL_OUTPUT_CTRL_SYNTH_SEL, output_ctrl); + + dev_dbg(zldev->dev, "OUT%u is %s and connected to SYNTH%u\n", index, + str_enabled_disabled(out->enabled), out->synth); + + guard(mutex)(&zldev->multiop_lock); + + /* Read output configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD, + ZL_REG_OUTPUT_MB_MASK, BIT(index)); + if (rc) + return rc; + + /* Read output_mode */ + rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_MODE, &output_mode); + if (rc) + return rc; + + /* Extract and store output signal format */ + out->signal_format = FIELD_GET(ZL_OUTPUT_MODE_SIGNAL_FORMAT, + output_mode); + + dev_dbg(zldev->dev, "OUT%u has signal format 0x%02x\n", index, + out->signal_format); + + return rc; +} + +/** + * zl3073x_synth_state_fetch - get synth state + * @zldev: pointer to zl3073x_dev structure + * @index: synth index to fetch state for + * + * Function fetches information for the given synthesizer that are + * invariant and stores them for later use. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_synth *synth = &zldev->synth[index]; + u16 base, m, n; + u8 synth_ctrl; + u32 mult; + int rc; + + /* Read synth control register */ + rc = zl3073x_read_u8(zldev, ZL_REG_SYNTH_CTRL(index), &synth_ctrl); + if (rc) + return rc; + + /* Store info about synth enablement and DPLL channel the synth is + * driven by. + */ + synth->enabled = FIELD_GET(ZL_SYNTH_CTRL_EN, synth_ctrl); + synth->dpll = FIELD_GET(ZL_SYNTH_CTRL_DPLL_SEL, synth_ctrl); + + dev_dbg(zldev->dev, "SYNTH%u is %s and driven by DPLL%u\n", index, + str_enabled_disabled(synth->enabled), synth->dpll); + + guard(mutex)(&zldev->multiop_lock); + + /* Read synth configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_SYNTH_MB_SEM, ZL_SYNTH_MB_SEM_RD, + ZL_REG_SYNTH_MB_MASK, BIT(index)); + if (rc) + return rc; + + /* The output frequency is determined by the following formula: + * base * multiplier * numerator / denominator + * + * Read registers with these values + */ + rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_BASE, &base); + if (rc) + return rc; + + rc = zl3073x_read_u32(zldev, ZL_REG_SYNTH_FREQ_MULT, &mult); + if (rc) + return rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_M, &m); + if (rc) + return rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_N, &n); + if (rc) + return rc; + + /* Check denominator for zero to avoid div by 0 */ + if (!n) { + dev_err(zldev->dev, + "Zero divisor for SYNTH%u retrieved from device\n", + index); + return -EINVAL; + } + + /* Compute and store synth frequency */ + zldev->synth[index].freq = div_u64(mul_u32_u32(base * m, mult), n); + + dev_dbg(zldev->dev, "SYNTH%u frequency: %u Hz\n", index, + zldev->synth[index].freq); + + return rc; +} + +static int +zl3073x_dev_state_fetch(struct zl3073x_dev *zldev) +{ + int rc; + u8 i; + + for (i = 0; i < ZL3073X_NUM_REFS; i++) { + rc = zl3073x_ref_state_fetch(zldev, i); + if (rc) { + dev_err(zldev->dev, + "Failed to fetch input state: %pe\n", + ERR_PTR(rc)); + return rc; + } + } + + for (i = 0; i < ZL3073X_NUM_SYNTHS; i++) { + rc = zl3073x_synth_state_fetch(zldev, i); + if (rc) { + dev_err(zldev->dev, + "Failed to fetch synth state: %pe\n", + ERR_PTR(rc)); + return rc; + } + } + + for (i = 0; i < ZL3073X_NUM_OUTS; i++) { + rc = zl3073x_out_state_fetch(zldev, i); + if (rc) { + dev_err(zldev->dev, + "Failed to fetch output state: %pe\n", + ERR_PTR(rc)); + return 
rc; + } + } + + return rc; +} + +static void +zl3073x_dev_periodic_work(struct kthread_work *work) +{ + struct zl3073x_dev *zldev = container_of(work, struct zl3073x_dev, + work.work); + struct zl3073x_dpll *zldpll; + + list_for_each_entry(zldpll, &zldev->dplls, list) + zl3073x_dpll_changes_check(zldpll); + + /* Run twice a second */ + kthread_queue_delayed_work(zldev->kworker, &zldev->work, + msecs_to_jiffies(500)); +} + +static void zl3073x_dev_dpll_fini(void *ptr) +{ + struct zl3073x_dpll *zldpll, *next; + struct zl3073x_dev *zldev = ptr; + + /* Stop monitoring thread */ + if (zldev->kworker) { + kthread_cancel_delayed_work_sync(&zldev->work); + kthread_destroy_worker(zldev->kworker); + zldev->kworker = NULL; + } + + /* Release DPLLs */ + list_for_each_entry_safe(zldpll, next, &zldev->dplls, list) { + zl3073x_dpll_unregister(zldpll); + list_del(&zldpll->list); + zl3073x_dpll_free(zldpll); + } +} + +static int +zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) +{ + struct kthread_worker *kworker; + struct zl3073x_dpll *zldpll; + unsigned int i; + int rc; + + INIT_LIST_HEAD(&zldev->dplls); + + /* Initialize all DPLLs */ + for (i = 0; i < num_dplls; i++) { + zldpll = zl3073x_dpll_alloc(zldev, i); + if (IS_ERR(zldpll)) { + dev_err_probe(zldev->dev, PTR_ERR(zldpll), + "Failed to alloc DPLL%u\n", i); + rc = PTR_ERR(zldpll); + goto error; + } + + rc = zl3073x_dpll_register(zldpll); + if (rc) { + dev_err_probe(zldev->dev, rc, + "Failed to register DPLL%u\n", i); + zl3073x_dpll_free(zldpll); + goto error; + } + + list_add_tail(&zldpll->list, &zldev->dplls); + } + + /* Perform initial firmware fine phase correction */ + rc = zl3073x_dpll_init_fine_phase_adjust(zldev); + if (rc) { + dev_err_probe(zldev->dev, rc, + "Failed to init fine phase correction\n"); + goto error; + } + + /* Initialize monitoring thread */ + kthread_init_delayed_work(&zldev->work, zl3073x_dev_periodic_work); + kworker = kthread_run_worker(0, "zl3073x-%s", dev_name(zldev->dev)); + if (IS_ERR(kworker)) { + rc = PTR_ERR(kworker); + goto error; + } + + zldev->kworker = kworker; + kthread_queue_delayed_work(zldev->kworker, &zldev->work, 0); + + /* Add devres action to release DPLL related resources */ + rc = devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); + if (rc) + goto error; + + return 0; + +error: + zl3073x_dev_dpll_fini(zldev); + + return rc; +} + +/** + * zl3073x_dev_probe - initialize zl3073x device + * @zldev: pointer to zl3073x device + * @chip_info: chip info based on compatible + * + * Common initialization of zl3073x device structure. 
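
One step feeding the probe path documented below, the synthesizer frequency computed by zl3073x_synth_state_fetch() earlier, is worth a numeric check. The formula is base * multiplier * numerator / denominator, with a zero denominator rejected by the driver. With hypothetical register values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical register contents, not from a real device */
	uint16_t base = 25000;	/* ZL_REG_SYNTH_FREQ_BASE */
	uint32_t mult = 125;	/* ZL_REG_SYNTH_FREQ_MULT */
	uint16_t m = 8, n = 25;	/* numerator / denominator */

	/* Mirrors div_u64(mul_u32_u32(base * m, mult), n) */
	uint64_t freq = (uint64_t)(base * m) * mult / n;

	printf("%llu Hz\n", (unsigned long long)freq);	/* 1000000 Hz */
	return 0;
}
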
+ * + * Returns: 0 on success, <0 on error + */ +int zl3073x_dev_probe(struct zl3073x_dev *zldev, + const struct zl3073x_chip_info *chip_info) +{ + u16 id, revision, fw_ver; + unsigned int i; + u32 cfg_ver; + int rc; + + /* Read chip ID */ + rc = zl3073x_read_u16(zldev, ZL_REG_ID, &id); + if (rc) + return rc; + + /* Check it matches */ + for (i = 0; i < chip_info->num_ids; i++) { + if (id == chip_info->ids[i]) + break; + } + + if (i == chip_info->num_ids) { + return dev_err_probe(zldev->dev, -ENODEV, + "Unknown or non-matching chip ID: 0x%0x\n", + id); + } + + /* Read revision, firmware version and custom config version */ + rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision); + if (rc) + return rc; + rc = zl3073x_read_u16(zldev, ZL_REG_FW_VER, &fw_ver); + if (rc) + return rc; + rc = zl3073x_read_u32(zldev, ZL_REG_CUSTOM_CONFIG_VER, &cfg_ver); + if (rc) + return rc; + + dev_dbg(zldev->dev, "ChipID(%X), ChipRev(%X), FwVer(%u)\n", id, + revision, fw_ver); + dev_dbg(zldev->dev, "Custom config version: %lu.%lu.%lu.%lu\n", + FIELD_GET(GENMASK(31, 24), cfg_ver), + FIELD_GET(GENMASK(23, 16), cfg_ver), + FIELD_GET(GENMASK(15, 8), cfg_ver), + FIELD_GET(GENMASK(7, 0), cfg_ver)); + + /* Generate random clock ID as the device has no such property that + * could be used for this purpose. A user can later change this value + * using devlink. + */ + zldev->clock_id = get_random_u64(); + + /* Initialize mutex for operations where multiple reads, writes + * and/or polls are required to be done atomically. + */ + rc = devm_mutex_init(zldev->dev, &zldev->multiop_lock); + if (rc) + return dev_err_probe(zldev->dev, rc, + "Failed to initialize mutex\n"); + + /* Fetch device state */ + rc = zl3073x_dev_state_fetch(zldev); + if (rc) + return rc; + + /* Register DPLL channels */ + rc = zl3073x_devm_dpll_init(zldev, chip_info->num_channels); + if (rc) + return rc; + + /* Register the devlink instance and parameters */ + rc = zl3073x_devlink_register(zldev); + if (rc) + return dev_err_probe(zldev->dev, rc, + "Failed to register devlink instance\n"); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(zl3073x_dev_probe, "ZL3073X"); + +MODULE_AUTHOR("Ivan Vecera <ivecera@redhat.com>"); +MODULE_DESCRIPTION("Microchip ZL3073x core driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h new file mode 100644 index 000000000000..97b1032e392d --- /dev/null +++ b/drivers/dpll/zl3073x/core.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_CORE_H +#define _ZL3073X_CORE_H + +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/types.h> + +#include "regs.h" + +struct device; +struct regmap; +struct zl3073x_dpll; + +/* + * Hardware limits for ZL3073x chip family + */ +#define ZL3073X_MAX_CHANNELS 5 +#define ZL3073X_NUM_REFS 10 +#define ZL3073X_NUM_OUTS 10 +#define ZL3073X_NUM_SYNTHS 5 +#define ZL3073X_NUM_INPUT_PINS ZL3073X_NUM_REFS +#define ZL3073X_NUM_OUTPUT_PINS (ZL3073X_NUM_OUTS * 2) +#define ZL3073X_NUM_PINS (ZL3073X_NUM_INPUT_PINS + \ + ZL3073X_NUM_OUTPUT_PINS) + +/** + * struct zl3073x_ref - input reference invariant info + * @enabled: input reference is enabled or disabled + * @diff: true if input reference is differential + */ +struct zl3073x_ref { + bool enabled; + bool diff; +}; + +/** + * struct zl3073x_out - output invariant info + * @enabled: out is enabled or disabled + * @synth: synthesizer the out is connected to + * @signal_format: out signal format + */ +struct zl3073x_out { + bool enabled; + u8
synth; + u8 signal_format; +}; + +/** + * struct zl3073x_synth - synthesizer invariant info + * @freq: synthesizer frequency + * @dpll: ID of DPLL the synthesizer is driven by + * @enabled: synth is enabled or disabled + */ +struct zl3073x_synth { + u32 freq; + u8 dpll; + bool enabled; +}; + +/** + * struct zl3073x_dev - zl3073x device + * @dev: pointer to device + * @regmap: regmap to access device registers + * @multiop_lock: to serialize multiple register operations + * @clock_id: clock id of the device + * @ref: array of input references' invariants + * @out: array of outs' invariants + * @synth: array of synths' invariants + * @dplls: list of DPLLs + * @kworker: thread for periodic work + * @work: periodic work + */ +struct zl3073x_dev { + struct device *dev; + struct regmap *regmap; + struct mutex multiop_lock; + u64 clock_id; + + /* Invariants */ + struct zl3073x_ref ref[ZL3073X_NUM_REFS]; + struct zl3073x_out out[ZL3073X_NUM_OUTS]; + struct zl3073x_synth synth[ZL3073X_NUM_SYNTHS]; + + /* DPLL channels */ + struct list_head dplls; + + /* Monitor */ + struct kthread_worker *kworker; + struct kthread_delayed_work work; +}; + +struct zl3073x_chip_info { + const u16 *ids; + size_t num_ids; + int num_channels; +}; + +extern const struct zl3073x_chip_info zl30731_chip_info; +extern const struct zl3073x_chip_info zl30732_chip_info; +extern const struct zl3073x_chip_info zl30733_chip_info; +extern const struct zl3073x_chip_info zl30734_chip_info; +extern const struct zl3073x_chip_info zl30735_chip_info; +extern const struct regmap_config zl3073x_regmap_config; + +struct zl3073x_dev *zl3073x_devm_alloc(struct device *dev); +int zl3073x_dev_probe(struct zl3073x_dev *zldev, + const struct zl3073x_chip_info *chip_info); + +/********************** + * Registers operations + **********************/ + +int zl3073x_mb_op(struct zl3073x_dev *zldev, unsigned int op_reg, u8 op_val, + unsigned int mask_reg, u16 mask_val); +int zl3073x_poll_zero_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 mask); +int zl3073x_read_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 *val); +int zl3073x_read_u16(struct zl3073x_dev *zldev, unsigned int reg, u16 *val); +int zl3073x_read_u32(struct zl3073x_dev *zldev, unsigned int reg, u32 *val); +int zl3073x_read_u48(struct zl3073x_dev *zldev, unsigned int reg, u64 *val); +int zl3073x_write_u8(struct zl3073x_dev *zldev, unsigned int reg, u8 val); +int zl3073x_write_u16(struct zl3073x_dev *zldev, unsigned int reg, u16 val); +int zl3073x_write_u32(struct zl3073x_dev *zldev, unsigned int reg, u32 val); +int zl3073x_write_u48(struct zl3073x_dev *zldev, unsigned int reg, u64 val); + +/***************** + * Misc operations + *****************/ + +int zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult); + +static inline bool +zl3073x_is_n_pin(u8 id) +{ + /* P-pin ids are even while N-pin ids are odd */ + return id & 1; +} + +static inline bool +zl3073x_is_p_pin(u8 id) +{ + return !zl3073x_is_n_pin(id); +} + +/** + * zl3073x_input_pin_ref_get - get reference for given input pin + * @id: input pin id + * + * Return: reference id for the given input pin + */ +static inline u8 +zl3073x_input_pin_ref_get(u8 id) +{ + return id; +} + +/** + * zl3073x_output_pin_out_get - get output for the given output pin + * @id: output pin id + * + * Return: output id for the given output pin + */ +static inline u8 +zl3073x_output_pin_out_get(u8 id) +{ + /* Each output pin pair shares a single output */ + return id / 2; +} + +/** + * zl3073x_ref_is_diff - check if the given input reference is
differential + * @zldev: pointer to zl3073x device + * @index: input reference index + * + * Return: true if reference is differential, false if reference is single-ended + */ +static inline bool +zl3073x_ref_is_diff(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->ref[index].diff; +} + +/** + * zl3073x_ref_is_enabled - check if the given input reference is enabled + * @zldev: pointer to zl3073x device + * @index: input reference index + * + * Return: true if input reference is enabled, false otherwise + */ +static inline bool +zl3073x_ref_is_enabled(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->ref[index].enabled; +} + +/** + * zl3073x_synth_dpll_get - get DPLL ID the synth is driven by + * @zldev: pointer to zl3073x device + * @index: synth index + * + * Return: ID of DPLL the given synthesizer is driven by + */ +static inline u8 +zl3073x_synth_dpll_get(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->synth[index].dpll; +} + +/** + * zl3073x_synth_freq_get - get synth current freq + * @zldev: pointer to zl3073x device + * @index: synth index + * + * Return: frequency of given synthesizer + */ +static inline u32 +zl3073x_synth_freq_get(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->synth[index].freq; +} + +/** + * zl3073x_synth_is_enabled - check if the given synth is enabled + * @zldev: pointer to zl3073x device + * @index: synth index + * + * Return: true if synth is enabled, false otherwise + */ +static inline bool +zl3073x_synth_is_enabled(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->synth[index].enabled; +} + +/** + * zl3073x_out_synth_get - get synth connected to given output + * @zldev: pointer to zl3073x device + * @index: output index + * + * Return: index of synth connected to given output. + */ +static inline u8 +zl3073x_out_synth_get(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->out[index].synth; +} + +/** + * zl3073x_out_is_enabled - check if the given output is enabled + * @zldev: pointer to zl3073x device + * @index: output index + * + * Return: true if the output is enabled, false otherwise + */ +static inline bool +zl3073x_out_is_enabled(struct zl3073x_dev *zldev, u8 index) +{ + u8 synth; + + /* Output is enabled only if associated synth is enabled */ + synth = zl3073x_out_synth_get(zldev, index); + if (zl3073x_synth_is_enabled(zldev, synth)) + return zldev->out[index].enabled; + + return false; +} + +/** + * zl3073x_out_signal_format_get - get output signal format + * @zldev: pointer to zl3073x device + * @index: output index + * + * Return: signal format of given output + */ +static inline u8 +zl3073x_out_signal_format_get(struct zl3073x_dev *zldev, u8 index) +{ + return zldev->out[index].signal_format; } + +/** + * zl3073x_out_dpll_get - get DPLL ID the output is driven by + * @zldev: pointer to zl3073x device + * @index: output index + * + * Return: ID of DPLL the given output is driven by + */ +static inline +u8 zl3073x_out_dpll_get(struct zl3073x_dev *zldev, u8 index) +{ + u8 synth; + + /* Get synthesizer connected to given output */ + synth = zl3073x_out_synth_get(zldev, index); + + /* Return DPLL that drives the synth */ + return zl3073x_synth_dpll_get(zldev, synth); +} + +/** + * zl3073x_out_is_diff - check if the given output is differential + * @zldev: pointer to zl3073x device + * @index: output index + * + * Return: true if output is differential, false if output is single-ended + */ +static inline bool +zl3073x_out_is_diff(struct zl3073x_dev *zldev, u8 index) +{ + switch
(zl3073x_out_signal_format_get(zldev, index)) { + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LVDS: + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_DIFF: + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LOWVCM: + return true; + default: + break; + } + + return false; +} + +/** + * zl3073x_output_pin_is_enabled - check if the given output pin is enabled + * @zldev: pointer to zl3073x device + * @id: output pin id + * + * Checks that the output associated with the given output pin is enabled + * and that its signal format enables the given pin. + * + * Return: true if output pin is enabled, false if output pin is disabled + */ +static inline bool +zl3073x_output_pin_is_enabled(struct zl3073x_dev *zldev, u8 id) +{ + u8 output = zl3073x_output_pin_out_get(id); + + /* Check if the whole output is enabled */ + if (!zl3073x_out_is_enabled(zldev, output)) + return false; + + /* Check signal format */ + switch (zl3073x_out_signal_format_get(zldev, output)) { + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_DISABLED: + /* Both output pins are disabled by signal format */ + return false; + + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_1P: + /* Output is one single-ended P-pin output */ + if (zl3073x_is_n_pin(id)) + return false; + break; + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_1N: + /* Output is one single-ended N-pin output */ + if (zl3073x_is_p_pin(id)) + return false; + break; + default: + /* For other formats both pins are enabled */ + break; + } + + return true; +} + +#endif /* _ZL3073X_CORE_H */ diff --git a/drivers/dpll/zl3073x/devlink.c b/drivers/dpll/zl3073x/devlink.c new file mode 100644 index 000000000000..7e7fe726ee37 --- /dev/null +++ b/drivers/dpll/zl3073x/devlink.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/device/devres.h> +#include <linux/netlink.h> +#include <linux/sprintf.h> +#include <linux/types.h> +#include <net/devlink.h> + +#include "core.h" +#include "devlink.h" +#include "dpll.h" +#include "regs.h" + +/** + * zl3073x_devlink_info_get - Devlink device info callback + * @devlink: devlink structure pointer + * @req: devlink request pointer to store information + * @extack: netlink extack pointer to report errors + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dev *zldev = devlink_priv(devlink); + u16 id, revision, fw_ver; + char buf[16]; + u32 cfg_ver; + int rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_ID, &id); + if (rc) + return rc; + + snprintf(buf, sizeof(buf), "%X", id); + rc = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + buf); + if (rc) + return rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision); + if (rc) + return rc; + + snprintf(buf, sizeof(buf), "%X", revision); + rc = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, + buf); + if (rc) + return rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_FW_VER, &fw_ver); + if (rc) + return rc; + + snprintf(buf, sizeof(buf), "%u", fw_ver); + rc = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + buf); + if (rc) + return rc; + + rc = zl3073x_read_u32(zldev, ZL_REG_CUSTOM_CONFIG_VER, &cfg_ver); + if (rc) + return rc; + + /* No custom config version */ + if (cfg_ver == U32_MAX) + return 0; + + snprintf(buf, sizeof(buf), "%lu.%lu.%lu.%lu", + FIELD_GET(GENMASK(31, 24), cfg_ver), + FIELD_GET(GENMASK(23, 16), cfg_ver), + FIELD_GET(GENMASK(15, 8), cfg_ver), + FIELD_GET(GENMASK(7, 0), cfg_ver)); + + return
devlink_info_version_running_put(req, "custom_cfg", buf); +} + +static int +zl3073x_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dev *zldev = devlink_priv(devlink); + struct zl3073x_dpll *zldpll; + + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + + /* Unregister all DPLLs */ + list_for_each_entry(zldpll, &zldev->dplls, list) + zl3073x_dpll_unregister(zldpll); + + return 0; +} + +static int +zl3073x_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dev *zldev = devlink_priv(devlink); + union devlink_param_value val; + struct zl3073x_dpll *zldpll; + int rc; + + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + + rc = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, + &val); + if (rc) + return rc; + + if (zldev->clock_id != val.vu64) { + dev_dbg(zldev->dev, + "'clock_id' changed to %016llx\n", val.vu64); + zldev->clock_id = val.vu64; + } + + /* Re-register all DPLLs */ + list_for_each_entry(zldpll, &zldev->dplls, list) { + rc = zl3073x_dpll_register(zldpll); + if (rc) + dev_warn(zldev->dev, + "Failed to re-register DPLL%u\n", zldpll->id); + } + + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + + return 0; +} + +static const struct devlink_ops zl3073x_devlink_ops = { + .info_get = zl3073x_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = zl3073x_devlink_reload_down, + .reload_up = zl3073x_devlink_reload_up, +}; + +static void +zl3073x_devlink_free(void *ptr) +{ + devlink_free(ptr); +} + +/** + * zl3073x_devm_alloc - allocates zl3073x device structure + * @dev: pointer to device structure + * + * Allocates zl3073x device structure as device resource. 
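+ * + * The structure is embedded in a devlink instance, so the devlink device can + * later be recovered from it with priv_to_devlink(). A minimal sketch + * (assuming a valid struct device *dev): + * + * struct zl3073x_dev *zldev = zl3073x_devm_alloc(dev); + * struct devlink *devlink = priv_to_devlink(zldev);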
+ * + * Return: pointer to zl3073x device on success, error pointer on error + */ +struct zl3073x_dev *zl3073x_devm_alloc(struct device *dev) +{ + struct zl3073x_dev *zldev; + struct devlink *devlink; + int rc; + + devlink = devlink_alloc(&zl3073x_devlink_ops, sizeof(*zldev), dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + + /* Add devres action to free devlink device */ + rc = devm_add_action_or_reset(dev, zl3073x_devlink_free, devlink); + if (rc) + return ERR_PTR(rc); + + zldev = devlink_priv(devlink); + zldev->dev = dev; + dev_set_drvdata(zldev->dev, zldev); + + return zldev; +} +EXPORT_SYMBOL_NS_GPL(zl3073x_devm_alloc, "ZL3073X"); + +static int +zl3073x_devlink_param_clock_id_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (!val.vu64) { + NL_SET_ERR_MSG_MOD(extack, "'clock_id' must be non-zero"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param zl3073x_devlink_params[] = { + DEVLINK_PARAM_GENERIC(CLOCK_ID, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + zl3073x_devlink_param_clock_id_validate), +}; + +static void +zl3073x_devlink_unregister(void *ptr) +{ + struct devlink *devlink = priv_to_devlink(ptr); + + devl_lock(devlink); + + /* Unregister devlink params */ + devl_params_unregister(devlink, zl3073x_devlink_params, + ARRAY_SIZE(zl3073x_devlink_params)); + + /* Unregister devlink instance */ + devl_unregister(devlink); + + devl_unlock(devlink); +} + +/** + * zl3073x_devlink_register - register devlink instance and params + * @zldev: zl3073x device to register the devlink for + * + * Register the devlink instance and parameters associated with the device. + * + * Return: 0 on success, <0 on error + */ +int zl3073x_devlink_register(struct zl3073x_dev *zldev) +{ + struct devlink *devlink = priv_to_devlink(zldev); + union devlink_param_value value; + int rc; + + devl_lock(devlink); + + /* Register devlink params */ + rc = devl_params_register(devlink, zl3073x_devlink_params, + ARRAY_SIZE(zl3073x_devlink_params)); + if (rc) { + devl_unlock(devlink); + + return rc; + } + + value.vu64 = zldev->clock_id; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, + value); + + /* Register devlink instance */ + devl_register(devlink); + + devl_unlock(devlink); + + /* Add devres action to unregister devlink device */ + return devm_add_action_or_reset(zldev->dev, zl3073x_devlink_unregister, + zldev); +} diff --git a/drivers/dpll/zl3073x/devlink.h b/drivers/dpll/zl3073x/devlink.h new file mode 100644 index 000000000000..037720db204f --- /dev/null +++ b/drivers/dpll/zl3073x/devlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_DEVLINK_H +#define _ZL3073X_DEVLINK_H + +struct zl3073x_dev; + +struct zl3073x_dev *zl3073x_devm_alloc(struct device *dev); + +int zl3073x_devlink_register(struct zl3073x_dev *zldev); + +#endif /* _ZL3073X_DEVLINK_H */ diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c new file mode 100644 index 000000000000..cb0f1a43c5fb --- /dev/null +++ b/drivers/dpll/zl3073x/dpll.c @@ -0,0 +1,1504 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bits.h> +#include <linux/bitfield.h> +#include <linux/bug.h> +#include <linux/container_of.h> +#include <linux/dev_printk.h> +#include <linux/dpll.h> +#include <linux/err.h> +#include <linux/kthread.h> +#include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include 
<linux/platform_device.h> +#include <linux/slab.h> +#include <linux/sprintf.h> + +#include "core.h" +#include "dpll.h" +#include "prop.h" +#include "regs.h" + +#define ZL3073X_DPLL_REF_NONE ZL3073X_NUM_REFS +#define ZL3073X_DPLL_REF_IS_VALID(_ref) ((_ref) != ZL3073X_DPLL_REF_NONE) + +/** + * struct zl3073x_dpll_pin - DPLL pin + * @list: this DPLL pin list entry + * @dpll: DPLL the pin is registered to + * @dpll_pin: pointer to registered dpll_pin + * @label: package label + * @dir: pin direction + * @id: pin id + * @prio: pin priority <0, 14> + * @selectable: pin is selectable in automatic mode + * @pin_state: last saved pin state + */ +struct zl3073x_dpll_pin { + struct list_head list; + struct zl3073x_dpll *dpll; + struct dpll_pin *dpll_pin; + char label[8]; + enum dpll_pin_direction dir; + u8 id; + u8 prio; + bool selectable; + enum dpll_pin_state pin_state; +}; + +/** + * zl3073x_dpll_is_input_pin - check if the pin is an input one + * @pin: pin to check + * + * Return: true if pin is input, false if pin is output. + */ +static bool +zl3073x_dpll_is_input_pin(struct zl3073x_dpll_pin *pin) +{ + return pin->dir == DPLL_PIN_DIRECTION_INPUT; +} + +/** + * zl3073x_dpll_is_p_pin - check if the pin is P-pin + * @pin: pin to check + * + * Return: true if the pin is P-pin, false if it is N-pin + */ +static bool +zl3073x_dpll_is_p_pin(struct zl3073x_dpll_pin *pin) +{ + return zl3073x_is_p_pin(pin->id); +} + +static int +zl3073x_dpll_pin_direction_get(const struct dpll_pin *dpll_pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *pin = pin_priv; + + *direction = pin->dir; + + return 0; +} + +/** + * zl3073x_dpll_input_ref_frequency_get - get input reference frequency + * @zldpll: pointer to zl3073x_dpll + * @ref_id: reference id + * @frequency: pointer to variable to store frequency + * + * Reads the frequency of the given input reference.
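+ * + * The frequency is reconstructed from four mailbox registers as + * base * mult * M / N (see the computation below). Illustrative values only: + * base 15625 Hz with mult 640 and ratio M/N = 1/1 yields a 10 MHz reference.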
+ * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_input_ref_frequency_get(struct zl3073x_dpll *zldpll, u8 ref_id, + u32 *frequency) +{ + struct zl3073x_dev *zldev = zldpll->dev; + u16 base, mult, num, denom; + int rc; + + guard(mutex)(&zldev->multiop_lock); + + /* Read reference configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_RD, + ZL_REG_REF_MB_MASK, BIT(ref_id)); + if (rc) + return rc; + + /* Read registers to compute resulting frequency */ + rc = zl3073x_read_u16(zldev, ZL_REG_REF_FREQ_BASE, &base); + if (rc) + return rc; + rc = zl3073x_read_u16(zldev, ZL_REG_REF_FREQ_MULT, &mult); + if (rc) + return rc; + rc = zl3073x_read_u16(zldev, ZL_REG_REF_RATIO_M, &num); + if (rc) + return rc; + rc = zl3073x_read_u16(zldev, ZL_REG_REF_RATIO_N, &denom); + if (rc) + return rc; + + /* Sanity check that HW has not returned zero denominator */ + if (!denom) { + dev_err(zldev->dev, + "Zero divisor for ref %u frequency got from device\n", + ref_id); + return -EINVAL; + } + + /* Compute the frequency */ + *frequency = mul_u64_u32_div(base * mult, num, denom); + + return rc; +} + +static int +zl3073x_dpll_input_pin_frequency_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *frequency, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dpll_pin *pin = pin_priv; + u32 ref_freq; + u8 ref; + int rc; + + /* Read and return ref frequency */ + ref = zl3073x_input_pin_ref_get(pin->id); + rc = zl3073x_dpll_input_ref_frequency_get(zldpll, ref, &ref_freq); + if (!rc) + *frequency = ref_freq; + + return rc; +} + +static int +zl3073x_dpll_input_pin_frequency_set(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 frequency, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dev *zldev = zldpll->dev; + struct zl3073x_dpll_pin *pin = pin_priv; + u16 base, mult; + u8 ref; + int rc; + + /* Get base frequency and multiplier for the requested frequency */ + rc = zl3073x_ref_freq_factorize(frequency, &base, &mult); + if (rc) + return rc; + + guard(mutex)(&zldev->multiop_lock); + + /* Load reference configuration */ + ref = zl3073x_input_pin_ref_get(pin->id); + rc = zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_RD, + ZL_REG_REF_MB_MASK, BIT(ref)); + if (rc) + return rc; + + /* Update base frequency, multiplier, numerator & denominator */ + rc = zl3073x_write_u16(zldev, ZL_REG_REF_FREQ_BASE, base); + if (rc) + return rc; + rc = zl3073x_write_u16(zldev, ZL_REG_REF_FREQ_MULT, mult); + if (rc) + return rc; + rc = zl3073x_write_u16(zldev, ZL_REG_REF_RATIO_M, 1); + if (rc) + return rc; + rc = zl3073x_write_u16(zldev, ZL_REG_REF_RATIO_N, 1); + if (rc) + return rc; + + /* Commit reference configuration */ + return zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_WR, + ZL_REG_REF_MB_MASK, BIT(ref)); +} + +/** + * zl3073x_dpll_selected_ref_get - get currently selected reference + * @zldpll: pointer to zl3073x_dpll + * @ref: place to store selected reference + * + * Checks for the currently selected reference the DPLL should be locked + * to and stores its index to the given @ref.
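+ * + * In automatic mode the reference is read from the refsel status register + * and reported only when the DPLL is actually locked to it; in manual + * (reflock) mode the previously forced reference is returned; in the + * remaining modes (NCO, freerun, holdover) there is no selected reference + * and ZL3073X_DPLL_REF_NONE is stored.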
+ * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) +{ + struct zl3073x_dev *zldev = zldpll->dev; + u8 state, value; + int rc; + + switch (zldpll->refsel_mode) { + case ZL_DPLL_MODE_REFSEL_MODE_AUTO: + /* For automatic mode read refsel_status register */ + rc = zl3073x_read_u8(zldev, + ZL_REG_DPLL_REFSEL_STATUS(zldpll->id), + &value); + if (rc) + return rc; + + /* Extract reference state */ + state = FIELD_GET(ZL_DPLL_REFSEL_STATUS_STATE, value); + + /* Return the reference only if the DPLL is locked to it */ + if (state == ZL_DPLL_REFSEL_STATUS_STATE_LOCK) + *ref = FIELD_GET(ZL_DPLL_REFSEL_STATUS_REFSEL, value); + else + *ref = ZL3073X_DPLL_REF_NONE; + break; + case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: + /* For manual mode return stored value */ + *ref = zldpll->forced_ref; + break; + default: + /* For other modes like NCO, freerun... there is no input ref */ + *ref = ZL3073X_DPLL_REF_NONE; + break; + } + + return 0; +} + +/** + * zl3073x_dpll_selected_ref_set - select reference in manual mode + * @zldpll: pointer to zl3073x_dpll + * @ref: input reference to be selected + * + * Selects the given reference for the DPLL channel it should be + * locked to. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_selected_ref_set(struct zl3073x_dpll *zldpll, u8 ref) +{ + struct zl3073x_dev *zldev = zldpll->dev; + u8 mode, mode_refsel; + int rc; + + mode = zldpll->refsel_mode; + + switch (mode) { + case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: + /* Manual mode with ref selected */ + if (ref == ZL3073X_DPLL_REF_NONE) { + switch (zldpll->lock_status) { + case DPLL_LOCK_STATUS_LOCKED_HO_ACQ: + case DPLL_LOCK_STATUS_HOLDOVER: + /* Switch to forced holdover */ + mode = ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER; + break; + default: + /* Switch to freerun */ + mode = ZL_DPLL_MODE_REFSEL_MODE_FREERUN; + break; + } + /* Keep selected reference */ + ref = zldpll->forced_ref; + } else if (ref == zldpll->forced_ref) { + /* No register update - same mode and same ref */ + return 0; + } + break; + case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: + case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: + /* Manual mode with no ref selected */ + if (ref == ZL3073X_DPLL_REF_NONE) + /* No register update - keep current mode */ + return 0; + + /* Switch to reflock mode and update ref selection */ + mode = ZL_DPLL_MODE_REFSEL_MODE_REFLOCK; + break; + default: + /* For other modes like automatic or NCO the ref cannot be + * selected manually + */ + return -EOPNOTSUPP; + } + + /* Build mode_refsel value */ + mode_refsel = FIELD_PREP(ZL_DPLL_MODE_REFSEL_MODE, mode) | + FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); + + /* Update dpll_mode_refsel register */ + rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(zldpll->id), + mode_refsel); + if (rc) + return rc; + + /* Store new mode and forced reference */ + zldpll->refsel_mode = mode; + zldpll->forced_ref = ref; + + return rc; +} + +/** + * zl3073x_dpll_connected_ref_get - get currently connected reference + * @zldpll: pointer to zl3073x_dpll + * @ref: place to store selected reference + * + * Looks for the reference the DPLL is currently connected (locked) to + * and stores its index to the given @ref.
+ * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_connected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) +{ + struct zl3073x_dev *zldev = zldpll->dev; + int rc; + + /* Get currently selected input reference */ + rc = zl3073x_dpll_selected_ref_get(zldpll, ref); + if (rc) + return rc; + + if (ZL3073X_DPLL_REF_IS_VALID(*ref)) { + u8 ref_status; + + /* Read the reference monitor status */ + rc = zl3073x_read_u8(zldev, ZL_REG_REF_MON_STATUS(*ref), + &ref_status); + if (rc) + return rc; + + /* If the monitor indicates an error nothing is connected */ + if (ref_status != ZL_REF_MON_STATUS_OK) + *ref = ZL3073X_DPLL_REF_NONE; + } + + return 0; +} + +/** + * zl3073x_dpll_ref_prio_get - get priority for given input pin + * @pin: pointer to pin + * @prio: place to store priority + * + * Reads current priority for the given input pin and stores the value + * to @prio. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_ref_prio_get(struct zl3073x_dpll_pin *pin, u8 *prio) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + u8 ref, ref_prio; + int rc; + + guard(mutex)(&zldev->multiop_lock); + + /* Read DPLL configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, + ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); + if (rc) + return rc; + + /* Read reference priority - one value for P&N pins (4 bits/pin) */ + ref = zl3073x_input_pin_ref_get(pin->id); + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), + &ref_prio); + if (rc) + return rc; + + /* Select nibble according to pin type */ + if (zl3073x_dpll_is_p_pin(pin)) + *prio = FIELD_GET(ZL_DPLL_REF_PRIO_REF_P, ref_prio); + else + *prio = FIELD_GET(ZL_DPLL_REF_PRIO_REF_N, ref_prio); + + return rc; +} + +/** + * zl3073x_dpll_ref_prio_set - set priority for given input pin + * @pin: pointer to pin + * @prio: priority to be set + * + * Sets priority for the given input pin. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_ref_prio_set(struct zl3073x_dpll_pin *pin, u8 prio) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + u8 ref, ref_prio; + int rc; + + guard(mutex)(&zldev->multiop_lock); + + /* Read DPLL configuration into mailbox */ + rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, + ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); + if (rc) + return rc; + + /* Read reference priority - one value shared between P&N pins */ + ref = zl3073x_input_pin_ref_get(pin->id); + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), &ref_prio); + if (rc) + return rc; + + /* Update nibble according to pin type */ + if (zl3073x_dpll_is_p_pin(pin)) { + ref_prio &= ~ZL_DPLL_REF_PRIO_REF_P; + ref_prio |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_P, prio); + } else { + ref_prio &= ~ZL_DPLL_REF_PRIO_REF_N; + ref_prio |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_N, prio); + } + + /* Update reference priority */ + rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), ref_prio); + if (rc) + return rc; + + /* Commit configuration */ + return zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_WR, + ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); +} + +/** + * zl3073x_dpll_ref_state_get - get status for given input pin + * @pin: pointer to pin + * @state: place to store status + * + * Checks current status for the given input pin and stores the value + * to @state.
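+ * + * The mapping implemented below: CONNECTED when the reference is the one + * the DPLL is currently locked to, SELECTABLE when the DPLL runs in + * automatic mode and the reference monitor reports no error, and + * DISCONNECTED in all other cases.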
+ * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_ref_state_get(struct zl3073x_dpll_pin *pin, + enum dpll_pin_state *state) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + u8 ref, ref_conn, status; + int rc; + + ref = zl3073x_input_pin_ref_get(pin->id); + + /* Get currently connected reference */ + rc = zl3073x_dpll_connected_ref_get(zldpll, &ref_conn); + if (rc) + return rc; + + if (ref == ref_conn) { + *state = DPLL_PIN_STATE_CONNECTED; + return 0; + } + + /* If the DPLL is running in automatic mode and the reference is + * selectable and its monitor does not report any error then report + * pin as selectable. + */ + if (zldpll->refsel_mode == ZL_DPLL_MODE_REFSEL_MODE_AUTO && + pin->selectable) { + /* Read reference monitor status */ + rc = zl3073x_read_u8(zldev, ZL_REG_REF_MON_STATUS(ref), + &status); + if (rc) + return rc; + + /* If the monitor does not indicate any error the pin is + * reported as selectable + */ + if (status == ZL_REF_MON_STATUS_OK) { + *state = DPLL_PIN_STATE_SELECTABLE; + return 0; + } + } + + /* Otherwise report the pin as disconnected */ + *state = DPLL_PIN_STATE_DISCONNECTED; + + return 0; +} + +static int +zl3073x_dpll_input_pin_state_on_dpll_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *pin = pin_priv; + + return zl3073x_dpll_ref_state_get(pin, state); +} + +static int +zl3073x_dpll_input_pin_state_on_dpll_set(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dpll_pin *pin = pin_priv; + u8 new_ref; + int rc; + + switch (zldpll->refsel_mode) { + case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: + case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: + case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: + if (state == DPLL_PIN_STATE_CONNECTED) { + /* Choose the pin as new selected reference */ + new_ref = zl3073x_input_pin_ref_get(pin->id); + } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* No reference */ + new_ref = ZL3073X_DPLL_REF_NONE; + } else { + NL_SET_ERR_MSG_MOD(extack, + "Invalid pin state for manual mode"); + return -EINVAL; + } + + rc = zl3073x_dpll_selected_ref_set(zldpll, new_ref); + break; + + case ZL_DPLL_MODE_REFSEL_MODE_AUTO: + if (state == DPLL_PIN_STATE_SELECTABLE) { + if (pin->selectable) + return 0; /* Pin is already selectable */ + + /* Restore pin priority in HW */ + rc = zl3073x_dpll_ref_prio_set(pin, pin->prio); + if (rc) + return rc; + + /* Mark pin as selectable */ + pin->selectable = true; + } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + if (!pin->selectable) + return 0; /* Pin is already disconnected */ + + /* Set pin priority to none in HW */ + rc = zl3073x_dpll_ref_prio_set(pin, + ZL_DPLL_REF_PRIO_NONE); + if (rc) + return rc; + + /* Mark pin as non-selectable */ + pin->selectable = false; + } else { + NL_SET_ERR_MSG(extack, + "Invalid pin state for automatic mode"); + return -EINVAL; + } + break; + + default: + /* In other modes we cannot change input reference */ + NL_SET_ERR_MSG(extack, + "Pin state cannot be changed in current mode"); + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + +static int +zl3073x_dpll_input_pin_prio_get(const struct dpll_pin *dpll_pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack
*extack) +{ + struct zl3073x_dpll_pin *pin = pin_priv; + + *prio = pin->prio; + + return 0; +} + +static int +zl3073x_dpll_input_pin_prio_set(const struct dpll_pin *dpll_pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 prio, struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *pin = pin_priv; + int rc; + + if (prio > ZL_DPLL_REF_PRIO_MAX) + return -EINVAL; + + /* If the pin is selectable then update HW registers */ + if (pin->selectable) { + rc = zl3073x_dpll_ref_prio_set(pin, prio); + if (rc) + return rc; + } + + /* Save priority */ + pin->prio = prio; + + return 0; +} + +static int +zl3073x_dpll_output_pin_frequency_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *frequency, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dev *zldev = zldpll->dev; + struct zl3073x_dpll_pin *pin = pin_priv; + struct device *dev = zldev->dev; + u8 out, signal_format, synth; + u32 output_div, synth_freq; + int rc; + + out = zl3073x_output_pin_out_get(pin->id); + synth = zl3073x_out_synth_get(zldev, out); + synth_freq = zl3073x_synth_freq_get(zldev, synth); + + guard(mutex)(&zldev->multiop_lock); + + /* Read output configuration into mailbox */ + rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD, + ZL_REG_OUTPUT_MB_MASK, BIT(out)); + if (rc) + return rc; + + rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &output_div); + if (rc) + return rc; + + /* Check output divisor for zero */ + if (!output_div) { + dev_err(dev, "Zero divisor for output %u got from device\n", + out); + return -EINVAL; + } + + /* Read used signal format for the given output */ + signal_format = zl3073x_out_signal_format_get(zldev, out); + + switch (signal_format) { + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV: + case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV: + /* In case of divided format we have to distinguish between + * the given output pin types. + */ + if (zl3073x_dpll_is_p_pin(pin)) { + /* For P-pin the resulting frequency is computed as + * simple division of synth frequency and output + * divisor. + */ + *frequency = synth_freq / output_div; + } else { + /* For N-pin we have to divide additionally by + * divisor stored in esync_period output mailbox + * register that is used as N-pin divisor for these + * modes. + */ + u32 ndiv; + + rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, + &ndiv); + if (rc) + return rc; + + /* Check N-pin divisor for zero */ + if (!ndiv) { + dev_err(dev, + "Zero N-pin divisor for output %u got from device\n", + out); + return -EINVAL; + } + + /* Compute final divisor for N-pin */ + *frequency = synth_freq / output_div / ndiv; + } + break; + default: + /* In other modes the resulting frequency is computed as + * division of synth frequency and output divisor.
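+ * Illustrative values only: a 625 MHz synth with an output + * divisor of 125 gives a 5 MHz output.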
+ */ + *frequency = synth_freq / output_div; + break; + } + + return rc; +} + +static int +zl3073x_dpll_output_pin_frequency_set(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 frequency, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dev *zldev = zldpll->dev; + struct zl3073x_dpll_pin *pin = pin_priv; + struct device *dev = zldev->dev; + u32 output_n_freq, output_p_freq; + u8 out, signal_format, synth; + u32 cur_div, new_div, ndiv; + u32 synth_freq; + int rc; + + out = zl3073x_output_pin_out_get(pin->id); + synth = zl3073x_out_synth_get(zldev, out); + synth_freq = zl3073x_synth_freq_get(zldev, synth); + new_div = synth_freq / (u32)frequency; + + /* Get used signal format for the given output */ + signal_format = zl3073x_out_signal_format_get(zldev, out); + + guard(mutex)(&zldev->multiop_lock); + + /* Load output configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD, + ZL_REG_OUTPUT_MB_MASK, BIT(out)); + if (rc) + return rc; + + /* Check signal format */ + if (signal_format != ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV && + signal_format != ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV) { + /* For non N-divided signal formats the frequency is computed + * as division of synth frequency and output divisor. + */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_DIV, new_div); + if (rc) + return rc; + + /* For 50/50 duty cycle the divisor is equal to width */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_WIDTH, new_div); + if (rc) + return rc; + + /* Commit output configuration */ + return zl3073x_mb_op(zldev, + ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR, + ZL_REG_OUTPUT_MB_MASK, BIT(out)); + } + + /* For N-divided signal format get current divisor */ + rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &cur_div); + if (rc) + return rc; + + /* Check output divisor for zero */ + if (!cur_div) { + dev_err(dev, "Zero divisor for output %u got from device\n", + out); + return -EINVAL; + } + + /* Get N-pin divisor (shares the same register with esync) */ + rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, &ndiv); + if (rc) + return rc; + + /* Check N-pin divisor for zero */ + if (!ndiv) { + dev_err(dev, + "Zero N-pin divisor for output %u got from device\n", + out); + return -EINVAL; + } + + /* Compute current output frequency for P-pin */ + output_p_freq = synth_freq / cur_div; + + /* Compute current N-pin frequency */ + output_n_freq = output_p_freq / ndiv; + + if (zl3073x_dpll_is_p_pin(pin)) { + /* We are going to change output frequency for P-pin but + * if the requested frequency is not higher than the current + * N-pin frequency then indicate a failure as we are not able + * to compute N-pin divisor to keep its frequency unchanged. + */ + if (frequency <= output_n_freq) + return -EINVAL; + + /* Update the output divisor */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_DIV, new_div); + if (rc) + return rc; + + /* For 50/50 duty cycle the divisor is equal to width */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_WIDTH, new_div); + if (rc) + return rc; + + /* Compute new divisor for N-pin */ + ndiv = (u32)frequency / output_n_freq; + } else { + /* We are going to change frequency of N-pin but if + * the requested frequency is greater than or equal to the + * frequency of the P-pin in the output pair we cannot + * compute a divisor for the N-pin. In this case indicate + * a failure.
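+ * Illustrative values only: with the P-pin running at 10 MHz, + * a request for a 25 MHz N-pin is rejected, while a request + * for 2 MHz yields an N-pin divisor of 5.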
+ */ + if (output_p_freq <= frequency) + return -EINVAL; + + /* Compute new divisor for N-pin */ + ndiv = output_p_freq / (u32)frequency; + } + + /* Update divisor for the N-pin */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, ndiv); + if (rc) + return rc; + + /* For 50/50 duty cycle the divisor is equal to width */ + rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH, ndiv); + if (rc) + return rc; + + /* Commit output configuration */ + return zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR, + ZL_REG_OUTPUT_MB_MASK, BIT(out)); +} + +static int +zl3073x_dpll_output_pin_state_on_dpll_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + /* If the output pin is registered then it is always connected */ + *state = DPLL_PIN_STATE_CONNECTED; + + return 0; +} + +static int +zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + enum dpll_lock_status_error *status_error, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dev *zldev = zldpll->dev; + u8 mon_status, state; + int rc; + + switch (zldpll->refsel_mode) { + case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: + case ZL_DPLL_MODE_REFSEL_MODE_NCO: + /* In FREERUN and NCO modes the DPLL is always unlocked */ + *status = DPLL_LOCK_STATUS_UNLOCKED; + + return 0; + default: + break; + } + + /* Read DPLL monitor status */ + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MON_STATUS(zldpll->id), + &mon_status); + if (rc) + return rc; + state = FIELD_GET(ZL_DPLL_MON_STATUS_STATE, mon_status); + + switch (state) { + case ZL_DPLL_MON_STATUS_STATE_LOCK: + if (FIELD_GET(ZL_DPLL_MON_STATUS_HO_READY, mon_status)) + *status = DPLL_LOCK_STATUS_LOCKED_HO_ACQ; + else + *status = DPLL_LOCK_STATUS_LOCKED; + break; + case ZL_DPLL_MON_STATUS_STATE_HOLDOVER: + case ZL_DPLL_MON_STATUS_STATE_ACQUIRING: + *status = DPLL_LOCK_STATUS_HOLDOVER; + break; + default: + dev_warn(zldev->dev, "Unknown DPLL monitor status: 0x%02x\n", + mon_status); + *status = DPLL_LOCK_STATUS_UNLOCKED; + break; + } + + return 0; +} + +static int +zl3073x_dpll_mode_get(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + + switch (zldpll->refsel_mode) { + case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: + case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: + case ZL_DPLL_MODE_REFSEL_MODE_NCO: + case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: + /* Use MANUAL for device FREERUN, HOLDOVER, NCO and + * REFLOCK modes + */ + *mode = DPLL_MODE_MANUAL; + break; + case ZL_DPLL_MODE_REFSEL_MODE_AUTO: + /* Use AUTO for device AUTO mode */ + *mode = DPLL_MODE_AUTOMATIC; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = { + .direction_get = zl3073x_dpll_pin_direction_get, + .frequency_get = zl3073x_dpll_input_pin_frequency_get, + .frequency_set = zl3073x_dpll_input_pin_frequency_set, + .prio_get = zl3073x_dpll_input_pin_prio_get, + .prio_set = zl3073x_dpll_input_pin_prio_set, + .state_on_dpll_get = zl3073x_dpll_input_pin_state_on_dpll_get, + .state_on_dpll_set = zl3073x_dpll_input_pin_state_on_dpll_set, +}; + +static const struct dpll_pin_ops zl3073x_dpll_output_pin_ops = { + .direction_get = zl3073x_dpll_pin_direction_get, + .frequency_get = zl3073x_dpll_output_pin_frequency_get, + .frequency_set = 
zl3073x_dpll_output_pin_frequency_set, + .state_on_dpll_get = zl3073x_dpll_output_pin_state_on_dpll_get, +}; + +static const struct dpll_device_ops zl3073x_dpll_device_ops = { + .lock_status_get = zl3073x_dpll_lock_status_get, + .mode_get = zl3073x_dpll_mode_get, +}; + +/** + * zl3073x_dpll_pin_alloc - allocate DPLL pin + * @zldpll: pointer to zl3073x_dpll + * @dir: pin direction + * @id: pin id + * + * Allocates and initializes zl3073x_dpll_pin structure for given + * pin id and direction. + * + * Return: pointer to allocated structure on success, error pointer on error + */ +static struct zl3073x_dpll_pin * +zl3073x_dpll_pin_alloc(struct zl3073x_dpll *zldpll, enum dpll_pin_direction dir, + u8 id) +{ + struct zl3073x_dpll_pin *pin; + + pin = kzalloc(sizeof(*pin), GFP_KERNEL); + if (!pin) + return ERR_PTR(-ENOMEM); + + pin->dpll = zldpll; + pin->dir = dir; + pin->id = id; + + return pin; +} + +/** + * zl3073x_dpll_pin_free - deallocate DPLL pin + * @pin: pin to free + * + * Deallocates DPLL pin previously allocated by @zl3073x_dpll_pin_alloc. + */ +static void +zl3073x_dpll_pin_free(struct zl3073x_dpll_pin *pin) +{ + WARN(pin->dpll_pin, "DPLL pin is still registered\n"); + + kfree(pin); +} + +/** + * zl3073x_dpll_pin_register - register DPLL pin + * @pin: pointer to DPLL pin + * @index: absolute pin index for registration + * + * Registers given DPLL pin into DPLL sub-system. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_pin_props *props; + const struct dpll_pin_ops *ops; + int rc; + + /* Get pin properties */ + props = zl3073x_pin_props_get(zldpll->dev, pin->dir, pin->id); + if (IS_ERR(props)) + return PTR_ERR(props); + + /* Save package label */ + strscpy(pin->label, props->package_label); + + if (zl3073x_dpll_is_input_pin(pin)) { + rc = zl3073x_dpll_ref_prio_get(pin, &pin->prio); + if (rc) + goto err_prio_get; + + if (pin->prio == ZL_DPLL_REF_PRIO_NONE) { + /* Clamp prio to max value & mark pin non-selectable */ + pin->prio = ZL_DPLL_REF_PRIO_MAX; + pin->selectable = false; + } else { + /* Mark pin as selectable */ + pin->selectable = true; + } + } + + /* Create or get existing DPLL pin */ + pin->dpll_pin = dpll_pin_get(zldpll->dev->clock_id, index, THIS_MODULE, + &props->dpll_props); + if (IS_ERR(pin->dpll_pin)) { + rc = PTR_ERR(pin->dpll_pin); + goto err_pin_get; + } + + if (zl3073x_dpll_is_input_pin(pin)) + ops = &zl3073x_dpll_input_pin_ops; + else + ops = &zl3073x_dpll_output_pin_ops; + + /* Register the pin */ + rc = dpll_pin_register(zldpll->dpll_dev, pin->dpll_pin, ops, pin); + if (rc) + goto err_register; + + /* Free pin properties */ + zl3073x_pin_props_put(props); + + return 0; + +err_register: + dpll_pin_put(pin->dpll_pin); +err_pin_get: + pin->dpll_pin = NULL; +err_prio_get: + zl3073x_pin_props_put(props); + + return rc; +} + +/** + * zl3073x_dpll_pin_unregister - unregister DPLL pin + * @pin: pointer to DPLL pin + * + * Unregisters pin previously registered by @zl3073x_dpll_pin_register.
+ */ +static void +zl3073x_dpll_pin_unregister(struct zl3073x_dpll_pin *pin) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + const struct dpll_pin_ops *ops; + + WARN(!pin->dpll_pin, "DPLL pin is not registered\n"); + + if (zl3073x_dpll_is_input_pin(pin)) + ops = &zl3073x_dpll_input_pin_ops; + else + ops = &zl3073x_dpll_output_pin_ops; + + /* Unregister the pin */ + dpll_pin_unregister(zldpll->dpll_dev, pin->dpll_pin, ops, pin); + + dpll_pin_put(pin->dpll_pin); + pin->dpll_pin = NULL; +} + +/** + * zl3073x_dpll_pins_unregister - unregister all registered DPLL pins + * @zldpll: pointer to zl3073x_dpll structure + * + * Enumerates all DPLL pins registered to given DPLL device and + * unregisters them. + */ +static void +zl3073x_dpll_pins_unregister(struct zl3073x_dpll *zldpll) +{ + struct zl3073x_dpll_pin *pin, *next; + + list_for_each_entry_safe(pin, next, &zldpll->pins, list) { + zl3073x_dpll_pin_unregister(pin); + list_del(&pin->list); + zl3073x_dpll_pin_free(pin); + } +} + +/** + * zl3073x_dpll_pin_is_registrable - check if the pin is registrable + * @zldpll: pointer to zl3073x_dpll structure + * @dir: pin direction + * @index: pin index + * + * Checks if the given pin can be registered to given DPLL. For both + * directions the pin can be registered if it is enabled. In case of a + * differential signal type only the P-pin is reported as registrable. + * Additionally, an output pin can be registered only if it is connected + * to a synthesizer that is driven by the given DPLL. + * + * Return: true if the pin is registrable, false if not + */ +static bool +zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, + enum dpll_pin_direction dir, u8 index) +{ + struct zl3073x_dev *zldev = zldpll->dev; + bool is_diff, is_enabled; + const char *name; + + if (dir == DPLL_PIN_DIRECTION_INPUT) { + u8 ref = zl3073x_input_pin_ref_get(index); + + name = "REF"; + + /* Skip the pin if the DPLL is running in NCO mode */ + if (zldpll->refsel_mode == ZL_DPLL_MODE_REFSEL_MODE_NCO) + return false; + + is_diff = zl3073x_ref_is_diff(zldev, ref); + is_enabled = zl3073x_ref_is_enabled(zldev, ref); + } else { + /* Output P&N pair shares single HW output */ + u8 out = zl3073x_output_pin_out_get(index); + + name = "OUT"; + + /* Skip the pin if it is connected to a different DPLL channel */ + if (zl3073x_out_dpll_get(zldev, out) != zldpll->id) { + dev_dbg(zldev->dev, + "%s%u is driven by a different DPLL\n", name, + out); + + return false; + } + + is_diff = zl3073x_out_is_diff(zldev, out); + is_enabled = zl3073x_out_is_enabled(zldev, out); + } + + /* Skip N-pin if the corresponding input/output is differential */ + if (is_diff && zl3073x_is_n_pin(index)) { + dev_dbg(zldev->dev, "%s%u is differential, skipping N-pin\n", + name, index / 2); + + return false; + } + + /* Skip the pin if it is disabled */ + if (!is_enabled) { + dev_dbg(zldev->dev, "%s%u%c is disabled\n", name, index / 2, + zl3073x_is_p_pin(index) ? 'P' : 'N'); + + return false; + } + + return true; +} + +/** + * zl3073x_dpll_pins_register - register all registerable DPLL pins + * @zldpll: pointer to zl3073x_dpll structure + * + * Enumerates all possible input/output pins and registers all of them + * that are registrable.
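+ * + * The absolute pin index space is fixed: indices 0..9 map to the input + * pins (REF0P/N..REF4P/N) and indices 10..29 to the output pins + * (OUT0P/N..OUT9P/N), matching ZL3073X_NUM_INPUT_PINS and + * ZL3073X_NUM_OUTPUT_PINS from core.h.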
+ * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_pins_register(struct zl3073x_dpll *zldpll) +{ + struct zl3073x_dpll_pin *pin; + enum dpll_pin_direction dir; + u8 id, index; + int rc; + + /* Process all input and output pins */ + for (index = 0; index < ZL3073X_NUM_PINS; index++) { + /* First input pins and then output pins */ + if (index < ZL3073X_NUM_INPUT_PINS) { + id = index; + dir = DPLL_PIN_DIRECTION_INPUT; + } else { + id = index - ZL3073X_NUM_INPUT_PINS; + dir = DPLL_PIN_DIRECTION_OUTPUT; + } + + /* Check if the pin is registrable to this DPLL */ + if (!zl3073x_dpll_pin_is_registrable(zldpll, dir, id)) + continue; + + pin = zl3073x_dpll_pin_alloc(zldpll, dir, id); + if (IS_ERR(pin)) { + rc = PTR_ERR(pin); + goto error; + } + + rc = zl3073x_dpll_pin_register(pin, index); + if (rc) { + zl3073x_dpll_pin_free(pin); + goto error; + } + + list_add(&pin->list, &zldpll->pins); + } + + return 0; + +error: + zl3073x_dpll_pins_unregister(zldpll); + + return rc; +} + +/** + * zl3073x_dpll_device_register - register DPLL device + * @zldpll: pointer to zl3073x_dpll structure + * + * Registers given DPLL device into DPLL sub-system. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) +{ + struct zl3073x_dev *zldev = zldpll->dev; + u8 dpll_mode_refsel; + int rc; + + /* Read DPLL mode and forcibly selected reference */ + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(zldpll->id), + &dpll_mode_refsel); + if (rc) + return rc; + + /* Extract mode and selected input reference */ + zldpll->refsel_mode = FIELD_GET(ZL_DPLL_MODE_REFSEL_MODE, + dpll_mode_refsel); + zldpll->forced_ref = FIELD_GET(ZL_DPLL_MODE_REFSEL_REF, + dpll_mode_refsel); + + zldpll->dpll_dev = dpll_device_get(zldev->clock_id, zldpll->id, + THIS_MODULE); + if (IS_ERR(zldpll->dpll_dev)) { + rc = PTR_ERR(zldpll->dpll_dev); + zldpll->dpll_dev = NULL; + + return rc; + } + + rc = dpll_device_register(zldpll->dpll_dev, + zl3073x_prop_dpll_type_get(zldev, zldpll->id), + &zl3073x_dpll_device_ops, zldpll); + if (rc) { + dpll_device_put(zldpll->dpll_dev); + zldpll->dpll_dev = NULL; + } + + return rc; +} + +/** + * zl3073x_dpll_device_unregister - unregister DPLL device + * @zldpll: pointer to zl3073x_dpll structure + * + * Unregisters given DPLL device from DPLL sub-system previously registered + * by @zl3073x_dpll_device_register. + */ +static void +zl3073x_dpll_device_unregister(struct zl3073x_dpll *zldpll) +{ + WARN(!zldpll->dpll_dev, "DPLL device is not registered\n"); + + dpll_device_unregister(zldpll->dpll_dev, &zl3073x_dpll_device_ops, + zldpll); + dpll_device_put(zldpll->dpll_dev); + zldpll->dpll_dev = NULL; +} + +/** + * zl3073x_dpll_changes_check - check for changes and send notifications + * @zldpll: pointer to zl3073x_dpll structure + * + * Checks for changes on given DPLL device and its registered DPLL pins + * and sends notifications about them. + * + * This function is periodically called from @zl3073x_dev_periodic_work.
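+ * + * Together with the scheduling in zl3073x_dev_periodic_work() this + * effectively polls the lock status and input pin states twice a second + * and emits dpll_device_change_ntf()/dpll_pin_change_ntf() notifications + * when something changed.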
+ */ +void +zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) +{ + struct zl3073x_dev *zldev = zldpll->dev; + enum dpll_lock_status lock_status; + struct device *dev = zldev->dev; + struct zl3073x_dpll_pin *pin; + int rc; + + /* Get current lock status for the DPLL */ + rc = zl3073x_dpll_lock_status_get(zldpll->dpll_dev, zldpll, + &lock_status, NULL, NULL); + if (rc) { + dev_err(dev, "Failed to get DPLL%u lock status: %pe\n", + zldpll->id, ERR_PTR(rc)); + return; + } + + /* If lock status was changed then notify DPLL core */ + if (zldpll->lock_status != lock_status) { + zldpll->lock_status = lock_status; + dpll_device_change_ntf(zldpll->dpll_dev); + } + + /* Input pin monitoring makes sense only in automatic + * or forced reference modes. + */ + if (zldpll->refsel_mode != ZL_DPLL_MODE_REFSEL_MODE_AUTO && + zldpll->refsel_mode != ZL_DPLL_MODE_REFSEL_MODE_REFLOCK) + return; + + list_for_each_entry(pin, &zldpll->pins, list) { + enum dpll_pin_state state; + + /* Change checks for output pins are not necessary because + * output states are constant. + */ + if (!zl3073x_dpll_is_input_pin(pin)) + continue; + + rc = zl3073x_dpll_ref_state_get(pin, &state); + if (rc) { + dev_err(dev, + "Failed to get state of %s on DPLL%u: %pe\n", + pin->label, zldpll->id, ERR_PTR(rc)); + return; + } + + if (state != pin->pin_state) { + dev_dbg(dev, "%s state changed: %u->%u\n", pin->label, + pin->pin_state, state); + pin->pin_state = state; + dpll_pin_change_ntf(pin->dpll_pin); + } + } +} + +/** + * zl3073x_dpll_init_fine_phase_adjust - do initial fine phase adjustments + * @zldev: pointer to zl3073x device + * + * Performs initial fine phase adjustments needed per datasheet. + * + * Return: 0 on success, <0 on error + */ +int +zl3073x_dpll_init_fine_phase_adjust(struct zl3073x_dev *zldev) +{ + int rc; + + rc = zl3073x_write_u8(zldev, ZL_REG_SYNTH_PHASE_SHIFT_MASK, 0x1f); + if (rc) + return rc; + + rc = zl3073x_write_u8(zldev, ZL_REG_SYNTH_PHASE_SHIFT_INTVL, 0x01); + if (rc) + return rc; + + rc = zl3073x_write_u16(zldev, ZL_REG_SYNTH_PHASE_SHIFT_DATA, 0xffff); + if (rc) + return rc; + + rc = zl3073x_write_u8(zldev, ZL_REG_SYNTH_PHASE_SHIFT_CTRL, 0x01); + if (rc) + return rc; + + return rc; +} + +/** + * zl3073x_dpll_alloc - allocate DPLL device + * @zldev: pointer to zl3073x device + * @ch: DPLL channel number + * + * Allocates DPLL device structure for given DPLL channel. + * + * Return: pointer to DPLL device on success, error pointer on error + */ +struct zl3073x_dpll * +zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch) +{ + struct zl3073x_dpll *zldpll; + + zldpll = kzalloc(sizeof(*zldpll), GFP_KERNEL); + if (!zldpll) + return ERR_PTR(-ENOMEM); + + zldpll->dev = zldev; + zldpll->id = ch; + INIT_LIST_HEAD(&zldpll->pins); + + return zldpll; +} + +/** + * zl3073x_dpll_free - free DPLL device + * @zldpll: pointer to zl3073x_dpll structure + * + * Deallocates given DPLL device previously allocated by @zl3073x_dpll_alloc. + */ +void +zl3073x_dpll_free(struct zl3073x_dpll *zldpll) +{ + WARN(zldpll->dpll_dev, "DPLL device is still registered\n"); + + kfree(zldpll); +} + +/** + * zl3073x_dpll_register - register DPLL device and all its pins + * @zldpll: pointer to zl3073x_dpll structure + * + * Registers given DPLL device and all its pins into DPLL sub-system.
+ * + * Return: 0 on success, <0 on error + */ +int +zl3073x_dpll_register(struct zl3073x_dpll *zldpll) +{ + int rc; + + rc = zl3073x_dpll_device_register(zldpll); + if (rc) + return rc; + + rc = zl3073x_dpll_pins_register(zldpll); + if (rc) { + zl3073x_dpll_device_unregister(zldpll); + return rc; + } + + return 0; +} + +/** + * zl3073x_dpll_unregister - unregister DPLL device and its pins + * @zldpll: pointer to zl3073x_dpll structure + * + * Unregisters given DPLL device and all its pins from DPLL sub-system + * previously registered by @zl3073x_dpll_register. + */ +void +zl3073x_dpll_unregister(struct zl3073x_dpll *zldpll) +{ + /* Unregister all pins and dpll */ + zl3073x_dpll_pins_unregister(zldpll); + zl3073x_dpll_device_unregister(zldpll); +} diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h new file mode 100644 index 000000000000..db7388cc377f --- /dev/null +++ b/drivers/dpll/zl3073x/dpll.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_DPLL_H +#define _ZL3073X_DPLL_H + +#include <linux/dpll.h> +#include <linux/list.h> + +#include "core.h" + +/** + * struct zl3073x_dpll - ZL3073x DPLL sub-device structure + * @list: this DPLL list entry + * @dev: pointer to multi-function parent device + * @id: DPLL index + * @refsel_mode: reference selection mode + * @forced_ref: selected reference in forced reference lock mode + * @dpll_dev: pointer to registered DPLL device + * @lock_status: last saved DPLL lock status + * @pins: list of pins + */ +struct zl3073x_dpll { + struct list_head list; + struct zl3073x_dev *dev; + u8 id; + u8 refsel_mode; + u8 forced_ref; + struct dpll_device *dpll_dev; + enum dpll_lock_status lock_status; + struct list_head pins; +}; + +struct zl3073x_dpll *zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch); +void zl3073x_dpll_free(struct zl3073x_dpll *zldpll); + +int zl3073x_dpll_register(struct zl3073x_dpll *zldpll); +void zl3073x_dpll_unregister(struct zl3073x_dpll *zldpll); + +int zl3073x_dpll_init_fine_phase_adjust(struct zl3073x_dev *zldev); +void zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll); + +#endif /* _ZL3073X_DPLL_H */ diff --git a/drivers/dpll/zl3073x/i2c.c b/drivers/dpll/zl3073x/i2c.c new file mode 100644 index 000000000000..7bbfdd4ed867 --- /dev/null +++ b/drivers/dpll/zl3073x/i2c.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/dev_printk.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/regmap.h> + +#include "core.h" + +static int zl3073x_i2c_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct zl3073x_dev *zldev; + + zldev = zl3073x_devm_alloc(dev); + if (IS_ERR(zldev)) + return PTR_ERR(zldev); + + zldev->regmap = devm_regmap_init_i2c(client, &zl3073x_regmap_config); + if (IS_ERR(zldev->regmap)) + return dev_err_probe(dev, PTR_ERR(zldev->regmap), + "Failed to initialize regmap\n"); + + return zl3073x_dev_probe(zldev, i2c_get_match_data(client)); +} + +static const struct i2c_device_id zl3073x_i2c_id[] = { + { + .name = "zl30731", + .driver_data = (kernel_ulong_t)&zl30731_chip_info, + }, + { + .name = "zl30732", + .driver_data = (kernel_ulong_t)&zl30732_chip_info, + }, + { + .name = "zl30733", + .driver_data = (kernel_ulong_t)&zl30733_chip_info, + }, + { + .name = "zl30734", + .driver_data = (kernel_ulong_t)&zl30734_chip_info, + }, + { + .name = "zl30735", + .driver_data = (kernel_ulong_t)&zl30735_chip_info, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, zl3073x_i2c_id); + 
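+/* + * Illustrative devicetree snippet only; the authoritative schema lives in + * the dt-bindings documentation and the I2C address below is made up: + * + * &i2c0 { + * dpll@70 { + * compatible = "microchip,zl30732"; + * reg = <0x70>; + * }; + * }; + */ +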
+static const struct of_device_id zl3073x_i2c_of_match[] = { + { .compatible = "microchip,zl30731", .data = &zl30731_chip_info }, + { .compatible = "microchip,zl30732", .data = &zl30732_chip_info }, + { .compatible = "microchip,zl30733", .data = &zl30733_chip_info }, + { .compatible = "microchip,zl30734", .data = &zl30734_chip_info }, + { .compatible = "microchip,zl30735", .data = &zl30735_chip_info }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zl3073x_i2c_of_match); + +static struct i2c_driver zl3073x_i2c_driver = { + .driver = { + .name = "zl3073x-i2c", + .of_match_table = zl3073x_i2c_of_match, + }, + .probe = zl3073x_i2c_probe, + .id_table = zl3073x_i2c_id, +}; +module_i2c_driver(zl3073x_i2c_driver); + +MODULE_AUTHOR("Ivan Vecera <ivecera@redhat.com>"); +MODULE_DESCRIPTION("Microchip ZL3073x I2C driver"); +MODULE_IMPORT_NS("ZL3073X"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dpll/zl3073x/prop.c b/drivers/dpll/zl3073x/prop.c new file mode 100644 index 000000000000..4cf7e8aefcb3 --- /dev/null +++ b/drivers/dpll/zl3073x/prop.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/array_size.h> +#include <linux/dev_printk.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/fwnode.h> +#include <linux/property.h> +#include <linux/slab.h> +#include <linux/string.h> + +#include "core.h" +#include "prop.h" + +/** + * zl3073x_pin_check_freq - verify frequency for given pin + * @zldev: pointer to zl3073x device + * @dir: pin direction + * @id: pin index + * @freq: frequency to check + * + * The function checks that the given frequency is valid for the device. For + * input pins it checks that the frequency can be factorized using supported + * base frequencies. For output pins it checks that the frequency divides the + * connected synth frequency without remainder. + * + * Return: true if the frequency is valid, false if not. + */ +static bool +zl3073x_pin_check_freq(struct zl3073x_dev *zldev, enum dpll_pin_direction dir, + u8 id, u64 freq) +{ + if (freq > U32_MAX) + goto err_inv_freq; + + if (dir == DPLL_PIN_DIRECTION_INPUT) { + int rc; + + /* Check if the frequency can be factorized */ + rc = zl3073x_ref_freq_factorize(freq, NULL, NULL); + if (rc) + goto err_inv_freq; + } else { + u32 synth_freq; + u8 out, synth; + + /* Get output pin synthesizer */ + out = zl3073x_output_pin_out_get(id); + synth = zl3073x_out_synth_get(zldev, out); + + /* Get synth frequency */ + synth_freq = zl3073x_synth_freq_get(zldev, synth); + + /* Check that the frequency divides the synth frequency */ + if (synth_freq % (u32)freq) + goto err_inv_freq; + } + + return true; + +err_inv_freq: + dev_warn(zldev->dev, + "Unsupported frequency %llu Hz in firmware node\n", freq); + + return false; +} + +/** + * zl3073x_prop_pin_package_label_set - set package label for the pin + * @zldev: pointer to zl3073x device + * @props: pointer to pin properties + * @dir: pin direction + * @id: pin index + * + * Generates the package label string and stores it in the pin properties structure.
+ * + * Possible formats: + * REF<n> - differential input reference + * REF<n>P & REF<n>N - single-ended input reference (P or N pin) + * OUT<n> - differential output + * OUT<n>P & OUT<n>N - single-ended output (P or N pin) + */ +static void +zl3073x_prop_pin_package_label_set(struct zl3073x_dev *zldev, + struct zl3073x_pin_props *props, + enum dpll_pin_direction dir, u8 id) +{ + const char *prefix, *suffix; + bool is_diff; + + if (dir == DPLL_PIN_DIRECTION_INPUT) { + u8 ref; + + prefix = "REF"; + ref = zl3073x_input_pin_ref_get(id); + is_diff = zl3073x_ref_is_diff(zldev, ref); + } else { + u8 out; + + prefix = "OUT"; + out = zl3073x_output_pin_out_get(id); + is_diff = zl3073x_out_is_diff(zldev, out); + } + + if (!is_diff) + suffix = zl3073x_is_p_pin(id) ? "P" : "N"; + else + suffix = ""; /* No suffix for differential pins */ + + snprintf(props->package_label, sizeof(props->package_label), "%s%u%s", + prefix, id / 2, suffix); + + /* Set package_label pointer in DPLL core properties to generated + * string. + */ + props->dpll_props.package_label = props->package_label; +} + +/** + * zl3073x_prop_pin_fwnode_get - get fwnode for given pin + * @zldev: pointer to zl3073x device + * @props: pointer to pin properties + * @dir: pin direction + * @id: pin index + * + * Return: 0 on success, -ENOENT if the firmware node does not exist + */ +static int +zl3073x_prop_pin_fwnode_get(struct zl3073x_dev *zldev, + struct zl3073x_pin_props *props, + enum dpll_pin_direction dir, u8 id) +{ + struct fwnode_handle *pins_node, *pin_node; + const char *node_name; + + if (dir == DPLL_PIN_DIRECTION_INPUT) + node_name = "input-pins"; + else + node_name = "output-pins"; + + /* Get node containing input or output pins */ + pins_node = device_get_named_child_node(zldev->dev, node_name); + if (!pins_node) { + dev_dbg(zldev->dev, "'%s' sub-node is missing\n", node_name); + return -ENOENT; + } + + /* Enumerate child pin nodes and find the requested one */ + fwnode_for_each_child_node(pins_node, pin_node) { + u32 reg; + + if (fwnode_property_read_u32(pin_node, "reg", &reg)) + continue; + + if (id == reg) + break; + } + + /* Release pin parent node */ + fwnode_handle_put(pins_node); + + /* Save found node */ + props->fwnode = pin_node; + + dev_dbg(zldev->dev, "Firmware node for %s %sfound\n", + props->package_label, pin_node ? "" : "NOT "); + + return pin_node ? 0 : -ENOENT; +} + +/** + * zl3073x_pin_props_get - get pin properties + * @zldev: pointer to zl3073x device + * @dir: pin direction + * @index: pin index + * + * The function looks for a firmware node for the given pin if one is provided + * by the system firmware (DT or ACPI), allocates a pin properties structure, + * generates the package label string according to the pin type and optionally + * fetches the board label, connection type, supported frequencies and esync + * capability from the firmware node if it exists. + * + * The pointer returned by this function should be freed using + * zl3073x_pin_props_put().
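+ * + * A missing firmware node is not treated as an error: in that case the returned structure simply carries the generated defaults.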
+ * + * Return: + * * pointer to allocated pin properties structure on success + * * error pointer in case of error + */ +struct zl3073x_pin_props *zl3073x_pin_props_get(struct zl3073x_dev *zldev, + enum dpll_pin_direction dir, + u8 index) +{ + struct dpll_pin_frequency *ranges; + struct zl3073x_pin_props *props; + int i, j, num_freqs, rc; + const char *type; + u64 *freqs; + + props = kzalloc(sizeof(*props), GFP_KERNEL); + if (!props) + return ERR_PTR(-ENOMEM); + + /* Set default pin type and capabilities */ + if (dir == DPLL_PIN_DIRECTION_INPUT) { + props->dpll_props.type = DPLL_PIN_TYPE_EXT; + props->dpll_props.capabilities = + DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE | + DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + } else { + props->dpll_props.type = DPLL_PIN_TYPE_GNSS; + } + + props->dpll_props.phase_range.min = S32_MIN; + props->dpll_props.phase_range.max = S32_MAX; + + zl3073x_prop_pin_package_label_set(zldev, props, dir, index); + + /* Get firmware node for the given pin */ + rc = zl3073x_prop_pin_fwnode_get(zldev, props, dir, index); + if (rc) + return props; /* Return if it does not exist */ + + /* Look for label property and store the value as board label */ + fwnode_property_read_string(props->fwnode, "label", + &props->dpll_props.board_label); + + /* Look for pin type property and translate its value to DPLL + * pin type enum if it is present. + */ + if (!fwnode_property_read_string(props->fwnode, "connection-type", + &type)) { + if (!strcmp(type, "ext")) + props->dpll_props.type = DPLL_PIN_TYPE_EXT; + else if (!strcmp(type, "gnss")) + props->dpll_props.type = DPLL_PIN_TYPE_GNSS; + else if (!strcmp(type, "int")) + props->dpll_props.type = DPLL_PIN_TYPE_INT_OSCILLATOR; + else if (!strcmp(type, "synce")) + props->dpll_props.type = DPLL_PIN_TYPE_SYNCE_ETH_PORT; + else + dev_warn(zldev->dev, + "Unknown or unsupported pin type '%s'\n", + type); + } + + /* Check if the pin supports embedded sync control */ + props->esync_control = fwnode_property_read_bool(props->fwnode, + "esync-control"); + + /* Read supported frequencies property if it is specified */ + num_freqs = fwnode_property_count_u64(props->fwnode, + "supported-frequencies-hz"); + if (num_freqs <= 0) + /* Return if the property does not exist or number is 0 */ + return props; + + /* The firmware node specifies a list of supported frequencies while + * the DPLL core pin properties require a list of frequency ranges. + * So read the frequency list into a temporary array.
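+ * Each frequency that passes validation is then wrapped into a single-value range via the DPLL_PIN_FREQUENCY() initializer used below.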
+ */ + freqs = kcalloc(num_freqs, sizeof(*freqs), GFP_KERNEL); + if (!freqs) { + rc = -ENOMEM; + goto err_alloc_freqs; + } + + /* Read frequencies list from firmware node */ + fwnode_property_read_u64_array(props->fwnode, + "supported-frequencies-hz", freqs, + num_freqs); + + /* Allocate frequency ranges list and fill it */ + ranges = kcalloc(num_freqs, sizeof(*ranges), GFP_KERNEL); + if (!ranges) { + rc = -ENOMEM; + goto err_alloc_ranges; + } + + /* Convert the list of frequencies to a list of frequency ranges but + * filter out frequencies that are not representable by the device + */ + for (i = 0, j = 0; i < num_freqs; i++) { + struct dpll_pin_frequency freq = DPLL_PIN_FREQUENCY(freqs[i]); + + if (zl3073x_pin_check_freq(zldev, dir, index, freqs[i])) { + ranges[j] = freq; + j++; + } + } + + /* Save number of freq ranges and pointer to them into pin properties */ + props->dpll_props.freq_supported = ranges; + props->dpll_props.freq_supported_num = j; + + /* Free temporary array */ + kfree(freqs); + + return props; + +err_alloc_ranges: + kfree(freqs); +err_alloc_freqs: + fwnode_handle_put(props->fwnode); + kfree(props); + + return ERR_PTR(rc); +} + +/** + * zl3073x_pin_props_put - release pin properties + * @props: pin properties to free + * + * The function deallocates the given pin properties structure. + */ +void zl3073x_pin_props_put(struct zl3073x_pin_props *props) +{ + /* Free supported frequency ranges list if it is present */ + kfree(props->dpll_props.freq_supported); + + /* Put firmware handle if it is present */ + if (props->fwnode) + fwnode_handle_put(props->fwnode); + + kfree(props); +} + +/** + * zl3073x_prop_dpll_type_get - get DPLL channel type + * @zldev: pointer to zl3073x device + * @index: DPLL channel index + * + * Return: DPLL type for the given DPLL channel + */ +enum dpll_type +zl3073x_prop_dpll_type_get(struct zl3073x_dev *zldev, u8 index) +{ + const char *types[ZL3073X_MAX_CHANNELS]; + int count; + + /* Read dpll types property from firmware */ + count = device_property_read_string_array(zldev->dev, "dpll-types", + types, ARRAY_SIZE(types)); + + /* Return default if property or entry for given channel is missing */ + if (index >= count) + return DPLL_TYPE_PPS; + + if (!strcmp(types[index], "pps")) + return DPLL_TYPE_PPS; + else if (!strcmp(types[index], "eec")) + return DPLL_TYPE_EEC; + + dev_info(zldev->dev, "Unknown DPLL type '%s', using default\n", + types[index]); + + return DPLL_TYPE_PPS; /* Default */ +} diff --git a/drivers/dpll/zl3073x/prop.h b/drivers/dpll/zl3073x/prop.h new file mode 100644 index 000000000000..721a18f05938 --- /dev/null +++ b/drivers/dpll/zl3073x/prop.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_PROP_H +#define _ZL3073X_PROP_H + +#include <linux/dpll.h> + +#include "core.h" + +struct fwnode_handle; + +/** + * struct zl3073x_pin_props - pin properties + * @fwnode: pin firmware node + * @dpll_props: DPLL core pin properties + * @package_label: pin package label + * @esync_control: embedded sync support + */ +struct zl3073x_pin_props { + struct fwnode_handle *fwnode; + struct dpll_pin_properties dpll_props; + char package_label[8]; + bool esync_control; +}; + +enum dpll_type zl3073x_prop_dpll_type_get(struct zl3073x_dev *zldev, u8 index); + +struct zl3073x_pin_props *zl3073x_pin_props_get(struct zl3073x_dev *zldev, + enum dpll_pin_direction dir, + u8 index); + +void zl3073x_pin_props_put(struct zl3073x_pin_props *props); + +#endif /* _ZL3073X_PROP_H */ diff --git a/drivers/dpll/zl3073x/regs.h
b/drivers/dpll/zl3073x/regs.h new file mode 100644 index 000000000000..493c63e72920 --- /dev/null +++ b/drivers/dpll/zl3073x/regs.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_REGS_H +#define _ZL3073X_REGS_H + +#include <linux/bitfield.h> +#include <linux/bits.h> + +/* + * Register address structure: + * =========================== + * 25 19 18 16 15 7 6 0 + * +------------------------------------------+ + * | max_offset | size | page | page_offset | + * +------------------------------------------+ + * + * page_offset ... <0x00..0x7F> + * page .......... HW page number + * size .......... register byte size (1, 2, 4 or 6) + * max_offset .... maximal offset for indexed registers + * (for non-indexed regs max_offset == page_offset) + */ + +#define ZL_REG_OFFSET_MASK GENMASK(6, 0) +#define ZL_REG_PAGE_MASK GENMASK(15, 7) +#define ZL_REG_SIZE_MASK GENMASK(18, 16) +#define ZL_REG_MAX_OFFSET_MASK GENMASK(25, 19) +#define ZL_REG_ADDR_MASK GENMASK(15, 0) + +#define ZL_REG_OFFSET(_reg) FIELD_GET(ZL_REG_OFFSET_MASK, _reg) +#define ZL_REG_PAGE(_reg) FIELD_GET(ZL_REG_PAGE_MASK, _reg) +#define ZL_REG_MAX_OFFSET(_reg) FIELD_GET(ZL_REG_MAX_OFFSET_MASK, _reg) +#define ZL_REG_SIZE(_reg) FIELD_GET(ZL_REG_SIZE_MASK, _reg) +#define ZL_REG_ADDR(_reg) FIELD_GET(ZL_REG_ADDR_MASK, _reg) + +/** + * ZL_REG_IDX - define indexed register + * @_idx: index of register to access + * @_page: register page + * @_offset: register offset in page + * @_size: register byte size (1, 2, 4 or 6) + * @_items: number of register indices + * @_stride: stride between items in bytes + * + * All parameters except @_idx should be constant. + */ +#define ZL_REG_IDX(_idx, _page, _offset, _size, _items, _stride) \ + (FIELD_PREP(ZL_REG_OFFSET_MASK, \ + (_offset) + (_idx) * (_stride)) | \ + FIELD_PREP_CONST(ZL_REG_PAGE_MASK, _page) | \ + FIELD_PREP_CONST(ZL_REG_SIZE_MASK, _size) | \ + FIELD_PREP_CONST(ZL_REG_MAX_OFFSET_MASK, \ + (_offset) + ((_items) - 1) * (_stride))) + +/** + * ZL_REG - define simple (non-indexed) register + * @_page: register page + * @_offset: register offset in page + * @_size: register byte size (1, 2, 4 or 6) + * + * All parameters should be constant. 
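+ * + * For example, ZL_REG(2, 0x10, 1) describes a 1-byte register at page 2, page offset 0x10; with the field layout above this corresponds to bus address 0x110 (page * 0x80 + page_offset).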
+ */ +#define ZL_REG(_page, _offset, _size) \ + ZL_REG_IDX(0, _page, _offset, _size, 1, 0) + +/************************** + * Register Page 0, General + **************************/ + +#define ZL_REG_ID ZL_REG(0, 0x01, 2) +#define ZL_REG_REVISION ZL_REG(0, 0x03, 2) +#define ZL_REG_FW_VER ZL_REG(0, 0x05, 2) +#define ZL_REG_CUSTOM_CONFIG_VER ZL_REG(0, 0x07, 4) + +/************************* + * Register Page 2, Status + *************************/ + +#define ZL_REG_REF_MON_STATUS(_idx) \ + ZL_REG_IDX(_idx, 2, 0x02, 1, ZL3073X_NUM_REFS, 1) +#define ZL_REF_MON_STATUS_OK 0 /* all bits zeroed */ + +#define ZL_REG_DPLL_MON_STATUS(_idx) \ + ZL_REG_IDX(_idx, 2, 0x10, 1, ZL3073X_MAX_CHANNELS, 1) +#define ZL_DPLL_MON_STATUS_STATE GENMASK(1, 0) +#define ZL_DPLL_MON_STATUS_STATE_ACQUIRING 0 +#define ZL_DPLL_MON_STATUS_STATE_LOCK 1 +#define ZL_DPLL_MON_STATUS_STATE_HOLDOVER 2 +#define ZL_DPLL_MON_STATUS_HO_READY BIT(2) + +#define ZL_REG_DPLL_REFSEL_STATUS(_idx) \ + ZL_REG_IDX(_idx, 2, 0x30, 1, ZL3073X_MAX_CHANNELS, 1) +#define ZL_DPLL_REFSEL_STATUS_REFSEL GENMASK(3, 0) +#define ZL_DPLL_REFSEL_STATUS_STATE GENMASK(6, 4) +#define ZL_DPLL_REFSEL_STATUS_STATE_LOCK 4 + +/*********************** + * Register Page 5, DPLL + ***********************/ + +#define ZL_REG_DPLL_MODE_REFSEL(_idx) \ + ZL_REG_IDX(_idx, 5, 0x04, 1, ZL3073X_MAX_CHANNELS, 4) +#define ZL_DPLL_MODE_REFSEL_MODE GENMASK(2, 0) +#define ZL_DPLL_MODE_REFSEL_MODE_FREERUN 0 +#define ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER 1 +#define ZL_DPLL_MODE_REFSEL_MODE_REFLOCK 2 +#define ZL_DPLL_MODE_REFSEL_MODE_AUTO 3 +#define ZL_DPLL_MODE_REFSEL_MODE_NCO 4 +#define ZL_DPLL_MODE_REFSEL_REF GENMASK(7, 4) + +/*********************************** + * Register Page 9, Synth and Output + ***********************************/ + +#define ZL_REG_SYNTH_CTRL(_idx) \ + ZL_REG_IDX(_idx, 9, 0x00, 1, ZL3073X_NUM_SYNTHS, 1) +#define ZL_SYNTH_CTRL_EN BIT(0) +#define ZL_SYNTH_CTRL_DPLL_SEL GENMASK(6, 4) + +#define ZL_REG_SYNTH_PHASE_SHIFT_CTRL ZL_REG(9, 0x1e, 1) +#define ZL_REG_SYNTH_PHASE_SHIFT_MASK ZL_REG(9, 0x1f, 1) +#define ZL_REG_SYNTH_PHASE_SHIFT_INTVL ZL_REG(9, 0x20, 1) +#define ZL_REG_SYNTH_PHASE_SHIFT_DATA ZL_REG(9, 0x21, 2) + +#define ZL_REG_OUTPUT_CTRL(_idx) \ + ZL_REG_IDX(_idx, 9, 0x28, 1, ZL3073X_NUM_OUTS, 1) +#define ZL_OUTPUT_CTRL_EN BIT(0) +#define ZL_OUTPUT_CTRL_SYNTH_SEL GENMASK(6, 4) + +/******************************* + * Register Page 10, Ref Mailbox + *******************************/ + +#define ZL_REG_REF_MB_MASK ZL_REG(10, 0x02, 2) + +#define ZL_REG_REF_MB_SEM ZL_REG(10, 0x04, 1) +#define ZL_REF_MB_SEM_WR BIT(0) +#define ZL_REF_MB_SEM_RD BIT(1) + +#define ZL_REG_REF_FREQ_BASE ZL_REG(10, 0x05, 2) +#define ZL_REG_REF_FREQ_MULT ZL_REG(10, 0x07, 2) +#define ZL_REG_REF_RATIO_M ZL_REG(10, 0x09, 2) +#define ZL_REG_REF_RATIO_N ZL_REG(10, 0x0b, 2) + +#define ZL_REG_REF_CONFIG ZL_REG(10, 0x0d, 1) +#define ZL_REF_CONFIG_ENABLE BIT(0) +#define ZL_REF_CONFIG_DIFF_EN BIT(2) + +/******************************** + * Register Page 12, DPLL Mailbox + ********************************/ + +#define ZL_REG_DPLL_MB_MASK ZL_REG(12, 0x02, 2) + +#define ZL_REG_DPLL_MB_SEM ZL_REG(12, 0x04, 1) +#define ZL_DPLL_MB_SEM_WR BIT(0) +#define ZL_DPLL_MB_SEM_RD BIT(1) + +#define ZL_REG_DPLL_REF_PRIO(_idx) \ + ZL_REG_IDX(_idx, 12, 0x52, 1, ZL3073X_NUM_REFS / 2, 1) +#define ZL_DPLL_REF_PRIO_REF_P GENMASK(3, 0) +#define ZL_DPLL_REF_PRIO_REF_N GENMASK(7, 4) +#define ZL_DPLL_REF_PRIO_MAX 14 +#define ZL_DPLL_REF_PRIO_NONE 15 + +/********************************* + * Register Page 13, Synth Mailbox + 
*********************************/ + +#define ZL_REG_SYNTH_MB_MASK ZL_REG(13, 0x02, 2) + +#define ZL_REG_SYNTH_MB_SEM ZL_REG(13, 0x04, 1) +#define ZL_SYNTH_MB_SEM_WR BIT(0) +#define ZL_SYNTH_MB_SEM_RD BIT(1) + +#define ZL_REG_SYNTH_FREQ_BASE ZL_REG(13, 0x06, 2) +#define ZL_REG_SYNTH_FREQ_MULT ZL_REG(13, 0x08, 4) +#define ZL_REG_SYNTH_FREQ_M ZL_REG(13, 0x0c, 2) +#define ZL_REG_SYNTH_FREQ_N ZL_REG(13, 0x0e, 2) + +/********************************** + * Register Page 14, Output Mailbox + **********************************/ +#define ZL_REG_OUTPUT_MB_MASK ZL_REG(14, 0x02, 2) + +#define ZL_REG_OUTPUT_MB_SEM ZL_REG(14, 0x04, 1) +#define ZL_OUTPUT_MB_SEM_WR BIT(0) +#define ZL_OUTPUT_MB_SEM_RD BIT(1) + +#define ZL_REG_OUTPUT_MODE ZL_REG(14, 0x05, 1) +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT GENMASK(7, 4) +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_DISABLED 0 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_LVDS 1 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_DIFF 2 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_LOWVCM 3 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_2 4 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_1P 5 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_1N 6 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_INV 7 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV 12 +#define ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV 15 + +#define ZL_REG_OUTPUT_DIV ZL_REG(14, 0x0c, 4) +#define ZL_REG_OUTPUT_WIDTH ZL_REG(14, 0x10, 4) +#define ZL_REG_OUTPUT_ESYNC_PERIOD ZL_REG(14, 0x14, 4) +#define ZL_REG_OUTPUT_ESYNC_WIDTH ZL_REG(14, 0x18, 4) + +#endif /* _ZL3073X_REGS_H */ diff --git a/drivers/dpll/zl3073x/spi.c b/drivers/dpll/zl3073x/spi.c new file mode 100644 index 000000000000..af901b4d6dda --- /dev/null +++ b/drivers/dpll/zl3073x/spi.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/dev_printk.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/spi/spi.h> + +#include "core.h" + +static int zl3073x_spi_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct zl3073x_dev *zldev; + + zldev = zl3073x_devm_alloc(dev); + if (IS_ERR(zldev)) + return PTR_ERR(zldev); + + zldev->regmap = devm_regmap_init_spi(spi, &zl3073x_regmap_config); + if (IS_ERR(zldev->regmap)) + return dev_err_probe(dev, PTR_ERR(zldev->regmap), + "Failed to initialize regmap\n"); + + return zl3073x_dev_probe(zldev, spi_get_device_match_data(spi)); +} + +static const struct spi_device_id zl3073x_spi_id[] = { + { + .name = "zl30731", + .driver_data = (kernel_ulong_t)&zl30731_chip_info + }, + { + .name = "zl30732", + .driver_data = (kernel_ulong_t)&zl30732_chip_info, + }, + { + .name = "zl30733", + .driver_data = (kernel_ulong_t)&zl30733_chip_info, + }, + { + .name = "zl30734", + .driver_data = (kernel_ulong_t)&zl30734_chip_info, + }, + { + .name = "zl30735", + .driver_data = (kernel_ulong_t)&zl30735_chip_info, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, zl3073x_spi_id); + +static const struct of_device_id zl3073x_spi_of_match[] = { + { .compatible = "microchip,zl30731", .data = &zl30731_chip_info }, + { .compatible = "microchip,zl30732", .data = &zl30732_chip_info }, + { .compatible = "microchip,zl30733", .data = &zl30733_chip_info }, + { .compatible = "microchip,zl30734", .data = &zl30734_chip_info }, + { .compatible = "microchip,zl30735", .data = &zl30735_chip_info }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zl3073x_spi_of_match); + +static struct spi_driver zl3073x_spi_driver = { + .driver = { + .name = "zl3073x-spi", + .of_match_table = zl3073x_spi_of_match, + }, + .probe = zl3073x_spi_probe, + 
.id_table = zl3073x_spi_id, +}; +module_spi_driver(zl3073x_spi_driver); + +MODULE_AUTHOR("Ivan Vecera <ivecera@redhat.com>"); +MODULE_DESCRIPTION("Microchip ZL3073x SPI driver"); +MODULE_IMPORT_NS("ZL3073X"); +MODULE_LICENSE("GPL"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index b681c0663203..07f1e9dc1ca7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_PRIMARY; - /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_SECONDARY; + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; @@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } -static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, - int csrow_nr, int dimm) +static int calculate_cs_size(u32 mask, unsigned int cs_mode) { - u32 msb, weight, num_zero_bits; - u32 addr_mask_deinterleaved; - int size = 0; + int msb, weight, num_zero_bits; + u32 deinterleaved_mask; + + if (!mask) + return 0; /* * The number of zero bits in the mask is equal to the number of bits @@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, * without swapping with the most significant bit. This can be handled * by keeping the MSB where it is and ignoring the single zero bit. */ - msb = fls(addr_mask_orig) - 1; - weight = hweight_long(addr_mask_orig); + msb = fls(mask) - 1; + weight = hweight_long(mask); num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. */ - addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); + + return (deinterleaved_mask >> 2) + 1; +} + +static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, + unsigned int cs_mode, int csrow_nr, int dimm) +{ + int size; edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); - edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); - edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - size = (addr_mask_deinterleaved >> 2) + 1; + size = calculate_cs_size(addr_mask, cs_mode); + + edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); + size += calculate_cs_size(addr_mask_sec, cs_mode); /* Return size in MBs. */ return size >> 10; @@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { + u32 addr_mask = 0, addr_mask_sec = 0; int cs_mask_nr = csrow_nr; - u32 addr_mask_orig; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, if (!pvt->flags.zn_regs_v2) cs_mask_nr >>= 1; - /* Asymmetric dual-rank DIMM support. 
*/ - if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - else - addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; + if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) + addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; + + if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) + addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm); + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); } static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) @@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err) static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1); + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); } static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c index 1d51838a60c1..51c451c7f0f0 100755 --- a/drivers/edac/ecs.c +++ b/drivers/edac/ecs.c @@ -170,8 +170,10 @@ static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); - for (i = 0; i < ECS_MAX_ATTRS; i++) + for (i = 0; i < ECS_MAX_ATTRS; i++) { + sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr); fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + } sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); group->name = fru_ctx->name; diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c index d1a8caa85369..70a033a76233 100755 --- a/drivers/edac/mem_repair.c +++ b/drivers/edac/mem_repair.c @@ -333,6 +333,7 @@ static int mem_repair_create_desc(struct device *dev, for (i = 0; i < MR_MAX_ATTRS; i++) { memcpy(&ctx->mem_repair_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr); ctx->mem_repair_attrs[i] = &ctx->mem_repair_dev_attr[i].dev_attr.attr; } diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c index e421d3ebd959..f9d02af2fc3a 100755 --- a/drivers/edac/scrub.c +++ b/drivers/edac/scrub.c @@ -176,6 +176,7 @@ static int scrub_create_desc(struct device *scrub_dev, group = &scrub_ctx->group; for (i = 0; i < SCRUB_MAX_ATTRS; i++) { memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr); scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; } sprintf(scrub_ctx->name, "%s%d", "scrub", instance); diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index fe55613a8ea9..37eb2e6c2f9f 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -110,7 +110,7 @@ struct ffa_drv_info { struct work_struct sched_recv_irq_work; struct xarray partition_info; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); - struct mutex notify_lock; /* lock to protect notifier hashtable */ + rwlock_t notify_lock; /* lock to protect notifier hashtable */ }; static struct ffa_drv_info *drv_info; @@ -1250,13 +1250,12 @@ 
notifier_hnode_get_by_type(u16 notify_id, enum notify_type type) return NULL; } -static int -update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, - void *cb_data, bool is_registration, bool is_framework) +static int update_notifier_cb(struct ffa_device *dev, int notify_id, + struct notifier_cb_info *cb, bool is_framework) { struct notifier_cb_info *cb_info = NULL; enum notify_type type = ffa_notify_type_get(dev->vm_id); - bool cb_found; + bool cb_found, is_registration = !!cb; if (is_framework) cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, dev->vm_id, @@ -1270,20 +1269,10 @@ update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, return -EINVAL; if (is_registration) { - cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); - if (!cb_info) - return -ENOMEM; - - cb_info->dev = dev; - cb_info->cb_data = cb_data; - if (is_framework) - cb_info->fwk_cb = cb; - else - cb_info->cb = cb; - - hash_add(drv_info->notifier_hash, &cb_info->hnode, notify_id); + hash_add(drv_info->notifier_hash, &cb->hnode, notify_id); } else { hash_del(&cb_info->hnode); + kfree(cb_info); } return 0; @@ -1300,20 +1289,19 @@ static int __ffa_notify_relinquish(struct ffa_device *dev, int notify_id, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + write_lock(&drv_info->notify_lock); - rc = update_notifier_cb(dev, notify_id, NULL, NULL, false, - is_framework); + rc = update_notifier_cb(dev, notify_id, NULL, is_framework); if (rc) { pr_err("Could not unregister notification callback\n"); - mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } if (!is_framework) rc = ffa_notification_unbind(dev->vm_id, BIT(notify_id)); - mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } @@ -1334,6 +1322,7 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, { int rc; u32 flags = 0; + struct notifier_cb_info *cb_info = NULL; if (ffa_notifications_disabled()) return -EOPNOTSUPP; @@ -1341,28 +1330,40 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); + if (!cb_info) + return -ENOMEM; + + cb_info->dev = dev; + cb_info->cb_data = cb_data; + if (is_framework) + cb_info->fwk_cb = cb; + else + cb_info->cb = cb; + + write_lock(&drv_info->notify_lock); if (!is_framework) { if (is_per_vcpu) flags = PER_VCPU_NOTIFICATION_FLAG; rc = ffa_notification_bind(dev->vm_id, BIT(notify_id), flags); - if (rc) { - mutex_unlock(&drv_info->notify_lock); - return rc; - } + if (rc) + goto out_unlock_free; } - rc = update_notifier_cb(dev, notify_id, cb, cb_data, true, - is_framework); + rc = update_notifier_cb(dev, notify_id, cb_info, is_framework); if (rc) { pr_err("Failed to register callback for %d - %d\n", notify_id, rc); if (!is_framework) ffa_notification_unbind(dev->vm_id, BIT(notify_id)); } - mutex_unlock(&drv_info->notify_lock); + +out_unlock_free: + write_unlock(&drv_info->notify_lock); + if (rc) + kfree(cb_info); return rc; } @@ -1406,9 +1407,9 @@ static void handle_notif_callbacks(u64 bitmap, enum notify_type type) if (!(bitmap & 1)) continue; - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_type(notify_id, type); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->cb) cb_info->cb(notify_id, cb_info->cb_data); 
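+ /* The callback itself runs outside the read lock: only the hashtable lookup is protected here, while registration and removal take the write lock. */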
@@ -1446,9 +1447,9 @@ static void handle_fwk_notif_callbacks(u32 bitmap) ffa_rx_release(); - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->fwk_cb) cb_info->fwk_cb(notify_id, cb_info->cb_data, buf); @@ -1973,7 +1974,7 @@ static void ffa_notifications_setup(void) goto cleanup; hash_init(drv_info->notifier_hash); - mutex_init(&drv_info->notify_lock); + rwlock_init(&drv_info->notify_lock); drv_info->notif_enabled = true; return; diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds index c3a166675450..367907eb7d86 100644 --- a/drivers/firmware/efi/libstub/zboot.lds +++ b/drivers/firmware/efi/libstub/zboot.lds @@ -29,14 +29,12 @@ SECTIONS . = _etext; } -#ifdef CONFIG_EFI_SBAT .sbat : ALIGN(4096) { _sbat = .; *(.sbat) _esbat = ALIGN(4096); . = _esbat; } -#endif .data : ALIGN(4096) { _data = .; @@ -60,6 +58,6 @@ SECTIONS PROVIDE(__efistub__gzdata_size = ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start)); -PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext)); -PROVIDE(__data_size = ABSOLUTE(_end - _etext)); +PROVIDE(__data_rawsize = ABSOLUTE(_edata - _data)); +PROVIDE(__data_size = ABSOLUTE(_end - _data)); PROVIDE(__sbat_size = ABSOLUTE(_esbat - _sbat)); diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index e02f14f4bd7c..3a69fe3234c7 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -430,6 +430,9 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) return -EOPNOTSUPP; } + msg.chan_id = xfer->acpm_chan_id; + msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; + scoped_guard(mutex, &achan->tx_lock) { tx_front = readl(achan->tx.front); idx = (tx_front + 1) % achan->qlen; @@ -446,25 +449,15 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) /* Advance TX front. */ writel(idx, achan->tx.front); - } - msg.chan_id = xfer->acpm_chan_id; - msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; - ret = mbox_send_message(achan->chan, (void *)&msg); - if (ret < 0) - return ret; - - ret = acpm_wait_for_message_response(achan, xfer); + ret = mbox_send_message(achan->chan, (void *)&msg); + if (ret < 0) + return ret; - /* - * NOTE: we might prefer not to need the mailbox ticker to manage the - * transfer queueing since the protocol layer queues things by itself. - * Unfortunately, we have to kick the mailbox framework after we have - * received our message. 
- */ - mbox_client_txdone(achan->chan, ret); + mbox_client_txdone(achan->chan, 0); + } - return ret; + return acpm_wait_for_message_response(achan, xfer); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 85567d0d9545..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a0e9bf9b2710..81b3443c8d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -321,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, const struct firmware *fw; int r; - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -459,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { - dev_info(adev->dev, "use ip discovery information from file"); + drm_dbg(&adev->ddev, "use 
ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + if (r) goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1338,10 +1336,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } wafl_ver = 0; adev->gfx.xcc_mask = 0; @@ -2579,8 +2575,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } amdgpu_discovery_harvest_ip(adev); amdgpu_discovery_get_gfx_info(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d377a7c57d5e..ad9be3656653 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2235,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c9eba537de09..28eb846280dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1630,10 +1630,12 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } } r = mes_v11_0_query_sched_status(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b4f17332d466..6b222630f3fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1742,7 +1742,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b) + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index cef68df4c663..bb82c652e4c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1813c3ed0aa6..37f4b5b4a098 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1543,8 +1543,13 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 23f97da62808..0b40411b92a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1456,8 +1456,13 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 5a70ae17be04..a9bdf8d61d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1374,9 +1374,22 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(6, 0, 0): + if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 2): + if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 3): + if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ad47d0bdf777..86903eccbd4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1349,9 +1349,15 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq) + 
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 865dca2547de..a0f22ea6d15a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - if (gc_ip_version < IP_VERSION(9, 5, 0)) + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_UC; else mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. 
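+ * mmget_not_zero() succeeds only while the mm still has users, so the deferred work item never holds a stale mm pointer.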
+ */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bc4cd11bfc79..f58fa5da7fe5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3610,13 +3610,15 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) luminance_range = &conn_base->display_info.luminance_range; - if (luminance_range->max_luminance) { - caps->aux_min_input_signal = luminance_range->min_luminance; + if (luminance_range->max_luminance) caps->aux_max_input_signal = luminance_range->max_luminance; - } else { - caps->aux_min_input_signal = 0; + else caps->aux_max_input_signal = 512; - } + + if (luminance_range->min_luminance) + caps->aux_min_input_signal = luminance_range->min_luminance; + else + caps->aux_min_input_signal = 1; min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); if (min_input_signal_override >= 0) @@ -4718,16 +4720,16 @@ static 
int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ +/* Rescale from [min..max] to [0..MAX_BACKLIGHT_LEVEL] */ static inline u32 scale_input_to_fw(int min, int max, u64 input) { - return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); + return DIV_ROUND_CLOSEST_ULL(input * MAX_BACKLIGHT_LEVEL, max - min); } -/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ +/* Rescale from [0..MAX_BACKLIGHT_LEVEL] to [min..max] */ static inline u32 scale_fw_to_input(int min, int max, u64 input) { - return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), MAX_BACKLIGHT_LEVEL); } static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, @@ -4947,7 +4949,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, caps->ac_level, caps->dc_level); } else - props.brightness = props.max_brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL; if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index d4395b92fb85..9e3e51a2dc49 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1029,6 +1029,10 @@ enum dc_edid_status dm_helpers_read_local_edid( return EDID_NO_RESPONSE; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() + if (!edid || + edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH) + return EDID_BAD_INPUT; + sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1); memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index d562ddeca512..c9f6c6275ca1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -974,6 +974,7 @@ struct dc_crtc_timing { uint32_t pix_clk_100hz; uint32_t min_refresh_in_uhz; + uint32_t max_refresh_in_uhz; uint32_t vic; uint32_t hdmi_vic; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 3ba9b62ba70b..250f09922d2f 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); + } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { + /* When the target refresh rate is the maximum panel refresh rate + * round up the vtotal value to prevent off-by-one error causing + * v_total_min to be below the panel's lower bound + */ + v_total = div64_u64(div64_u64(((unsigned long long)( + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), + stream->timing.h_total) + (1000000 - 1), 1000000); } else { v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), diff --git 
a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index b3f588b71a7d..af6f79793407 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 79b009ab9396..29b0358a7b6d 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES; panel_bridge->bridge.type = connector_type; + panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first; drm_bridge_add(&panel_bridge->bridge); @@ -413,8 +414,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, return bridge; } - bridge->pre_enable_prev_first = panel->prepare_prev_first; - *ptr = bridge; devres_add(dev, ptr); @@ -456,8 +455,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, if (ret) return ERR_PTR(ret); - bridge->pre_enable_prev_first = panel->prepare_prev_first; - return bridge; } EXPORT_SYMBOL(drmm_panel_bridge_add); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 0014c497e3fe..bccc88d25948 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1095,7 +1095,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1293,7 +1293,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 60224f476e1d..de9c23537465 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -348,12 +348,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata, * 200 ms. We'll assume that the panel driver will have the hardcoded * delay in its prepare and always disable HPD. * - * If HPD somehow makes sense on some future panel we'll have to - * change this to be conditional on someone specifying that HPD should - * be used. + * For DisplayPort bridge type, we need HPD. So we use the bridge type + * to conditionally disable HPD. + * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms() + * can be called before. So for DisplayPort, HPD will be enabled once + * bridge type is set. 
We are using bridge type instead of "no-hpd" + * property because it is not used properly in devicetree description + * and hence is unreliable. */ - regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, - HPD_DISABLE); + + if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, + HPD_DISABLE); pdata->comms_enabled = true; @@ -1195,9 +1201,14 @@ static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; - pm_runtime_get_sync(pdata->dev); + /* + * Runtime reference is grabbed in ti_sn_bridge_hpd_enable() + * as the chip won't report HPD just after being powered on. + * HPD_DEBOUNCED_STATE reflects correct state only after the + * debounce time (~100-400 ms). + */ + regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val); - pm_runtime_put_autosuspend(pdata->dev); return val & HPD_DEBOUNCED_STATE ? connector_status_connected : connector_status_disconnected; @@ -1220,6 +1231,26 @@ static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry * debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); } +static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + /* + * Device needs to be powered on before reading the HPD state + * for reliable hpd detection in ti_sn_bridge_detect() due to + * the high debounce time. + */ + + pm_runtime_get_sync(pdata->dev); +} + +static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + pm_runtime_put_autosuspend(pdata->dev); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1234,6 +1265,8 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .debugfs_init = ti_sn65dsi86_debugfs_init, + .hpd_enable = ti_sn_bridge_hpd_enable, + .hpd_disable = ti_sn_bridge_hpd_disable, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1321,8 +1354,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; - if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_HPD; + /* + * If comms were already enabled they would have been enabled + * with the wrong value of HPD_DISABLE. Update it now. Comms + * could be enabled if anyone is holding a pm_runtime reference + * (like if a GPIO is in use). Note that in most cases nobody + * is doing AUX channel xfers before the bridge is added so + * HPD doesn't _really_ matter then. The only exception is in + * the eDP case where the panel wants to read the EDID before + * the bridge is added. We always consistently have HPD disabled + * for eDP. 
+ */ + mutex_lock(&pdata->comms_mutex); + if (pdata->comms_enabled) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); + }; drm_bridge_add(&pdata->bridge); diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 7d2e499ea5de..262e93e07a28 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -708,11 +708,14 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_audio || bridge_connector->bridge_dp_audio) { struct device *dev; + struct drm_bridge *bridge; if (bridge_connector->bridge_hdmi_audio) - dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_hdmi_audio; else - dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_dp_audio; + + dev = bridge->hdmi_audio_dev; ret = drm_connector_hdmi_audio_init(connector, dev, &drm_bridge_connector_hdmi_audio_funcs, diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f2a6559a2710..dc622c78db9d 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -725,7 +725,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, * monitor doesn't power down exactly after the throw away read. */ if (!aux->is_remote) { - ret = drm_dp_dpcd_probe(aux, DP_DPCD_REV); + ret = drm_dp_dpcd_probe(aux, DP_LANE0_1_STATUS); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 1e659d2660f7..4bf0a76bb35e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -212,6 +212,35 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required + * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked() + * to release the reference. + */ +void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */ + drm_gem_object_handle_get(obj); +} +EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked); + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -242,8 +271,14 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handles + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. 
+ */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; @@ -268,6 +303,7 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) if (final) drm_gem_object_put(obj); } +EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked); /* * Called at device or object close to release the file's @@ -389,8 +425,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. Preload and perform diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 6f72e7a0f427..14a87788695d 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb) unsigned int i; for (i = 0; i < fb->format->num_planes; i++) - drm_gem_object_put(fb->obj[i]); + drm_gem_object_handle_put_unlocked(fb->obj[i]); drm_framebuffer_cleanup(fb); kfree(fb); @@ -182,8 +182,10 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, if (!objs[i]) { drm_dbg_kms(dev, "Failed to lookup GEM object\n"); ret = -ENOENT; - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; } + drm_gem_object_handle_get_unlocked(objs[i]); + drm_gem_object_put(objs[i]); min_size = (height - 1) * mode_cmd->pitches[i] + drm_format_info_min_pitch(info, i, width) @@ -193,22 +195,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, drm_dbg_kms(dev, "GEM object size (%zu) smaller than minimum size (%u) for plane %d\n", objs[i]->size, min_size, i); - drm_gem_object_put(objs[i]); + drm_gem_object_handle_put_unlocked(objs[i]); ret = -EINVAL; - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; } } ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); if (ret) - goto err_gem_object_put; + goto err_gem_object_handle_put_unlocked; return 0; -err_gem_object_put: +err_gem_object_handle_put_unlocked: while (i > 0) { --i; - drm_gem_object_put(objs[i]); + drm_gem_object_handle_put_unlocked(objs[i]); } return ret; } diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index e44f28fd81d3..be77d61a16ce 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index e5184a0c2465..21fd647f8ce1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index edbeab88ff2b..d983ee85cf13 100644 
--- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -343,17 +343,18 @@ EXPORT_SYMBOL(drm_writeback_connector_init_with_encoder); /** * drm_writeback_connector_cleanup - Cleanup the writeback connector * @dev: DRM device - * @wb_connector: Pointer to the writeback connector to clean up + * @data: Pointer to the writeback connector to clean up * * This will decrement the reference counter of blobs and destroy properties. It * will also clean the remaining jobs in this writeback connector. Caution: This helper will not * clean up the attached encoder and the drm_connector. */ static void drm_writeback_connector_cleanup(struct drm_device *dev, - struct drm_writeback_connector *wb_connector) + void *data) { unsigned long flags; struct drm_writeback_job *pos, *n; + struct drm_writeback_connector *wb_connector = data; delete_writeback_properties(dev); drm_property_blob_put(wb_connector->pixel_formats_blob_ptr); @@ -405,7 +406,7 @@ int drmm_writeback_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drmm_add_action_or_reset(dev, (void *)drm_writeback_connector_cleanup, + ret = drmm_add_action_or_reset(dev, drm_writeback_connector_cleanup, wb_connector); if (ret) return ret; diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f91daefa9d2b..805aa28c1723 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -636,6 +636,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index c394cc702d7d..205c238cc73a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? 
DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4787fee4696f..d44401a695e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -174,7 +174,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp); + DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index ea9f66037600..03c8490af4f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task * task->src.rect.h = task->dst.rect.h = UINT_MAX; task->transform.rotation = DRM_MODE_ROTATE_0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task); return task; } @@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, } DRM_DEV_DEBUG_DRIVER(task->dev, - "Got task %pK configuration from userspace\n", + "Got task %p configuration from userspace\n", task); return 0; } @@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, struct exynos_drm_ipp_task *task) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task); exynos_drm_ipp_task_release_buf(&task->src); exynos_drm_ipp_task_release_buf(&task->dst); @@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, DRM_EXYNOS_IPP_FORMAT_DESTINATION); if (!fmt) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: %s format not supported\n", + "Task %p: %s format not supported\n", task, buf == src ? 
"src" : "dst"); return -EINVAL; } @@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) bool rotate = (rotation != DRM_MODE_ROTATE_0); bool scale = false; - DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task); if (src->rect.w == UINT_MAX) src->rect.w = src->buf.width; @@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) dst->rect.x + dst->rect.w > (dst->buf.width) || dst->rect.y + dst->rect.h > (dst->buf.height)) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: defined area is outside provided buffers\n", + "Task %p: defined area is outside provided buffers\n", task); return -EINVAL; } @@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && src->buf.fourcc != dst->buf.fourcc)) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n", task); return -EINVAL; } @@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) if (ret) return ret; - DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n", task); return ret; @@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; int ret = 0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n", task); ret = exynos_drm_ipp_task_setup_buffer(src, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: src buffer setup failed\n", + "Task %p: src buffer setup failed\n", task); return ret; } ret = exynos_drm_ipp_task_setup_buffer(dst, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: dst buffer setup failed\n", + "Task %p: dst buffer setup failed\n", task); return ret; } - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n", task); return ret; @@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) struct exynos_drm_ipp *ipp = task->ipp; unsigned long flags; - DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n", ipp->id, task, ret); spin_lock_irqsave(&ipp->lock, flags); @@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) spin_unlock_irqrestore(&ipp->lock, flags); DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, selected task %pK to run\n", ipp->id, + "ipp: %d, selected task %p to run\n", ipp->id, task); ret = ipp->funcs->commit(ipp, task); @@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, */ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, nonblocking processing task %pK\n", + "ipp: %d, nonblocking processing task %p\n", ipp->id, task); task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; exynos_drm_ipp_schedule_task(task->ipp, task); ret = 0; } else { - DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n", ipp->id, task); exynos_drm_ipp_schedule_task(ipp, task); ret = wait_event_interruptible(ipp->done_wq, 
diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index 74bb3bedf30f..5111bdc3075b 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -103,8 +103,8 @@ static void get_ana_cp_int_prop(u64 vco_clk, DIV_ROUND_DOWN_ULL(curve_1_interpolated, CURVE0_MULTIPLIER))); ana_cp_int_temp = - DIV_ROUND_CLOSEST_ULL(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), - CURVE2_MULTIPLIER); + DIV64_U64_ROUND_CLOSEST(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), + CURVE2_MULTIPLIER); *ana_cp_int = max(1, min(ana_cp_int_temp, 127)); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 21c1e10caf68..2007bb9d974d 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1589,8 +1589,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector) static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; - struct intel_display *display = to_intel_display(connector); struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns, extra_byte_count, tlpx_ui; u32 ui_num, ui_den; diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 990bfaba3ce4..5bc696bfbb0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -108,7 +108,7 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 88870844b5bd..2fb7a9e7efec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -73,8 +73,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! 
*/ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +91,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 0a3b26bb4d73..9f81fa960b46 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -26,6 +26,7 @@ #include <linux/i2c.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -136,6 +137,14 @@ struct panel_desc { int connector_type; }; +struct panel_desc_dsi { + struct panel_desc desc; + + unsigned long flags; + enum mipi_dsi_pixel_format format; + unsigned int lanes; +}; + struct panel_simple { struct drm_panel base; @@ -430,10 +439,7 @@ static const struct drm_panel_funcs panel_simple_funcs = { .get_timings = panel_simple_get_timings, }; -static struct panel_desc panel_dpi; - -static int panel_dpi_probe(struct device *dev, - struct panel_simple *panel) +static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; const struct device_node *np; @@ -445,17 +451,17 @@ static int panel_dpi_probe(struct device *dev, np = dev->of_node; desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); if (!timing) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_get_display_timing(np, "panel-timing", timing); if (ret < 0) { dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n", np); - return ret; + return ERR_PTR(ret); } desc->timings = timing; @@ -473,9 +479,7 @@ static int panel_dpi_probe(struct device *dev, /* We do not know the connector for the 
DT node, so guess it */ desc->connector_type = DRM_MODE_CONNECTOR_DPI; - panel->desc = desc; - - return 0; + return desc; } #define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \ @@ -570,8 +574,44 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static const struct panel_desc *panel_simple_get_desc(struct device *dev) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) && + dev_is_mipi_dsi(dev)) { + const struct panel_desc_dsi *dsi_desc; + + dsi_desc = of_device_get_match_data(dev); + if (!dsi_desc) + return ERR_PTR(-ENODEV); + + return &dsi_desc->desc; + } + + if (dev_is_platform(dev)) { + const struct panel_desc *desc; + + desc = of_device_get_match_data(dev); + if (!desc) { + /* + * panel-dpi probes without a descriptor and + * panel_dpi_probe() will initialize one for us + * based on the device tree. + */ + if (of_device_is_compatible(dev->of_node, "panel-dpi")) + return panel_dpi_probe(dev); + else + return ERR_PTR(-ENODEV); + } + + return desc; + } + + return ERR_PTR(-ENODEV); +} + +static struct panel_simple *panel_simple_probe(struct device *dev) +{ + const struct panel_desc *desc; struct panel_simple *panel; struct display_timing dt; struct device_node *ddc; @@ -579,27 +619,31 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) u32 bus_flags; int err; + desc = panel_simple_get_desc(dev); + if (IS_ERR(desc)) + return ERR_CAST(desc); + panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) - return PTR_ERR(panel); + return ERR_CAST(panel); panel->desc = desc; panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) - return PTR_ERR(panel->supply); + return ERR_CAST(panel->supply); panel->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(panel->enable_gpio)) - return dev_err_probe(dev, PTR_ERR(panel->enable_gpio), - "failed to request GPIO\n"); + return dev_err_cast_probe(dev, panel->enable_gpio, + "failed to request GPIO\n"); err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation); if (err) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); - return err; + return ERR_PTR(err); } ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0); @@ -608,19 +652,12 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) of_node_put(ddc); if (!panel->ddc) - return -EPROBE_DEFER; + return ERR_PTR(-EPROBE_DEFER); } - if (desc == &panel_dpi) { - /* Handle the generic panel-dpi binding */ - err = panel_dpi_probe(dev, panel); - if (err) - goto free_ddc; - desc = panel->desc; - } else { - if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) - panel_simple_parse_panel_timing_node(dev, panel, &dt); - } + if (!of_device_is_compatible(dev->of_node, "panel-dpi") && + !of_get_display_timing(dev->of_node, "panel-timing", &dt)) + panel_simple_parse_panel_timing_node(dev, panel, &dt); if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) { /* Optional data-mapping property for overriding bus format */ @@ -703,7 +740,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) drm_panel_add(&panel->base); - return 0; + return panel; disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); @@ -712,7 +749,7 @@ free_ddc: if (panel->ddc) put_device(&panel->ddc->dev); - return err; + return ERR_PTR(err); } static void panel_simple_shutdown(struct 
device *dev) @@ -5367,7 +5404,12 @@ static const struct of_device_id platform_of_match[] = { }, { /* Must be the last entry */ .compatible = "panel-dpi", - .data = &panel_dpi, + + /* + * Explicitly NULL, the panel_desc structure will be + * allocated by panel_dpi_probe(). + */ + .data = NULL, }, { /* sentinel */ } @@ -5376,13 +5418,13 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { - const struct panel_desc *desc; + struct panel_simple *panel; - desc = of_device_get_match_data(&pdev->dev); - if (!desc) - return -ENODEV; + panel = panel_simple_probe(&pdev->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); - return panel_simple_probe(&pdev->dev, desc); + return 0; } static void panel_simple_platform_remove(struct platform_device *pdev) @@ -5412,14 +5454,6 @@ static struct platform_driver panel_simple_platform_driver = { .shutdown = panel_simple_platform_shutdown, }; -struct panel_desc_dsi { - struct panel_desc desc; - - unsigned long flags; - enum mipi_dsi_pixel_format format; - unsigned int lanes; -}; - static const struct drm_display_mode auo_b080uan01_mode = { .clock = 154500, .hdisplay = 1200, @@ -5653,16 +5687,14 @@ MODULE_DEVICE_TABLE(of, dsi_of_match); static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) { const struct panel_desc_dsi *desc; + struct panel_simple *panel; int err; - desc = of_device_get_match_data(&dsi->dev); - if (!desc) - return -ENODEV; - - err = panel_simple_probe(&dsi->dev, &desc->desc); - if (err < 0) - return err; + panel = panel_simple_probe(&dsi->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); + desc = container_of(panel->desc, struct panel_desc_dsi, desc); dsi->mode_flags = desc->flags; dsi->format = desc->format; dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 4d62c78e7d1e..f7532db3831f 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -362,14 +362,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv, if (!__screen_info_vbe_mode_nonvga(si)) { vesa->cmap_write = vesadrm_vga_cmap_write; -#if defined(CONFIG_X86_32) } else { +#if defined(CONFIG_X86_32) phys_addr_t pmi_base = __screen_info_vesapm_info_base(si); - const u16 *pmi_addr = phys_to_virt(pmi_base); - vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; - vesa->cmap_write = vesadrm_pmi_cmap_write; + if (pmi_base) { + const u16 *pmi_addr = phys_to_virt(pmi_base); + + vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; + vesa->cmap_write = vesadrm_pmi_cmap_write; + } else #endif + if (format->is_color_indexed) + drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n"); } #ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 15cab9bda17f..bd90404ea609 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = NULL; @@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - 
kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b51f0b648a08..411e47702f8a 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -101,6 +101,12 @@ enum v3d_gen { V3D_GEN_71 = 71, }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -112,6 +118,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index d7d16da78db3..37bf5eecdd2c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_sms(v3d); v3d_reset_v3d(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2cca5d3a26a2..a515a301e480 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -260,7 +260,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -271,17 +271,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -289,8 +296,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -331,6 +342,12 @@ v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. 
*/ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0695a342b1ef..5205552b1970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -749,7 +749,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, fifo_size, - MEMREMAP_WB); + MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(dev->fifo_mem)) { drm_err(&dev->drm, diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index fcc2677a4229..99a91355842e 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE - tristate "Intel Xe Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + tristate "Intel Xe2 Graphics" + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE @@ -46,7 +47,8 @@ config DRM_XE select AUXILIARY_BUS select HMM_MIRROR help - Experimental driver for Intel Xe series GPUs + Driver for Intel Xe2 series GPUs and later. Experimental support + for Xe series is also available. If "M" is selected, the module will be called xe. diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..9f4ade25787a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe) spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + if (!xe->display.hotplug.dp_wq) + return -ENOMEM; return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. 
*/ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..55259969480b 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 5394a1373a6b..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -40,6 +40,7 @@ #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PWR_LIM_VAL REG_GENMASK(14, 0) #define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) #define PWR_LIM_TIME REG_GENMASK(23, 17) #define PWR_LIM_TIME_X REG_GENMASK(23, 22) #define PWR_LIM_TIME_Y REG_GENMASK(21, 17) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..e9f3c1a53db2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,6 +40,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * use cases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e.
150us, @@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(&gt->global_invl_lock); - xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(&gt->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * use cases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ?
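xe_device_l2_flush() above kicks XE2_GLOBAL_INVAL and then relies on xe_mmio_wait32() to poll the register until the invalidation bit clears, warning once if it does not within 500 us. A self-contained sketch of that poll-with-timeout idiom, assuming hypothetical read_reg()/delay_us() callbacks in place of the xe MMIO accessors and timer helpers:

#include <errno.h>
#include <stdint.h>

static int wait_reg(uint32_t (*read_reg)(void), void (*delay_us)(unsigned int),
		    uint32_t mask, uint32_t want, unsigned int timeout_us)
{
	unsigned int waited;

	for (waited = 0; waited < timeout_us; waited += 10) {
		if ((read_reg() & mask) == want)
			return 0;	/* bit reached the expected value */
		delay_us(10);
	}
	/* one final read after the deadline so a slow poll can still succeed */
	return (read_reg() & mask) == want ? 0 : -ETIMEDOUT;
}

On timeout the driver above logs once per gt ("Global invalidation timeout") rather than failing the flush outright.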
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..2c799958c1e4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d0ac48d8f4f7..bbcbb348256f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -34,6 +34,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3beaaa7b25c1..c0ca61695d76 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -51,9 +54,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + 
usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - 
pc->user_requested_max = freq; + guard(mutex)(&pc->freq_lock); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. 
+ */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9567f6700cf2..2ac87ff4a057 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts.
*/ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 74f31639b37f..f008e8049700 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -159,8 +159,8 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att return ret; } -static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, - u32 uval) +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); u32 val0, val1; @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at channel, val0, val1, ret); if (attr == PL1_HWMON_ATTR) - val0 = uval; + val0 = (val0 & ~clr) | set; else return -EIO; @@ -339,7 +339,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe if (hwmon->xe->info.has_mbx_power_limits) { drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", PWR_ATTR_TO_STR(attr), channel); - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val); } else { reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); @@ -370,10 +370,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe } if (hwmon->xe->info.has_mbx_power_limits) - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); else - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, - reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -563,14 +562,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); - r = (r & ~PWR_LIM_TIME) | rxy; - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); - } else { + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), PWR_LIM_TIME, rxy); - } mutex_unlock(&hwmon->hwmon_lock); @@ -1138,12 +1134,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) } else { drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); /* Write default limits to read from pcode from now on. 
*/ - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_CARD, - hwmon->pl1_on_boot[CHANNEL_CARD]); - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_PKG, - hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); hwmon->scl_shift_power = PWR_UNIT; hwmon->scl_shift_energy = ENERGY_UNIT; hwmon->scl_shift_time = TIME_UNIT; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bf7c3981897d..6e7b70532d11 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -40,6 +40,7 @@ #define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +static size_t wa_bb_offset(struct xe_lrc *lrc) +{ + return lrc->bo->size - LRC_WA_BB_SIZE; } /* @@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define CONTEXT_ACTIVE 1ULL static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { + const size_t max_size = LRC_WA_BB_SIZE; u32 *cmd, *buf = NULL; - if (lrc->bb_per_ctx_bo->vmap.is_iomem) { - buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (lrc->bo->vmap.is_iomem) { + buf = kmalloc(max_size, GFP_KERNEL); if (!buf) return -ENOMEM; cmd = buf; } else { - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; @@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, - buf, (cmd - buf) * sizeof(*cmd)); + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, + wa_bb_offset(lrc), buf, + (cmd - buf) * sizeof(*cmd)); kfree(buf); } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + + wa_bb_offset(lrc) + 1); return 0; } @@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, + lrc_size + LRC_WA_BB_SIZE, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - lrc->size = lrc_size; lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - + LRC_WA_BB_SIZE; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..883e550a9423 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -53,9 +53,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..7acdc4c78866 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..69c1d7fc695e 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,7 +21,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) @@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000) MEDIA_VERSION_RANGE(1301, 3000) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + +# SoC workaround - currently applies to all platforms with the following +# primary GT GMDID +14022085890 GRAPHICS_VERSION(2001) diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index 6f251b284018..b00687e67ce8 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -430,14 +430,20 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id ret = appletb_kbd_set_mode(kbd, appletb_tb_def_mode); if (ret) { dev_err_probe(dev, ret, "Failed to set touchbar mode\n"); - goto close_hw; + goto unregister_handler; } hid_set_drvdata(hdev, kbd); return 0; +unregister_handler: + input_unregister_handler(&kbd->inp_handler); close_hw: + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); stop_hw: hid_hw_stop(hdev); @@ -451,7 +457,10 @@ static void appletb_kbd_remove(struct hid_device *hdev) appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); input_unregister_handler(&kbd->inp_handler); - timer_delete_sync(&kbd->inactivity_timer); + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 8433306148d5..c6b6b1029540 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3298,8 +3298,8 @@ static const char *keys[KEY_MAX + 1] = { [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", - [BTN_GEAR_DOWN] = "WheelBtn", - [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", + [BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp", + [BTN_WHEEL] = "BtnWheel", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index defcf91fdd14..0ad7d25d9864 100644 --- a/drivers/hid/hid-elecom.c +++ 
b/drivers/hid/hid-elecom.c @@ -89,7 +89,8 @@ static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; case USB_DEVICE_ID_ELECOM_M_DT1URBK: case USB_DEVICE_ID_ELECOM_M_DT1DRBK: - case USB_DEVICE_ID_ELECOM_M_HT1URBK: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_010C: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_019B: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D: /* * Report descriptor format: @@ -122,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e3fb4e2fe911..33cc5820f2be 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -312,6 +312,8 @@ #define USB_DEVICE_ID_ASUS_AK1D 0x1125 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 #define USB_DEVICE_ID_CHICONY_ACER_SWITCH12 0x1421 +#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA 0xb824 +#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2 0xb82c #define USB_VENDOR_ID_CHUNGHWAT 0x2247 #define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001 @@ -446,7 +448,8 @@ #define USB_DEVICE_ID_ELECOM_M_XT4DRBK 0x00fd #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff -#define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_010C 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_019B 0x019b #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c @@ -819,6 +822,7 @@ #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 +#define USB_DEVICE_ID_LENOVO_X1_TAB2 0x60a4 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 #define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe #define USB_DEVICE_ID_LENOVO_X12_TAB2 0x61ae @@ -1525,4 +1529,7 @@ #define USB_VENDOR_ID_SIGNOTEC 0x2133 #define USB_DEVICE_ID_SIGNOTEC_VIEWSONIC_PD1011 0x0018 +#define USB_VENDOR_ID_SMARTLINKTECHNOLOGY 0x4c4a +#define USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155 0x4155 + #endif diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9d80635a91eb..ff1784b5c2a4 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -2343,7 +2343,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (list_empty(&hid->inputs)) { - hid_err(hid, "No inputs registered, leaving\n"); + hid_dbg(hid, "No inputs registered, leaving\n"); goto out_unwind; } diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index af29ba840522..b3121fa7a72d 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -492,6 +492,7 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB: case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, usage, bit, max); default: @@ -548,11 +549,14 @@ static void 
lenovo_features_set_cptkbd(struct hid_device *hdev) /* * Tell the keyboard a driver understands it, and turn F7, F9, F11 into - * regular keys + * regular keys (Compact only) */ - ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); - if (ret) - hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD || + hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) { + ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); + if (ret) + hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + } /* Switch middle button to native mode */ ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); @@ -605,6 +609,7 @@ static ssize_t attr_fn_lock_store(struct device *dev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value); if (ret) @@ -861,6 +866,7 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_event_tp10ubkbd(hdev, field, usage, value); default: @@ -1144,6 +1150,7 @@ static int lenovo_led_brightness_set(struct led_classdev *led_cdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value); break; @@ -1384,6 +1391,7 @@ static int lenovo_probe(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_probe_tp10ubkbd(hdev); break; @@ -1473,6 +1481,7 @@ static void lenovo_remove(struct hid_device *hdev) case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: lenovo_remove_tp10ubkbd(hdev); break; @@ -1524,6 +1533,8 @@ static const struct hid_device_id lenovo_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB2) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) }, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index b41001e02da7..a1c54ffe02b4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2132,12 +2132,18 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) }, - /* Lenovo X1 TAB Gen 2 */ + /* Lenovo X1 TAB Gen 1 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, + /* Lenovo X1 TAB Gen 2 */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X1_TAB2) }, + /* Lenovo X1 TAB Gen 3 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, diff 
--git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 839d5bcd72b1..fb4985988615 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -308,6 +308,7 @@ enum joycon_ctlr_state { JOYCON_CTLR_STATE_INIT, JOYCON_CTLR_STATE_READ, JOYCON_CTLR_STATE_REMOVED, + JOYCON_CTLR_STATE_SUSPENDED, }; /* Controller type received as part of device info */ @@ -2750,14 +2751,46 @@ static void nintendo_hid_remove(struct hid_device *hdev) static int nintendo_hid_resume(struct hid_device *hdev) { - int ret = joycon_init(hdev); + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + int ret; + + hid_dbg(hdev, "resume\n"); + if (!joycon_using_usb(ctlr)) { + hid_dbg(hdev, "no-op resume for bt ctlr\n"); + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + return 0; + } + ret = joycon_init(hdev); if (ret) - hid_err(hdev, "Failed to restore controller after resume"); + hid_err(hdev, + "Failed to restore controller after resume: %d\n", + ret); + else + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; return ret; } +static int nintendo_hid_suspend(struct hid_device *hdev, pm_message_t message) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + hid_dbg(hdev, "suspend: %d\n", message.event); + /* + * Avoid any blocking loops in suspend/resume transitions. + * + * joycon_enforce_subcmd_rate() can result in repeated retries if for + * whatever reason the controller stops providing input reports. + * + * This has been observed with bluetooth controllers which lose + * connectivity prior to suspend (but not long enough to result in + * complete disconnection). + */ + ctlr->ctlr_state = JOYCON_CTLR_STATE_SUSPENDED; + return 0; +} + #endif static const struct hid_device_id nintendo_hid_devices[] = { @@ -2796,6 +2829,7 @@ static struct hid_driver nintendo_hid_driver = { #ifdef CONFIG_PM .resume = nintendo_hid_resume, + .suspend = nintendo_hid_suspend, #endif }; static int __init nintendo_init(void) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 7fefeb413ec3..9bf9ce8dc803 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -410,7 +410,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, #endif @@ -757,6 +758,8 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) }, { HID_USB_DEVICE(USB_VENDOR_ID_AXENTIA, USB_DEVICE_ID_AXENTIA_FM_RADIO) }, { HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI4713) }, @@ -904,6 +907,7 @@ static const struct hid_device_id hid_ignore_list[] = { #endif { 
HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SMARTLINKTECHNOLOGY, USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155) }, { } }; diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index 07e90d51f073..fa5d68c36313 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845 #define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 #define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 +#define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index ff0fc8010072..c57483224db6 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -27,10 +27,12 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_NONE, ISHTP_DRIVER_DATA_LNL_M, ISHTP_DRIVER_DATA_PTL, + ISHTP_DRIVER_DATA_WCL, }; #define ISH_FW_GEN_LNL_M "lnlm" #define ISH_FW_GEN_PTL "ptl" +#define ISH_FW_GEN_WCL "wcl" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -42,6 +44,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_PTL] = { .fw_generation = ISH_FW_GEN_PTL, }, + [ISHTP_DRIVER_DATA_WCL] = { + .fw_generation = ISH_FW_GEN_WCL, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -67,9 +72,10 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_MTL_P)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL}, + {PCI_DEVICE_DATA(INTEL, ISH_LNL_M, ISHTP_DRIVER_DATA_LNL_M)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c index f493df0d5dc4..a63f8c833252 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c @@ -4,6 +4,7 @@ #include <linux/bitfield.h> #include <linux/hid.h> #include <linux/hid-over-i2c.h> +#include <linux/unaligned.h> #include "intel-thc-dev.h" #include "intel-thc-dma.h" @@ -200,6 +201,9 @@ int quicki2c_set_report(struct quicki2c_device *qcdev, u8 report_type, int quicki2c_reset(struct quicki2c_device *qcdev) { + u16 input_reg = le16_to_cpu(qcdev->dev_desc.input_reg); + size_t read_len = HIDI2C_LENGTH_LEN; + u32 prd_len = read_len; int ret; qcdev->reset_ack = false; @@ -213,12 +217,32 @@ int quicki2c_reset(struct quicki2c_device *qcdev) ret = wait_event_interruptible_timeout(qcdev->reset_ack_wq, qcdev->reset_ack, HIDI2C_RESET_TIMEOUT * HZ); - if (ret <= 0 || !qcdev->reset_ack) { + if (qcdev->reset_ack) + return 0; + + /* + * Manually read reset response if it wasn't received, in case reset interrupt + * was missed by touch 
device or THC hardware. + */ + ret = thc_tic_pio_read(qcdev->thc_hw, input_reg, read_len, &prd_len, + (u32 *)qcdev->input_buf); + if (ret) { + dev_err_once(qcdev->dev, "Read Reset Response failed, ret %d\n", ret); + return ret; + } + + /* + * Check response packet length, it's first 16 bits of packet. + * If response packet length is zero, it's reset response, otherwise not. + */ + if (get_unaligned_le16(qcdev->input_buf)) { dev_err_once(qcdev->dev, "Wait reset response timed out ret:%d timeout:%ds\n", ret, HIDI2C_RESET_TIMEOUT); return -ETIMEDOUT; } + qcdev->reset_ack = true; + return 0; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index eaf099b2efdb..9a57504e51a1 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2048,14 +2048,18 @@ static int wacom_initialize_remotes(struct wacom *wacom) remote->remote_dir = kobject_create_and_add("wacom_remote", &wacom->hdev->dev.kobj); - if (!remote->remote_dir) + if (!remote->remote_dir) { + kfifo_free(&remote->remote_fifo); return -ENOMEM; + } error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs); if (error) { hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); + kfifo_free(&remote->remote_fifo); + kobject_put(remote->remote_dir); return error; } @@ -2901,6 +2905,7 @@ static void wacom_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_delayed_work_sync(&wacom->init_work); + cancel_delayed_work_sync(&wacom->aes_battery_work); cancel_work_sync(&wacom->wireless_work); cancel_work_sync(&wacom->battery_work); cancel_work_sync(&wacom->remote_work); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 48c5ab832009..c8d115b58e44 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -200,7 +200,7 @@ config I2C_ISMT config I2C_PIIX4 tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)" - depends on PCI && HAS_IOPORT && X86 + depends on PCI && HAS_IOPORT select I2C_SMBUS help If you say yes to this option, support will be included for the Intel @@ -1530,7 +1530,7 @@ config I2C_XGENE_SLIMPRO config SCx200_ACB tristate "Geode ACCESS.bus support" - depends on X86_32 && PCI + depends on X86_32 && PCI && HAS_IOPORT help Enable the use of the ACCESS.bus controllers on the Geode SCx200 and SC1100 processors and the CS5535 and CS5536 Geode companion devices. 
diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index ad6f08338124..450793d5f839 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/soc/amd/isp4_misc.h> #include "i2c-designware-core.h" @@ -62,6 +63,7 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) adap = &isp_i2c_dev->adapter; adap->owner = THIS_MODULE; + scnprintf(adap->name, sizeof(adap->name), AMDISP_I2C_ADAP_NAME); ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; /* use dynamically allocated adapter id */ diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c5394229b77f..cbd88ffa5610 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs, dev->msgs = msgs; dev->msgs_num = num_msgs; + dev->msg_write_idx = 0; i2c_dw_xfer_init(dev); /* Initiate messages read/write transaction */ @@ -1042,8 +1043,9 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) if (ret) return ret; - snprintf(adap->name, sizeof(adap->name), - "Synopsys DesignWare I2C adapter"); + if (!adap->name[0]) + scnprintf(adap->name, sizeof(adap->name), + "Synopsys DesignWare I2C adapter"); adap->retries = 3; adap->algo = &i2c_dw_algo; adap->quirks = &i2c_dw_quirks; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index e5732b0557fb..205cc132fdec 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1008,7 +1008,7 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx) /* setup bus to read data */ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp &= ~I2CR_MTX; - if (i2c_imx->msg->len - 1) + if ((i2c_imx->msg->len - 1) || (i2c_imx->msg->flags & I2C_M_RECV_LEN)) temp &= ~I2CR_TXAK; imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); @@ -1063,6 +1063,7 @@ static inline void i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_im wake_up(&i2c_imx->queue); } i2c_imx->msg->len += len; + i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = len; } static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned int status) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index f173bda1c98c..c8599733633e 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -435,6 +435,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned u8 tx_buf[I2C_SMBUS_BLOCK_MAX + 2]; u8 rx_buf[I2C_SMBUS_BLOCK_MAX + 1]; int num_msgs = 1; + int ret; msgs[CORE_I2C_SMBUS_MSG_WR].addr = addr; msgs[CORE_I2C_SMBUS_MSG_WR].flags = 0; @@ -505,7 +506,10 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned return -EOPNOTSUPP; } - mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + ret = mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + if (ret < 0) + return ret; + if (read_write == I2C_SMBUS_WRITE || size <= I2C_SMBUS_BYTE_DATA) return 0; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index f1cc26ac5b80..8b01df3cc8e9 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1461,13 +1461,13 @@ omap_i2c_probe(struct platform_device 
*pdev) if (IS_ERR(mux_state)) { r = PTR_ERR(mux_state); dev_dbg(&pdev->dev, "failed to get I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } omap->mux_state = mux_state; r = mux_state_select(omap->mux_state); if (r) { dev_err(&pdev->dev, "failed to select I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } } @@ -1515,6 +1515,9 @@ omap_i2c_probe(struct platform_device *pdev) err_unuse_clocks: omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0); + if (omap->mux_state) + mux_state_deselect(omap->mux_state); +err_put_pm: pm_runtime_dont_use_autosuspend(omap->dev); pm_runtime_put_sync(omap->dev); err_disable_pm: diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 9d3a4dc2bd60..ac3bb550303f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -34,7 +34,7 @@ #include <linux/dmi.h> #include <linux/acpi.h> #include <linux/io.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "i2c-piix4.h" diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index 80d45079b763..e0a76fb5bc31 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -111,6 +111,11 @@ static u32 osif_func(struct i2c_adapter *adapter) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks osif_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + static const struct i2c_algorithm osif_algorithm = { .xfer = osif_xfer, .functionality = osif_func, @@ -143,6 +148,7 @@ static int osif_probe(struct usb_interface *interface, priv->adapter.owner = THIS_MODULE; priv->adapter.class = I2C_CLASS_HWMON; + priv->adapter.quirks = &osif_quirks; priv->adapter.algo = &osif_algorithm; priv->adapter.algo_data = priv; snprintf(priv->adapter.name, sizeof(priv->adapter.name), diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index a18eab0992a1..57dfe5f1a7d9 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -139,6 +139,11 @@ out: return ret; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks usb_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + /* This is the actual algorithm we define */ static const struct i2c_algorithm usb_algorithm = { .xfer = usb_xfer, @@ -247,6 +252,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface, /* setup i2c adapter description */ dev->adapter.owner = THIS_MODULE; dev->adapter.class = I2C_CLASS_HWMON; + dev->adapter.quirks = &usb_quirks; dev->adapter.algo = &usb_algorithm; dev->adapter.algo_data = dev; snprintf(dev->adapter.name, sizeof(dev->adapter.name), diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9979a351577f..81cf3c902e81 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, out_unlock: mutex_unlock(&table->lock); if (ret) - pr_warn("%s: unable to add gid %pI6 error=%d\n", - __func__, gid->raw, ret); + pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); return ret; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c752ae9fad6c..b1c44ec1a3f3 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp 
*umem_odp, end = ALIGN(end, page_size); if (unlikely(end < page_size)) return -EOVERFLOW; + /* + * The mmu notifier can be called within reclaim contexts and takes the + * umem_mutex. This is rare to trigger in testing, teach lockdep about + * it. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + mutex_lock(&umem_odp->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); + fs_reclaim_release(GFP_KERNEL); + } nr_entries = (end - start) >> PAGE_SHIFT; if (!(nr_entries * PAGE_SIZE / page_size)) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index b847084dcd99..a506fafd2b15 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2479da8620ca..843dcd312242 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; @@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd) void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) { - struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS]; + struct mlx5_async_cmd *async_cmd; struct ib_ucontext *ucontext = ufile->ucontext; struct ib_device *device = ucontext->device; struct mlx5_ib_dev *dev = to_mdev(device); @@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) int head = 0; int tail = 0; + async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL); + if (!async_cmd) + return; + list_for_each_entry(uobject, &ufile->uobjects, list) { WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE)); @@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); head++; } + + kfree(async_cmd); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index b4c97fb62abf..9ded2b7c1e31 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx, int err; u64 address; - if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic)) + if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic)) return ERR_PTR(-EOPNOTSUPP); dm = kzalloc(sizeof(*dm), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ce7610740412..df6557ddbdfc 100644 --- 
a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 57f9bc2a4a3a..bd35e75d9ce5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && - !to_ib_umem_dmabuf(mr->umem)->pinned; - bool from_cache = !!ent; - int ret = 0; + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); if (is_odp_dma_buf) - dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); + + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; + + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); @@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - goto out; + return 0; } if (ent) { @@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) 
mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); ret = destroy_mkey(dev, mr); -out: if (is_odp) { if (!ret) to_ib_umem_odp(mr->umem)->private = NULL; @@ -2075,9 +2108,9 @@ out: if (is_odp_dma_buf) { if (!ret) to_ib_umem_dmabuf(mr->umem)->private = NULL; - dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } - return ret; } @@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index eaa2f9f5f3a9..f6abd64f07f7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) } if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - __xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ @@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, } if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL); + ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); __xa_erase(&imr->implicit_children, idx); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1378651735f6..23ed2fc688f0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev, target_host->max_id = 1; target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; - target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) + if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + else target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); diff --git a/drivers/input/joystick/fsia6b.c b/drivers/input/joystick/fsia6b.c index 76ffdec5c183..7e3bc99d766f 100644 --- a/drivers/input/joystick/fsia6b.c +++ b/drivers/input/joystick/fsia6b.c @@ -149,7 +149,7 @@ static int fsia6b_serio_connect(struct serio *serio, struct serio_driver *drv) } fsia6b->dev = input_dev; - snprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); + scnprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); input_dev->name = DRIVER_DESC; input_dev->phys = fsia6b->phys; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c066a4da7c14..5d9b7007a730 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -177,6 +177,7 @@ static const struct xpad_device { { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, + { 0x0502, 0x1305, "Acer NGR200", 0, 
XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, @@ -524,6 +525,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ + XPAD_XBOX360_VENDOR(0x0502), /* Acer Inc. Xbox 360 style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ @@ -1344,11 +1346,12 @@ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { - dev_err(&xpad->intf->dev, - "%s - usb_submit_urb failed with result %d\n", - __func__, error); + if (error != -ENODEV) + dev_err(&xpad->intf->dev, + "%s - usb_submit_urb failed with result %d\n", + __func__, error); usb_unanchor_urb(xpad->irq_out); - return -EIO; + return error; } xpad->irq_out_active = true; diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 3ff2fcf05ad5..c9e1127578b9 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1191,8 +1191,8 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd) "AT %s Set %d keyboard", atkbd->translated ? "Translated" : "Raw", atkbd->set); - snprintf(atkbd->phys, sizeof(atkbd->phys), - "%s/input0", atkbd->ps2dev.serio->phys); + scnprintf(atkbd->phys, sizeof(atkbd->phys), + "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; diff --git a/drivers/input/misc/cs40l50-vibra.c b/drivers/input/misc/cs40l50-vibra.c index dce3b0ec8cf3..330f09123631 100644 --- a/drivers/input/misc/cs40l50-vibra.c +++ b/drivers/input/misc/cs40l50-vibra.c @@ -238,6 +238,8 @@ static int cs40l50_upload_owt(struct cs40l50_work *work_data) header.data_words = len / sizeof(u32); new_owt_effect_data = kmalloc(sizeof(header) + len, GFP_KERNEL); + if (!new_owt_effect_data) + return -ENOMEM; memcpy(new_owt_effect_data, &header, sizeof(header)); memcpy(new_owt_effect_data + sizeof(header), work_data->custom_data, len); diff --git a/drivers/input/misc/gpio-beeper.c b/drivers/input/misc/gpio-beeper.c index d2d2954e2f79..3d65cb4f4ef3 100644 --- a/drivers/input/misc/gpio-beeper.c +++ b/drivers/input/misc/gpio-beeper.c @@ -94,7 +94,7 @@ static int gpio_beeper_probe(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id gpio_beeper_of_match[] = { - { .compatible = BEEPER_MODNAME, }, + { .compatible = "gpio-beeper", }, { } }; MODULE_DEVICE_TABLE(of, gpio_beeper_of_match); diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index 7a6e6927f331..7fba4a8edceb 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -771,7 +771,7 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, u8 *thresh = &sys_reg->tp_grp_reg.ch_reg_tp[i].thresh; char tc_name[10]; - snprintf(tc_name, sizeof(tc_name), "channel-%d", i); + scnprintf(tc_name, sizeof(tc_name), "channel-%d", i); struct fwnode_handle *tc_node __free(fwnode_handle) = fwnode_get_named_child_node(ch_node, tc_name); diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 80b917944b51..6fac31c0d99f 100644 --- 
a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -301,6 +301,7 @@ struct iqs7222_dev_desc { int allow_offset; int event_offset; int comms_offset; + int ext_chan; bool legacy_gesture; struct iqs7222_reg_grp_desc reg_grps[IQS7222_NUM_REG_GRPS]; }; @@ -315,6 +316,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .reg_grps = { [IQS7222_REG_GRP_STAT] = { .base = IQS7222_SYS_STATUS, @@ -373,6 +375,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .legacy_gesture = true, .reg_grps = { [IQS7222_REG_GRP_STAT] = { @@ -2244,7 +2247,7 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? : num_chan; int error, i; u16 *chan_setup = iqs7222->chan_setup[chan_index]; u16 *sys_setup = iqs7222->sys_setup; @@ -2445,7 +2448,7 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? : num_chan; int count, error, reg_offset, i; u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset]; u16 *sldr_setup = iqs7222->sldr_setup[sldr_index]; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index be734d65ea72..d0cb9fb94821 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1408,9 +1408,9 @@ static int alps_do_register_bare_ps2_mouse(struct alps_data *priv) return -ENOMEM; } - snprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", - psmouse->ps2dev.serio->phys, - (priv->dev2 ? "input2" : "input1")); + scnprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", + psmouse->ps2dev.serio->phys, + (priv->dev2 ? 
"input2" : "input1")); dev3->phys = priv->phys3; /* @@ -3103,8 +3103,8 @@ int alps_init(struct psmouse *psmouse) goto init_fail; } - snprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", - psmouse->ps2dev.serio->phys); + scnprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", + psmouse->ps2dev.serio->phys); dev2->phys = priv->phys2; /* diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index 7147dacc404f..283ef46f039f 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -279,8 +279,8 @@ static int lifebook_create_relative_device(struct psmouse *psmouse) goto err_out; priv->dev2 = dev2; - snprintf(priv->phys, sizeof(priv->phys), - "%s/input1", psmouse->ps2dev.serio->phys); + scnprintf(priv->phys, sizeof(priv->phys), + "%s/input1", psmouse->ps2dev.serio->phys); dev2->phys = priv->phys; dev2->name = "LBPS/2 Fujitsu Lifebook Touchpad"; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index a2c9f7144864..77ea7da3b1c5 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1600,7 +1600,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) psmouse_pre_receive_byte, psmouse_receive_byte); INIT_DELAYED_WORK(&psmouse->resync_work, psmouse_resync); psmouse->dev = input_dev; - snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); + scnprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 91a2b584dab1..196905162945 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -105,7 +105,6 @@ config TOUCHSCREEN_ADC config TOUCHSCREEN_APPLE_Z2 tristate "Apple Z2 touchscreens" - default ARCH_APPLE depends on SPI && (ARCH_APPLE || COMPILE_TEST) help Say Y here if you have an ARM Apple device with diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index a6946e3d8376..869884219908 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -1554,7 +1554,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mip4_pm_ops, mip4_suspend, mip4_resume); #ifdef CONFIG_OF static const struct of_device_id mip4_of_match[] = { - { .compatible = "melfas,"MIP4_DEVICE_NAME, }, + { .compatible = "melfas,mip4_ts", }, { }, }; MODULE_DEVICE_TABLE(of, mip4_of_match); diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..47692cbfaabd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. 
*/ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..148b944143b8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev) !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } iommu_enable_pci_pri(info); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 3ddbcc603de2..2d1afab5eedc 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1289,6 +1289,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 22f74ba33a0e..e6bb3c784017 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev, return -ENOMEM; data->iommu = platform_get_drvdata(iommu_dev); - data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; + iommu->domain = &rk_identity_domain; + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 0d196e447142..c3928ef79344 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -74,6 +74,7 @@ config ARM_VIC_NR config IRQ_MSI_LIB bool + select GENERIC_MSI_IRQ config ARMADA_370_XP_IRQ bool diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 488e346047c1..77230fbe07be 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip, unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int data, mask, ret = -EINVAL; int nr_irqs, irq_base = -1; - struct device_node *node = i2c->dev.of_node; mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR | PM8607_B0_MISC1_INT_MASK; @@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip, ret = -EBUSY; goto out; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0, + irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0, &pm860x_irq_domain_ops, chip); chip->core_irq = i2c->irq; if (!chip->core_irq) diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 
78b16c67a5fc..25377dcce60e 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, { unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int ret; - struct device_node *node = chip->dev->of_node; /* clear all interrupts */ max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1); @@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, return -EBUSY; } - irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0, - &max8925_irq_domain_ops, chip); + irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS, + chip->irq_base, 0, &max8925_irq_domain_ops, + chip); /* request irq handler for pmic main irq*/ chip->core_irq = irq; diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 232c2bfe8c18..d3ab40651307 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num) static struct irq_chip twl4030_irq_chip; int status, i; int irq_base, irq_end, nr_irqs; - struct device_node *node = dev->of_node; /* * TWL core and pwr interrupts must be contiguous because @@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num) return irq_base; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0, + irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0, &irq_domain_simple_ops, NULL); irq_end = irq_base + TWL4030_CORE_NR_IRQS; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 7f893bafaa60..c417ed34c057 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY), + /* + * Some SD cards report discard support while they don't + */ + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_DISCARD), + END_FIXUP }; @@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), - /* - * Some SD cards reports discard support while they don't - */ - MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, - MMC_QUIRK_BROKEN_SD_DISCARD), - END_FIXUP }; diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c index 1c31d0dfa961..de17d1611290 100644 --- a/drivers/mmc/core/sd_uhs2.c +++ b/drivers/mmc/core/sd_uhs2.c @@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host) err = host->ops->uhs2_control(host, UHS2_PHY_INIT); if (err) { - pr_err("%s: failed to initial phy for UHS-II!\n", - mmc_hostname(host)); + pr_debug("%s: failed to initialize phy for UHS-II!\n", + mmc_hostname(host)); } return err; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 31eb90536bce..d7020e06dd55 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma, static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data) { if (!(data->host_cookie & MSDC_PREPARE_FLAG)) { - data->host_cookie |= MSDC_PREPARE_FLAG; data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len, mmc_get_dma_dir(data)); + if (data->sg_count) + data->host_cookie |= MSDC_PREPARE_FLAG; } } +static bool msdc_data_prepared(struct mmc_data
*data) +{ + return data->host_cookie & MSDC_PREPARE_FLAG; +} + static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data) { if (data->host_cookie & MSDC_ASYNC_FLAG) @@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(!host->hsq_en && host->mrq); host->mrq = mrq; - if (mrq->data) + if (mrq->data) { msdc_prepare_data(host, mrq->data); + if (!msdc_data_prepared(mrq->data)) { + host->mrq = NULL; + /* + * Failed to prepare DMA area, fail fast before + * starting any commands. + */ + mrq->cmd->error = -ENOSPC; + mmc_request_done(mmc_from_priv(host), mrq); + return; + } + } /* if SBC is required, we have HW option and SW option. * if HW option is enabled, and SBC does not have "special" flags, diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c index 6880d3e9ab62..2e5da7c5834c 100644 --- a/drivers/mmc/host/sdhci-of-k1.c +++ b/drivers/mmc/host/sdhci-of-k1.c @@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev) host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY; - if (spacemit_sdhci_get_clocks(dev, pltfm_host)) + ret = spacemit_sdhci_get_clocks(dev, pltfm_host); + if (ret) goto err_pltfm; ret = sdhci_add_host(host); diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c index c53b64d50c0d..0efeb9d0c376 100644 --- a/drivers/mmc/host/sdhci-uhs2.c +++ b/drivers/mmc/host/sdhci-uhs2.c @@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask) /* hw clears the bit when it's done */ if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10, UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) { - pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, - mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); + pr_debug("%s: %s: Reset 0x%x never completed. 
%s: clean reset bit.\n", __func__, + mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET); return; } @@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT), 100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 interface not detected in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms"); return -EIO; } @@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC), 100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms"); return -EIO; } @@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc) host->ops->uhs2_pre_detect_init(host); if (sdhci_uhs2_interface_detect(host)) { - pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); + pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); return -EIO; } if (sdhci_uhs2_init(host)) { - pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); + pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); return -EIO; } @@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE), 100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms"); return -EIO; } return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f008167d1863..e116f2db34d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; - clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (clk & SDHCI_CLOCK_CARD_EN) - sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN, - SDHCI_CLOCK_CONTROL); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - if (clock == 0) { - sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); + if (clock == 0) return; - } clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock); sdhci_enable_clk(host, clk); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index f9d65dd0f2b2..70ada1857a4c 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en); void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable); void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd); +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define SDHCI_DBG_ANYWAY 0 +#elif defined(DEBUG) +#define SDHCI_DBG_ANYWAY 1 +#else +#define SDHCI_DBG_ANYWAY 0 +#endif + +#define sdhci_dbg_dumpregs(host, fmt) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if
(DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \ + sdhci_dumpregs(host); \ +} while (0) + #endif /* __SDHCI_HW_H */ diff --git a/drivers/mtd/nand/qpic_common.c b/drivers/mtd/nand/qpic_common.c index 4dc4d65e7d32..8e604cc22ca3 100644 --- a/drivers/mtd/nand/qpic_common.c +++ b/drivers/mtd/nand/qpic_common.c @@ -57,14 +57,15 @@ qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc) bam_txn_buf += sizeof(*bam_txn); bam_txn->bam_ce = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->bam_ce) * QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn->bam_ce_nitems = QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn_buf += sizeof(*bam_txn->bam_ce) * bam_txn->bam_ce_nitems; bam_txn->cmd_sgl = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->cmd_sgl) * QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn->cmd_sgl_nitems = QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn_buf += sizeof(*bam_txn->cmd_sgl) * bam_txn->cmd_sgl_nitems; bam_txn->data_sgl = bam_txn_buf; + bam_txn->data_sgl_nitems = QPIC_PER_CW_DATA_SGL * num_cw; init_completion(&bam_txn->txn_done); @@ -238,6 +239,11 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; u32 offset; + if (bam_txn->bam_ce_pos + size > bam_txn->bam_ce_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "CE"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_pos]; /* fill the command desc */ @@ -258,6 +264,12 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, /* use the separate sgl after this command */ if (flags & NAND_BAM_NEXT_SGL) { + if (bam_txn->cmd_sgl_pos >= bam_txn->cmd_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", + "CMD sgl"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_start]; bam_ce_size = (bam_txn->bam_ce_pos - bam_txn->bam_ce_start) * @@ -297,10 +309,20 @@ int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; if (read) { + if (bam_txn->rx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "RX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->rx_sgl_pos], vaddr, size); bam_txn->rx_sgl_pos++; } else { + if (bam_txn->tx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "TX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->tx_sgl_pos], vaddr, size); bam_txn->tx_sgl_pos++; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c6807e473ab7..2fca8e84ab10 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -982,6 +982,17 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker) return 0; } +static void ad_cond_set_peer_notif(struct port *port) +{ + struct bonding *bond = port->slave->bond; + + if (bond->params.broadcast_neighbor && rtnl_trylock()) { + bond->send_peer_notif = bond->params.num_peer_notif * + max(1, bond->params.peer_notif_delay); + rtnl_unlock(); + } +} + /** * ad_mux_machine - handle a port's mux state machine * @port: the port we're looking at @@ -1378,7 +1389,7 @@ static void ad_tx_machine(struct port *port) /* check if tx timer expired, to verify that we do not send more than * 3 packets per second */ - if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { + if (!port->sm_tx_timer_counter || !(--port->sm_tx_timer_counter)) { /* check if there is something to send */ if (port->ntt && (port->sm_vars & 
AD_PORT_LACP_ENABLED)) { __update_lacpdu_from_port(port); @@ -1393,12 +1404,13 @@ static void ad_tx_machine(struct port *port) * again until demanded */ port->ntt = false; + + /* restart tx timer (to verify that we will not + * exceed AD_MAX_TX_IN_SECOND) + */ + port->sm_tx_timer_counter = ad_ticks_per_sec / AD_MAX_TX_IN_SECOND; + } } - /* restart tx timer(to verify that we will not exceed - * AD_MAX_TX_IN_SECOND - */ - port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; } } @@ -2061,6 +2073,8 @@ static void ad_enable_collecting_distributing(struct port *port, __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; + /* Should notify peers if possible */ + ad_cond_set_peer_notif(port); } } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c4d53e8e7c15..17c7542be6a5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -212,6 +212,8 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0); unsigned int bond_net_id __read_mostly; +DEFINE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled); + static const struct flow_dissector_key flow_keys_bonding_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -1235,17 +1237,32 @@ static struct slave *bond_find_best_slave(struct bonding *bond) /* must be called in RCU critical section or with RTNL held */ static bool bond_should_notify_peers(struct bonding *bond) { - struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); + struct bond_up_slave *usable; + struct slave *slave = NULL; - if (!slave || !bond->send_peer_notif || + if (!bond->send_peer_notif || bond->send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || - !netif_carrier_ok(bond->dev) || - test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) + !netif_carrier_ok(bond->dev)) return false; + /* The send_peer_notif is set by active-backup or 8023ad + * mode, and cleared in bond_close() when changing mode. + * It is safe to only check bond mode here. + */ + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + usable = rcu_dereference_rtnl(bond->usable_slaves); + if (!usable || !READ_ONCE(usable->count)) + return false; + } else { + slave = rcu_dereference_rtnl(bond->curr_active_slave); + if (!slave || test_bit(__LINK_STATE_LINKWATCH_PENDING, + &slave->dev->state)) + return false; + } + netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", - slave ? slave->dev->name : "NULL"); + slave ? 
slave->dev->name : "all"); return true; } @@ -4456,6 +4473,9 @@ static int bond_open(struct net_device *bond_dev) bond_for_each_slave(bond, slave, iter) dev_mc_add(slave->dev, lacpdu_mcast_addr); + + if (bond->params.broadcast_neighbor) + static_branch_inc(&bond_bcast_neigh_enabled); } if (bond_mode_can_use_xmit_hash(bond)) @@ -4475,6 +4495,10 @@ static int bond_close(struct net_device *bond_dev) bond_alb_deinitialize(bond); bond->recv_probe = NULL; + if (BOND_MODE(bond) == BOND_MODE_8023AD && + bond->params.broadcast_neighbor) + static_branch_dec(&bond_bcast_neigh_enabled); + if (bond_uses_primary(bond)) { rcu_read_lock(); slave = rcu_dereference(bond->curr_active_slave); @@ -5310,6 +5334,37 @@ static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, return slaves->arr[hash % count]; } +static bool bond_should_broadcast_neighbor(struct sk_buff *skb, + struct net_device *dev) +{ + struct bonding *bond = netdev_priv(dev); + struct { + struct ipv6hdr ip6; + struct icmp6hdr icmp6; + } *combined, _combined; + + if (!static_branch_unlikely(&bond_bcast_neigh_enabled)) + return false; + + if (!bond->params.broadcast_neighbor) + return false; + + if (skb->protocol == htons(ETH_P_ARP)) + return true; + + if (skb->protocol == htons(ETH_P_IPV6)) { + combined = skb_header_pointer(skb, skb_mac_header_len(skb), + sizeof(_combined), + &_combined); + if (combined && combined->ip6.nexthdr == NEXTHDR_ICMP && + (combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || + combined->icmp6.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) + return true; + } + + return false; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -5329,17 +5384,27 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, return bond_tx_drop(dev, skb); } -/* in broadcast mode, we send everything to all usable interfaces. */ +/* in broadcast mode, we send everything to all or only the usable slave + * interfaces; the caller holds rcu_read_lock when this function is called. + */ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, - struct net_device *bond_dev) + struct net_device *bond_dev, + bool all_slaves) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave = NULL; - struct list_head *iter; + struct bond_up_slave *slaves; bool xmit_suc = false; bool skb_used = false; + int slaves_count, i; - bond_for_each_slave_rcu(bond, slave, iter) { + if (all_slaves) + slaves = rcu_dereference(bond->all_slaves); + else + slaves = rcu_dereference(bond->usable_slaves); + + slaves_count = slaves ? 
READ_ONCE(slaves->count) : 0; + for (i = 0; i < slaves_count; i++) { + struct slave *slave = slaves->arr[i]; struct sk_buff *skb2; if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) @@ -5577,10 +5642,13 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev case BOND_MODE_ACTIVEBACKUP: return bond_xmit_activebackup(skb, dev); case BOND_MODE_8023AD: + if (bond_should_broadcast_neighbor(skb, dev)) + return bond_xmit_broadcast(skb, dev, false); + fallthrough; case BOND_MODE_XOR: return bond_3ad_xor_xmit(skb, dev); case BOND_MODE_BROADCAST: - return bond_xmit_broadcast(skb, dev); + return bond_xmit_broadcast(skb, dev, true); case BOND_MODE_ALB: return bond_alb_xmit(skb, dev); case BOND_MODE_TLB: @@ -6456,6 +6524,7 @@ static int __init bond_check_params(struct bond_params *params) eth_zero_addr(params->ad_actor_system); params->ad_user_port_key = ad_user_port_key; params->coupled_control = 1; + params->broadcast_neighbor = 0; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index ac5e402c34bc..57fff2421f1b 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -124,6 +124,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, + [IFLA_BOND_BROADCAST_NEIGH] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -561,6 +562,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], return err; } + if (data[IFLA_BOND_BROADCAST_NEIGH]) { + int broadcast_neigh = nla_get_u8(data[IFLA_BOND_BROADCAST_NEIGH]); + + bond_opt_initval(&newval, broadcast_neigh); + err = __bond_opt_set(bond, BOND_OPT_BROADCAST_NEIGH, &newval, + data[IFLA_BOND_BROADCAST_NEIGH], extack); + if (err) + return err; + } + return 0; } @@ -630,6 +641,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_BROADCAST_NEIGH */ 0; } @@ -793,6 +805,10 @@ static int bond_fill_info(struct sk_buff *skb, bond->params.coupled_control)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_BROADCAST_NEIGH, + bond->params.broadcast_neighbor)) + goto nla_put_failure; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 91893c29b899..1d639a3be6ba 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -87,6 +87,8 @@ static int bond_option_missed_max_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_coupled_control_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_broadcast_neigh_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { { "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT}, @@ -240,6 +242,12 @@ static const struct bond_opt_value bond_coupled_control_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_broadcast_neigh_tbl[] = { + { "off", 0, BOND_VALFLAG_DEFAULT}, + { "on", 
1, 0}, + { NULL, -1, 0} +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -513,6 +521,14 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .flags = BOND_OPTFLAG_IFDOWN, .values = bond_coupled_control_tbl, .set = bond_option_coupled_control_set, + }, + [BOND_OPT_BROADCAST_NEIGH] = { + .id = BOND_OPT_BROADCAST_NEIGH, + .name = "broadcast_neighbor", + .desc = "Broadcast neighbor packets to all active slaves", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_broadcast_neigh_tbl, + .set = bond_option_broadcast_neigh_set, + } }; @@ -894,6 +910,13 @@ static int bond_option_mode_set(struct bonding *bond, bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; bond->params.mode = newval->value; + /* When changing mode the bond device is down, so bond_close() has + * already dropped the bond_bcast_neigh_enabled static-branch reference + * if broadcast_neighbor was enabled in 8023ad mode. Therefore, only + * clear broadcast_neighbor to 0 here. + */ + bond->params.broadcast_neighbor = 0; + if (bond->dev->reg_state == NETREG_REGISTERED) { bool update = false; @@ -1840,3 +1863,22 @@ static int bond_option_coupled_control_set(struct bonding *bond, bond->params.coupled_control = newval->value; return 0; } + +static int bond_option_broadcast_neigh_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + if (bond->params.broadcast_neighbor == newval->value) + return 0; + + bond->params.broadcast_neighbor = newval->value; + if (bond->dev->flags & IFF_UP) { + if (bond->params.broadcast_neighbor) + static_branch_inc(&bond_bcast_neigh_enabled); + else + static_branch_dec(&bond_bcast_neigh_enabled); + } + + netdev_dbg(bond->dev, "Setting broadcast_neighbor to %s (%llu)\n", + newval->string, newval->value); + return 0; +} diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index bb9812b3b0e8..ec759f8cb0e2 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -92,7 +92,7 @@ source "drivers/net/dsa/realtek/Kconfig" config NET_DSA_RZN1_A5PSW tristate "Renesas RZ/N1 A5PSW Ethernet switch support" - depends on OF && ARCH_RZN1 + depends on OF && (ARCH_RZN1 || COMPILE_TEST) select NET_DSA_TAG_RZN1_A5PSW select PCS_RZN1_MIIC help diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index df7466d4fe8f..1635255f58e4 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -1227,35 +1227,27 @@ static int a5psw_probe(struct platform_device *pdev) if (ret) return ret; - a5psw->hclk = devm_clk_get(dev, "hclk"); + a5psw->hclk = devm_clk_get_enabled(dev, "hclk"); if (IS_ERR(a5psw->hclk)) { dev_err(dev, "failed get hclk clock\n"); ret = PTR_ERR(a5psw->hclk); goto free_pcs; } - a5psw->clk = devm_clk_get(dev, "clk"); + a5psw->clk = devm_clk_get_enabled(dev, "clk"); if (IS_ERR(a5psw->clk)) { dev_err(dev, "failed get clk_switch clock\n"); ret = PTR_ERR(a5psw->clk); goto free_pcs; } - ret = clk_prepare_enable(a5psw->clk); - if (ret) - goto free_pcs; - - ret = clk_prepare_enable(a5psw->hclk); - if (ret) - goto clk_disable; - mdio = of_get_available_child_by_name(dev->of_node, "mdio"); if (mdio) { ret = a5psw_probe_mdio(a5psw, mdio); of_node_put(mdio); if (ret) { dev_err(dev, "Failed to register MDIO: %d\n", ret); - goto hclk_disable; + goto free_pcs; } } @@ -1269,15 +1261,11 @@ static int a5psw_probe(struct platform_device *pdev) ret = dsa_register_switch(ds); if (ret) { dev_err(dev, "Failed to register DSA switch: %d\n", ret); - goto hclk_disable; + goto free_pcs; } return 0; -hclk_disable: - 
clk_disable_unprepare(a5psw->hclk); -clk_disable: - clk_disable_unprepare(a5psw->clk); free_pcs: a5psw_pcs_free(a5psw); @@ -1293,8 +1281,6 @@ static void a5psw_remove(struct platform_device *pdev) dsa_unregister_switch(&a5psw->ds); a5psw_pcs_free(a5psw); - clk_disable_unprepare(a5psw->hclk); - clk_disable_unprepare(a5psw->clk); } static void a5psw_shutdown(struct platform_device *pdev) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 10a167224bf5..e6b802e3d844 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2979,6 +2979,7 @@ static int airoha_probe(struct platform_device *pdev) error_napi_stop: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); + airoha_ppe_deinit(eth); error_hw_cleanup: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_hw_cleanup(ð->qdma[i]); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 0e5b8c21b9aa..4e8deb87f751 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -161,7 +161,7 @@ static int airoha_npu_send_msg(struct airoha_npu *npu, int func_id, } static int airoha_npu_run_firmware(struct device *dev, void __iomem *base, - struct reserved_mem *rmem) + struct resource *res) { const struct firmware *fw; void __iomem *addr; @@ -178,7 +178,7 @@ static int airoha_npu_run_firmware(struct device *dev, void __iomem *base, goto out; } - addr = devm_ioremap(dev, rmem->base, rmem->size); + addr = devm_ioremap_resource(dev, res); if (!addr) { ret = -ENOMEM; goto out; @@ -474,9 +474,8 @@ static const struct regmap_config regmap_config = { static int airoha_npu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct reserved_mem *rmem; struct airoha_npu *npu; - struct device_node *np; + struct resource res; void __iomem *base; int i, irq, err; @@ -498,15 +497,9 @@ static int airoha_npu_probe(struct platform_device *pdev) if (IS_ERR(npu->regmap)) return PTR_ERR(npu->regmap); - np = of_parse_phandle(dev->of_node, "memory-region", 0); - if (!np) - return -ENODEV; - - rmem = of_reserved_mem_lookup(np); - of_node_put(np); - - if (!rmem) - return -ENODEV; + err = of_reserved_mem_region_to_resource(dev->of_node, 0, &res); + if (err) + return err; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -539,12 +532,12 @@ static int airoha_npu_probe(struct platform_device *pdev) if (err) return err; - err = airoha_npu_run_firmware(dev, base, rmem); + err = airoha_npu_run_firmware(dev, base, &res); if (err) return dev_err_probe(dev, err, "failed to run npu firmware\n"); regmap_write(npu->regmap, REG_CR_NPU_MIB(10), - rmem->base + NPU_EN7581_FIRMWARE_RV32_MAX_SIZE); + res.start + NPU_EN7581_FIRMWARE_RV32_MAX_SIZE); regmap_write(npu->regmap, REG_CR_NPU_MIB(11), 0x40000); /* SRAM 256K */ regmap_write(npu->regmap, REG_CR_NPU_MIB(12), 0); regmap_write(npu->regmap, REG_CR_NPU_MIB(21), 1); @@ -552,7 +545,7 @@ static int airoha_npu_probe(struct platform_device *pdev) /* setting booting address */ for (i = 0; i < NPU_NUM_CORES; i++) - regmap_write(npu->regmap, REG_CR_BOOT_BASE(i), rmem->base); + regmap_write(npu->regmap, REG_CR_BOOT_BASE(i), res.start); usleep_range(1000, 2000); /* enable NPU cores */ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 734f44660620..e54e3e36d3f9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h 
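The rzn1_a5psw conversion above shows the point of devm_clk_get_enabled(): the clock is acquired, prepared, and enabled in one devres-managed call, so the probe error labels and every clk_disable_unprepare() in the error path and in .remove() can be deleted. A minimal probe sketch of the same pattern (hypothetical example driver, not part of any patch in this series):

#include <linux/clk.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	struct clk *hclk;

	/* get + prepare + enable in one call; devres undoes all of it */
	hclk = devm_clk_get_enabled(&pdev->dev, "hclk");
	if (IS_ERR(hclk))
		return dev_err_probe(&pdev->dev, PTR_ERR(hclk),
				     "failed to get hclk clock\n");

	/* no clk_disable_unprepare() in error paths or in .remove() */
	return 0;
}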
@@ -1277,6 +1277,8 @@ #define MDIO_VEND2_CTRL1_SS13 BIT(13) #endif +#define XGBE_VEND2_MAC_AUTO_SW BIT(9) + /* MDIO mask values */ #define XGBE_AN_CL73_INT_CMPLT BIT(0) #define XGBE_AN_CL73_INC_LINK BIT(1) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 71449edbb76d..1a37ec45e650 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, reg |= MDIO_VEND2_CTRL1_AN_RESTART; XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL); + reg |= XGBE_VEND2_MAC_AUTO_SW; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg); } static void xgbe_an37_restart(struct xgbe_prv_data *pdata) @@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); + + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1); + reg &= ~MDIO_AN_CTRL1_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); + } static void xgbe_an73_init(struct xgbe_prv_data *pdata) @@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, &an_restart); + /* bail out if the link status register read fails */ + if (pdata->phy.link < 0) + return; + if (an_restart) { xgbe_phy_config_aneg(pdata); goto adjust_link; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 7a4dfa4e19c7..23c39e92e783 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; - int ret; + int reg, ret; *an_restart = 0; @@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 0; } - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. 
+ */ + + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + } if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, @@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - /* check again for the link and adaptation status */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if (pdata->rx_adapt_done) return 1; } else if (reg & MDIO_STAT1_LSTATUS) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 29b42e7ab6cf..70169ea23c7f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -185,12 +185,12 @@ #define XGBE_LINK_TIMEOUT 5 #define XGBE_KR_TRAINING_WAIT_ITER 50 -#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_10 0x00 #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 -#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) +#define XGBE_SGMII_AN_LINK_STATUS BIT(4) /* ECC correctable error notification window (seconds) */ #define XGBE_ECC_LIMIT 60 diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index cfdb546a09e7..98a4d089270e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) break; } - buffer_info->alloced = 1; - buffer_info->skb = skb; - buffer_info->length = (u16) adapter->rx_buffer_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); buffer_info->dma = dma_map_page(&pdev->dev, page, offset, adapter->rx_buffer_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + kfree_skb(skb); + adapter->soft_stats.rx_dropped++; + break; + } + + buffer_info->alloced = 1; + buffer_info->skb = skb; + buffer_info->length = (u16)adapter->rx_buffer_len; + rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len); rfd_desc->coalese = 0; @@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb, return 0; } -static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, - struct tx_packet_desc *ptpd) +static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, + struct tx_packet_desc *ptpd) { struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; struct atl1_buffer *buffer_info; @@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, unsigned int nr_frags; unsigned int f; int retval; + u16 first_mapped; u16 next_to_use; u16 data_len; u8 hdr_len; @@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buf_len -= skb->data_len; nr_frags = skb_shinfo(skb)->nr_frags; next_to_use = atomic_read(&tpd_ring->next_to_use); + first_mapped = next_to_use; buffer_info = &tpd_ring->buffer_info[next_to_use]; BUG_ON(buffer_info->skb); /* put skb in last TPD */ @@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, hdr_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + 
goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, page, offset, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, buf_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev, frag, i * ATL1_MAX_TX_BUF_LEN, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, /* last tpd's buffer-info */ buffer_info->skb = skb; + + return true; + + dma_err: + while (first_mapped != next_to_use) { + buffer_info = &tpd_ring->buffer_info[first_mapped]; + dma_unmap_page(&adapter->pdev->dev, + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); + buffer_info->dma = 0; + + if (++first_mapped == tpd_ring->count) + first_mapped = 0; + } + return false; } static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, @@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, len = skb_headlen(skb); - if (unlikely(skb->len <= 0)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(skb->len <= 0)) + goto drop_packet; nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { @@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, if (mss) { if (skb->protocol == htons(ETH_P_IP)) { proto_hdr_len = skb_tcp_all_headers(skb); - if (unlikely(proto_hdr_len > len)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(proto_hdr_len > len)) + goto drop_packet; + /* need additional TPD ? */ if (proto_hdr_len != len) count += (len - proto_hdr_len + @@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, } tso = atl1_tso(adapter, skb, ptpd); - if (tso < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (tso < 0) + goto drop_packet; if (!tso) { ret_val = atl1_tx_csum(adapter, skb, ptpd); - if (ret_val < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (ret_val < 0) + goto drop_packet; } - atl1_tx_map(adapter, skb, ptpd); + if (!atl1_tx_map(adapter, skb, ptpd)) + goto drop_packet; + atl1_tx_queue(adapter, count, ptpd); atl1_update_mailbox(adapter); return NETDEV_TX_OK; + +drop_packet: + adapter->soft_stats.tx_errors++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static int atl1_rings_clean(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 81a74e07464f..0fc10e6c6902 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -253,6 +253,15 @@ config BNXT_HWMON Say Y if you want to expose the thermal sensor data on NetXtreme-C/E devices, via the hwmon sysfs interface. 
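The atl1 changes above apply the standard streaming-DMA discipline: every dma_map_page() result is checked with dma_mapping_error(), and a failure unwinds all descriptors mapped so far before the packet is dropped. A condensed sketch of that unwind pattern (hypothetical descriptor type, for illustration only):

#include <linux/dma-mapping.h>

struct example_desc {
	dma_addr_t dma;		/* hypothetical per-fragment descriptor */
};

static bool example_map_frags(struct device *dev, struct example_desc *ring,
			      struct page **pages, unsigned int n, size_t len)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		ring[i].dma = dma_map_page(dev, pages[i], 0, len,
					   DMA_TO_DEVICE);
		/* mapping can fail, e.g. when IOMMU space is exhausted */
		if (dma_mapping_error(dev, ring[i].dma))
			goto unwind;
	}
	return true;

unwind:
	/* release everything mapped before the failure, then drop */
	while (i--)
		dma_unmap_page(dev, ring[i].dma, len, DMA_TO_DEVICE);
	return false;
}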
+config BNGE + tristate "Broadcom Ethernet device support" + depends on PCI + select NET_DEVLINK + help + This driver supports Broadcom 50/100/200/400/800 gigabit Ethernet cards. + The module will be called bng_en. To compile this driver as a module, + choose M here. + config BCMASP tristate "Broadcom ASP 2.0 Ethernet support" depends on ARCH_BRCMSTB || COMPILE_TEST diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index bac5cb6ad0cd..10cc1c92ecfc 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o obj-$(CONFIG_BNXT) += bnxt/ obj-$(CONFIG_BCMASP) += asp2/ +obj-$(CONFIG_BNGE) += bnge/ diff --git a/drivers/net/ethernet/broadcom/bnge/Makefile b/drivers/net/ethernet/broadcom/bnge/Makefile new file mode 100644 index 000000000000..6142d9c57f49 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_BNGE) += bng_en.o + +bng_en-y := bnge_core.o \ + bnge_devlink.o \ + bnge_hwrm.o \ + bnge_hwrm_lib.o \ + bnge_rmem.o \ + bnge_resc.o \ + bnge_netdev.o \ + bnge_ethtool.o diff --git a/drivers/net/ethernet/broadcom/bnge/bnge.h b/drivers/net/ethernet/broadcom/bnge/bnge.h new file mode 100644 index 000000000000..a1795302c15a --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_H_ +#define _BNGE_H_ + +#define DRV_NAME "bng_en" +#define DRV_SUMMARY "Broadcom 800G Ethernet Linux Driver" + +#include <linux/etherdevice.h> +#include "../bnxt/bnxt_hsi.h" +#include "bnge_rmem.h" +#include "bnge_resc.h" + +#define DRV_VER_MAJ 1 +#define DRV_VER_MIN 15 +#define DRV_VER_UPD 1 + +extern char bnge_driver_name[]; + +enum board_idx { + BCM57708, +}; + +struct bnge_pf_info { + u16 fw_fid; + u16 port_id; + u8 mac_addr[ETH_ALEN]; +}; + +#define INVALID_HW_RING_ID ((u16)-1) + +enum { + BNGE_FW_CAP_SHORT_CMD = BIT_ULL(0), + BNGE_FW_CAP_LLDP_AGENT = BIT_ULL(1), + BNGE_FW_CAP_DCBX_AGENT = BIT_ULL(2), + BNGE_FW_CAP_IF_CHANGE = BIT_ULL(3), + BNGE_FW_CAP_KONG_MB_CHNL = BIT_ULL(4), + BNGE_FW_CAP_ERROR_RECOVERY = BIT_ULL(5), + BNGE_FW_CAP_PKG_VER = BIT_ULL(6), + BNGE_FW_CAP_CFA_ADV_FLOW = BIT_ULL(7), + BNGE_FW_CAP_CFA_RFS_RING_TBL_IDX_V2 = BIT_ULL(8), + BNGE_FW_CAP_PCIE_STATS_SUPPORTED = BIT_ULL(9), + BNGE_FW_CAP_EXT_STATS_SUPPORTED = BIT_ULL(10), + BNGE_FW_CAP_ERR_RECOVER_RELOAD = BIT_ULL(11), + BNGE_FW_CAP_HOT_RESET = BIT_ULL(12), + BNGE_FW_CAP_RX_ALL_PKT_TS = BIT_ULL(13), + BNGE_FW_CAP_VLAN_RX_STRIP = BIT_ULL(14), + BNGE_FW_CAP_VLAN_TX_INSERT = BIT_ULL(15), + BNGE_FW_CAP_EXT_HW_STATS_SUPPORTED = BIT_ULL(16), + BNGE_FW_CAP_LIVEPATCH = BIT_ULL(17), + BNGE_FW_CAP_HOT_RESET_IF = BIT_ULL(18), + BNGE_FW_CAP_RING_MONITOR = BIT_ULL(19), + BNGE_FW_CAP_DBG_QCAPS = BIT_ULL(20), + BNGE_FW_CAP_THRESHOLD_TEMP_SUPPORTED = BIT_ULL(21), + BNGE_FW_CAP_DFLT_VLAN_TPID_PCP = BIT_ULL(22), + BNGE_FW_CAP_VNIC_TUNNEL_TPA = BIT_ULL(23), + BNGE_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO = BIT_ULL(24), + BNGE_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 = BIT_ULL(25), + BNGE_FW_CAP_VNIC_RE_FLUSH = BIT_ULL(26), +}; + +enum { + BNGE_EN_ROCE_V1 = BIT_ULL(0), + BNGE_EN_ROCE_V2 = BIT_ULL(1), + BNGE_EN_STRIP_VLAN = BIT_ULL(2), + BNGE_EN_SHARED_CHNL = BIT_ULL(3), + BNGE_EN_UDP_GSO_SUPP = BIT_ULL(4), +}; + +#define BNGE_EN_ROCE (BNGE_EN_ROCE_V1 | BNGE_EN_ROCE_V2) + +enum { + 
BNGE_RSS_CAP_RSS_HASH_TYPE_DELTA = BIT(0), + BNGE_RSS_CAP_UDP_RSS_CAP = BIT(1), + BNGE_RSS_CAP_NEW_RSS_CAP = BIT(2), + BNGE_RSS_CAP_RSS_TCAM = BIT(3), + BNGE_RSS_CAP_AH_V4_RSS_CAP = BIT(4), + BNGE_RSS_CAP_AH_V6_RSS_CAP = BIT(5), + BNGE_RSS_CAP_ESP_V4_RSS_CAP = BIT(6), + BNGE_RSS_CAP_ESP_V6_RSS_CAP = BIT(7), +}; + +#define BNGE_MAX_QUEUE 8 +struct bnge_queue_info { + u8 queue_id; + u8 queue_profile; +}; + +struct bnge_dev { + struct device *dev; + struct pci_dev *pdev; + struct net_device *netdev; + u64 dsn; +#define BNGE_VPD_FLD_LEN 32 + char board_partno[BNGE_VPD_FLD_LEN]; + char board_serialno[BNGE_VPD_FLD_LEN]; + + void __iomem *bar0; + void __iomem *bar1; + + u16 chip_num; + u8 chip_rev; + + int db_offset; /* db_offset within db_size */ + int db_size; + + /* HWRM members */ + u16 hwrm_cmd_seq; + u16 hwrm_cmd_kong_seq; + struct dma_pool *hwrm_dma_pool; + struct hlist_head hwrm_pending_list; + u16 hwrm_max_req_len; + u16 hwrm_max_ext_req_len; + unsigned int hwrm_cmd_timeout; + unsigned int hwrm_cmd_max_timeout; + struct mutex hwrm_cmd_lock; /* serialize hwrm messages */ + + struct hwrm_ver_get_output ver_resp; +#define FW_VER_STR_LEN 32 + char fw_ver_str[FW_VER_STR_LEN]; + char hwrm_ver_supp[FW_VER_STR_LEN]; + char nvm_cfg_ver[FW_VER_STR_LEN]; + u64 fw_ver_code; +#define BNGE_FW_VER_CODE(maj, min, bld, rsv) \ + ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) + + struct bnge_pf_info pf; + + unsigned long state; +#define BNGE_STATE_DRV_REGISTERED 0 + + u64 fw_cap; + + /* Backing stores */ + struct bnge_ctx_mem_info *ctx; + + u64 flags; + + struct bnge_hw_resc hw_resc; + + u16 tso_max_segs; + + int max_fltr; +#define BNGE_L2_FLTR_MAX_FLTR 1024 + + u32 *rss_indir_tbl; +#define BNGE_RSS_TABLE_ENTRIES 64 +#define BNGE_RSS_TABLE_SIZE (BNGE_RSS_TABLE_ENTRIES * 4) +#define BNGE_RSS_TABLE_MAX_TBL 8 +#define BNGE_MAX_RSS_TABLE_SIZE \ + (BNGE_RSS_TABLE_SIZE * BNGE_RSS_TABLE_MAX_TBL) +#define BNGE_MAX_RSS_TABLE_ENTRIES \ + (BNGE_RSS_TABLE_ENTRIES * BNGE_RSS_TABLE_MAX_TBL) + u16 rss_indir_tbl_entries; + + u32 rss_cap; + + u16 rx_nr_rings; + u16 tx_nr_rings; + u16 tx_nr_rings_per_tc; + /* Number of NQs */ + u16 nq_nr_rings; + + /* Aux device resources */ + u16 aux_num_msix; + u16 aux_num_stat_ctxs; + + u16 max_mtu; +#define BNGE_MAX_MTU 9500 + + u16 hw_ring_stats_size; +#define BNGE_NUM_RX_RING_STATS 8 +#define BNGE_NUM_TX_RING_STATS 8 +#define BNGE_NUM_TPA_RING_STATS 6 +#define BNGE_RING_STATS_SIZE \ + ((BNGE_NUM_RX_RING_STATS + BNGE_NUM_TX_RING_STATS + \ + BNGE_NUM_TPA_RING_STATS) * 8) + + u16 max_tpa_v2; +#define BNGE_SUPPORTS_TPA(bd) ((bd)->max_tpa_v2) + + u8 num_tc; + u8 max_tc; + u8 max_lltc; /* lossless TCs */ + struct bnge_queue_info q_info[BNGE_MAX_QUEUE]; + u8 tc_to_qidx[BNGE_MAX_QUEUE]; + u8 q_ids[BNGE_MAX_QUEUE]; + u8 max_q; + u8 port_count; + + struct bnge_irq *irq_tbl; + u16 irqs_acquired; +}; + +static inline bool bnge_is_roce_en(struct bnge_dev *bd) +{ + return bd->flags & BNGE_EN_ROCE; +} + +static inline bool bnge_is_agg_reqd(struct bnge_dev *bd) +{ + if (bd->netdev) { + struct bnge_net *bn = netdev_priv(bd->netdev); + + if (bn->priv_flags & BNGE_NET_EN_TPA || + bn->priv_flags & BNGE_NET_EN_JUMBO) + return true; + else + return false; + } + + return true; +} + +bool bnge_aux_registered(struct bnge_dev *bd); + +#endif /* _BNGE_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_core.c b/drivers/net/ethernet/broadcom/bnge/bnge_core.c new file mode 100644 index 000000000000..68da656f2894 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_core.c @@ -0,0 
+1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <linux/init.h> +#include <linux/crash_dump.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "bnge.h" +#include "bnge_devlink.h" +#include "bnge_hwrm.h" +#include "bnge_hwrm_lib.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRV_SUMMARY); + +char bnge_driver_name[] = DRV_NAME; + +static const struct { + char *name; +} board_info[] = { + [BCM57708] = { "Broadcom BCM57708 50Gb/100Gb/200Gb/400Gb/800Gb Ethernet" }, +}; + +static const struct pci_device_id bnge_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x1780), .driver_data = BCM57708 }, + /* Required last entry */ + {0, } +}; +MODULE_DEVICE_TABLE(pci, bnge_pci_tbl); + +static void bnge_print_device_info(struct pci_dev *pdev, enum board_idx idx) +{ + struct device *dev = &pdev->dev; + + dev_info(dev, "%s found at mem %lx\n", board_info[idx].name, + (long)pci_resource_start(pdev, 0)); + + pcie_print_link_status(pdev); +} + +bool bnge_aux_registered(struct bnge_dev *bd) +{ + return false; +} + +static void bnge_nvm_cfg_ver_get(struct bnge_dev *bd) +{ + struct hwrm_nvm_get_dev_info_output nvm_info; + + if (!bnge_hwrm_nvm_dev_info(bd, &nvm_info)) + snprintf(bd->nvm_cfg_ver, FW_VER_STR_LEN, "%d.%d.%d", + nvm_info.nvm_cfg_ver_maj, nvm_info.nvm_cfg_ver_min, + nvm_info.nvm_cfg_ver_upd); +} + +static int bnge_func_qcaps(struct bnge_dev *bd) +{ + int rc; + + rc = bnge_hwrm_func_qcaps(bd); + if (rc) + return rc; + + rc = bnge_hwrm_queue_qportcfg(bd); + if (rc) { + dev_err(bd->dev, "query qportcfg failure rc: %d\n", rc); + return rc; + } + + rc = bnge_hwrm_func_resc_qcaps(bd); + if (rc) { + dev_err(bd->dev, "query resc caps failure rc: %d\n", rc); + return rc; + } + + rc = bnge_hwrm_func_qcfg(bd); + if (rc) { + dev_err(bd->dev, "query config failure rc: %d\n", rc); + return rc; + } + + rc = bnge_hwrm_vnic_qcaps(bd); + if (rc) { + dev_err(bd->dev, "vnic caps failure rc: %d\n", rc); + return rc; + } + + return 0; +} + +static void bnge_fw_unregister_dev(struct bnge_dev *bd) +{ + /* ctx mem free after unrgtr only */ + bnge_hwrm_func_drv_unrgtr(bd); + bnge_free_ctx_mem(bd); +} + +static int bnge_fw_register_dev(struct bnge_dev *bd) +{ + int rc; + + bd->fw_cap = 0; + rc = bnge_hwrm_ver_get(bd); + if (rc) { + dev_err(bd->dev, "Get Version command failed rc: %d\n", rc); + return rc; + } + + bnge_nvm_cfg_ver_get(bd); + + rc = bnge_hwrm_func_reset(bd); + if (rc) { + dev_err(bd->dev, "Failed to reset function rc: %d\n", rc); + return rc; + } + + bnge_hwrm_fw_set_time(bd); + + rc = bnge_hwrm_func_drv_rgtr(bd); + if (rc) { + dev_err(bd->dev, "Failed to rgtr with firmware rc: %d\n", rc); + return rc; + } + + rc = bnge_alloc_ctx_mem(bd); + if (rc) { + dev_err(bd->dev, "Failed to allocate ctx mem rc: %d\n", rc); + goto err_func_unrgtr; + } + + /* Get the resources and configuration from firmware */ + rc = bnge_func_qcaps(bd); + if (rc) { + dev_err(bd->dev, "Failed initial configuration rc: %d\n", rc); + rc = -ENODEV; + goto err_func_unrgtr; + } + + return 0; + +err_func_unrgtr: + bnge_fw_unregister_dev(bd); + return rc; +} + +static void bnge_pci_disable(struct pci_dev *pdev) +{ + pci_release_regions(pdev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); +} + +static int bnge_pci_enable(struct pci_dev *pdev) +{ + int rc; + + rc = pci_enable_device(pdev); + if (rc) { + dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); + return rc; + } + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, + "Cannot find PCI device base 
address, aborting\n"); + rc = -ENODEV; + goto err_pci_disable; + } + + rc = pci_request_regions(pdev, bnge_driver_name); + if (rc) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n"); + goto err_pci_disable; + } + + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + + pci_set_master(pdev); + + return 0; + +err_pci_disable: + pci_disable_device(pdev); + return rc; +} + +static void bnge_unmap_bars(struct pci_dev *pdev) +{ + struct bnge_dev *bd = pci_get_drvdata(pdev); + + if (bd->bar1) { + pci_iounmap(pdev, bd->bar1); + bd->bar1 = NULL; + } + + if (bd->bar0) { + pci_iounmap(pdev, bd->bar0); + bd->bar0 = NULL; + } +} + +static void bnge_set_max_func_irqs(struct bnge_dev *bd, + unsigned int max_irqs) +{ + bd->hw_resc.max_irqs = max_irqs; +} + +static int bnge_get_max_irq(struct pci_dev *pdev) +{ + u16 ctrl; + + pci_read_config_word(pdev, pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl); + return (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; +} + +static int bnge_map_db_bar(struct bnge_dev *bd) +{ + if (!bd->db_size) + return -ENODEV; + + bd->bar1 = pci_iomap(bd->pdev, 2, bd->db_size); + if (!bd->bar1) + return -ENOMEM; + + return 0; +} + +static int bnge_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + unsigned int max_irqs; + struct bnge_dev *bd; + int rc; + + if (pci_is_bridge(pdev)) + return -ENODEV; + + if (!pdev->msix_cap) { + dev_err(&pdev->dev, "MSIX capability missing, aborting\n"); + return -ENODEV; + } + + if (is_kdump_kernel()) { + pci_clear_master(pdev); + pcie_flr(pdev); + } + + rc = bnge_pci_enable(pdev); + if (rc) + return rc; + + bnge_print_device_info(pdev, ent->driver_data); + + bd = bnge_devlink_alloc(pdev); + if (!bd) { + dev_err(&pdev->dev, "Devlink allocation failed\n"); + rc = -ENOMEM; + goto err_pci_disable; + } + + bd->bar0 = pci_ioremap_bar(pdev, 0); + if (!bd->bar0) { + dev_err(&pdev->dev, "Failed mapping BAR-0, aborting\n"); + rc = -ENOMEM; + goto err_devl_free; + } + + rc = bnge_init_hwrm_resources(bd); + if (rc) + goto err_bar_unmap; + + rc = bnge_fw_register_dev(bd); + if (rc) { + dev_err(&pdev->dev, "Failed to register with firmware rc = %d\n", rc); + goto err_hwrm_cleanup; + } + + bnge_devlink_register(bd); + + max_irqs = bnge_get_max_irq(pdev); + bnge_set_max_func_irqs(bd, max_irqs); + + bnge_aux_init_dflt_config(bd); + + rc = bnge_net_init_dflt_config(bd); + if (rc) { + dev_err(&pdev->dev, "Error setting up default cfg to netdev rc = %d\n", + rc); + goto err_fw_reg; + } + + rc = bnge_map_db_bar(bd); + if (rc) { + dev_err(&pdev->dev, "Failed mapping doorbell BAR rc = %d, aborting\n", + rc); + goto err_config_uninit; + } + + rc = bnge_alloc_irqs(bd); + if (rc) { + dev_err(&pdev->dev, "Error IRQ allocation rc = %d\n", rc); + goto err_config_uninit; + } + + rc = bnge_netdev_alloc(bd, max_irqs); + if (rc) + goto err_free_irq; + + pci_save_state(pdev); + + return 0; + +err_free_irq: + bnge_free_irqs(bd); + +err_config_uninit: + bnge_net_uninit_dflt_config(bd); + +err_fw_reg: + bnge_devlink_unregister(bd); + bnge_fw_unregister_dev(bd); + +err_hwrm_cleanup: + bnge_cleanup_hwrm_resources(bd); + +err_bar_unmap: + bnge_unmap_bars(pdev); + +err_devl_free: + bnge_devlink_free(bd); + +err_pci_disable: + bnge_pci_disable(pdev); + return rc; +} + +static void bnge_remove_one(struct pci_dev *pdev) +{ + struct bnge_dev *bd = pci_get_drvdata(pdev); + + bnge_netdev_free(bd); + + bnge_free_irqs(bd); + + bnge_net_uninit_dflt_config(bd); + + bnge_devlink_unregister(bd); + + bnge_fw_unregister_dev(bd); + + bnge_cleanup_hwrm_resources(bd); + + 
bnge_unmap_bars(pdev); + + bnge_devlink_free(bd); + + bnge_pci_disable(pdev); +} + +static void bnge_shutdown(struct pci_dev *pdev) +{ + pci_disable_device(pdev); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, 0); + pci_set_power_state(pdev, PCI_D3hot); + } +} + +static struct pci_driver bnge_driver = { + .name = bnge_driver_name, + .id_table = bnge_pci_tbl, + .probe = bnge_probe_one, + .remove = bnge_remove_one, + .shutdown = bnge_shutdown, +}; + +static int __init bnge_init_module(void) +{ + return pci_register_driver(&bnge_driver); +} +module_init(bnge_init_module); + +static void __exit bnge_exit_module(void) +{ + pci_unregister_driver(&bnge_driver); +} +module_exit(bnge_exit_module); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_devlink.c b/drivers/net/ethernet/broadcom/bnge/bnge_devlink.c new file mode 100644 index 000000000000..a987afebd64d --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_devlink.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <linux/unaligned.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <net/devlink.h> + +#include "bnge.h" +#include "bnge_devlink.h" +#include "bnge_hwrm_lib.h" + +static int bnge_dl_info_put(struct bnge_dev *bd, struct devlink_info_req *req, + enum bnge_dl_version_type type, const char *key, + char *buf) +{ + if (!strlen(buf)) + return 0; + + if (!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI) || + !strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_ROCE)) + return 0; + + switch (type) { + case BNGE_VERSION_FIXED: + return devlink_info_version_fixed_put(req, key, buf); + case BNGE_VERSION_RUNNING: + return devlink_info_version_running_put(req, key, buf); + case BNGE_VERSION_STORED: + return devlink_info_version_stored_put(req, key, buf); + } + + return 0; +} + +static void bnge_vpd_read_info(struct bnge_dev *bd) +{ + struct pci_dev *pdev = bd->pdev; + unsigned int vpd_size, kw_len; + int pos, size; + u8 *vpd_data; + + vpd_data = pci_vpd_alloc(pdev, &vpd_size); + if (IS_ERR(vpd_data)) { + pci_warn(pdev, "Unable to read VPD\n"); + return; + } + + pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_PARTNO, &kw_len); + if (pos < 0) + goto read_sn; + + size = min_t(int, kw_len, BNGE_VPD_FLD_LEN - 1); + memcpy(bd->board_partno, &vpd_data[pos], size); + +read_sn: + pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_SERIALNO, + &kw_len); + if (pos < 0) + goto exit; + + size = min_t(int, kw_len, BNGE_VPD_FLD_LEN - 1); + memcpy(bd->board_serialno, &vpd_data[pos], size); + +exit: + kfree(vpd_data); +} + +#define HWRM_FW_VER_STR_LEN 16 + +static int bnge_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct hwrm_nvm_get_dev_info_output nvm_dev_info; + struct bnge_dev *bd = devlink_priv(devlink); + struct hwrm_ver_get_output *ver_resp; + char mgmt_ver[FW_VER_STR_LEN]; + char roce_ver[FW_VER_STR_LEN]; + char ncsi_ver[FW_VER_STR_LEN]; + char buf[32]; + + int rc; + + if (bd->dsn) { + char buf[32]; + u8 dsn[8]; + int rc; + + put_unaligned_le64(bd->dsn, dsn); + sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", + dsn[7], dsn[6], dsn[5], dsn[4], + dsn[3], dsn[2], dsn[1], dsn[0]); + rc = devlink_info_serial_number_put(req, buf); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set dsn"); + return rc; + } + } + + if (strlen(bd->board_serialno)) { + rc = devlink_info_board_serial_number_put(req, + bd->board_serialno); + if (rc) { + 
NL_SET_ERR_MSG_MOD(extack, + "Failed to set board serial number"); + return rc; + } + } + + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, + bd->board_partno); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set board part number"); + return rc; + } + + /* More information from HWRM ver get command */ + sprintf(buf, "%X", bd->chip_num); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set asic id"); + return rc; + } + + ver_resp = &bd->ver_resp; + sprintf(buf, "%c%d", 'A' + ver_resp->chip_rev, ver_resp->chip_metal); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set asic info"); + return rc; + } + + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, + bd->nvm_cfg_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set firmware version"); + return rc; + } + + buf[0] = 0; + strncat(buf, ver_resp->active_pkg_name, HWRM_FW_VER_STR_LEN); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW, buf); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set firmware generic version"); + return rc; + } + + if (ver_resp->flags & VER_GET_RESP_FLAGS_EXT_VER_AVAIL) { + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->hwrm_fw_major, ver_resp->hwrm_fw_minor, + ver_resp->hwrm_fw_build, ver_resp->hwrm_fw_patch); + + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->mgmt_fw_major, ver_resp->mgmt_fw_minor, + ver_resp->mgmt_fw_build, ver_resp->mgmt_fw_patch); + + snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->roce_fw_major, ver_resp->roce_fw_minor, + ver_resp->roce_fw_build, ver_resp->roce_fw_patch); + } else { + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->hwrm_fw_maj_8b, ver_resp->hwrm_fw_min_8b, + ver_resp->hwrm_fw_bld_8b, ver_resp->hwrm_fw_rsvd_8b); + + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->mgmt_fw_maj_8b, ver_resp->mgmt_fw_min_8b, + ver_resp->mgmt_fw_bld_8b, ver_resp->mgmt_fw_rsvd_8b); + + snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->roce_fw_maj_8b, ver_resp->roce_fw_min_8b, + ver_resp->roce_fw_bld_8b, ver_resp->roce_fw_rsvd_8b); + } + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, mgmt_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set firmware mgmt version"); + return rc; + } + + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API, + bd->hwrm_ver_supp); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set firmware mgmt api version"); + return rc; + } + + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, ncsi_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set ncsi firmware version"); + return rc; + } + + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set roce firmware version"); + return rc; + } + + rc = bnge_hwrm_nvm_dev_info(bd, &nvm_dev_info); + if (!(nvm_dev_info.flags & NVM_GET_DEV_INFO_RESP_FLAGS_FW_VER_VALID)) + return 0; + + buf[0] = 0; + strncat(buf, nvm_dev_info.pkg_name, HWRM_FW_VER_STR_LEN); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW, buf); + if (rc) 
{ + NL_SET_ERR_MSG_MOD(extack, + "Failed to set stored firmware package version"); + return rc; + } + + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.hwrm_fw_major, nvm_dev_info.hwrm_fw_minor, + nvm_dev_info.hwrm_fw_build, nvm_dev_info.hwrm_fw_patch); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, mgmt_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set stored firmware version"); + return rc; + } + + snprintf(ncsi_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.mgmt_fw_major, nvm_dev_info.mgmt_fw_minor, + nvm_dev_info.mgmt_fw_build, nvm_dev_info.mgmt_fw_patch); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_NCSI, ncsi_ver); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set stored ncsi firmware version"); + return rc; + } + + snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + nvm_dev_info.roce_fw_major, nvm_dev_info.roce_fw_minor, + nvm_dev_info.roce_fw_build, nvm_dev_info.roce_fw_patch); + rc = bnge_dl_info_put(bd, req, BNGE_VERSION_STORED, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + if (rc) + NL_SET_ERR_MSG_MOD(extack, + "Failed to set stored roce firmware version"); + + return rc; +} + +static const struct devlink_ops bnge_devlink_ops = { + .info_get = bnge_devlink_info_get, +}; + +void bnge_devlink_free(struct bnge_dev *bd) +{ + struct devlink *devlink = priv_to_devlink(bd); + + devlink_free(devlink); +} + +struct bnge_dev *bnge_devlink_alloc(struct pci_dev *pdev) +{ + struct devlink *devlink; + struct bnge_dev *bd; + + devlink = devlink_alloc(&bnge_devlink_ops, sizeof(*bd), &pdev->dev); + if (!devlink) + return NULL; + + bd = devlink_priv(devlink); + pci_set_drvdata(pdev, bd); + bd->dev = &pdev->dev; + bd->pdev = pdev; + + bd->dsn = pci_get_dsn(pdev); + if (!bd->dsn) + pci_warn(pdev, "Failed to get DSN\n"); + + bnge_vpd_read_info(bd); + + return bd; +} + +void bnge_devlink_register(struct bnge_dev *bd) +{ + struct devlink *devlink = priv_to_devlink(bd); + devlink_register(devlink); +} + +void bnge_devlink_unregister(struct bnge_dev *bd) +{ + struct devlink *devlink = priv_to_devlink(bd); + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_devlink.h b/drivers/net/ethernet/broadcom/bnge/bnge_devlink.h new file mode 100644 index 000000000000..c6575255e650 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_devlink.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_DEVLINK_H_ +#define _BNGE_DEVLINK_H_ + +enum bnge_dl_version_type { + BNGE_VERSION_FIXED, + BNGE_VERSION_RUNNING, + BNGE_VERSION_STORED, +}; + +void bnge_devlink_free(struct bnge_dev *bd); +struct bnge_dev *bnge_devlink_alloc(struct pci_dev *pdev); +void bnge_devlink_register(struct bnge_dev *bd); +void bnge_devlink_unregister(struct bnge_dev *bd); + +#endif /* _BNGE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c new file mode 100644 index 000000000000..569371c1b4f2 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom.
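The four devlink hooks above make up the whole bnge_devlink lifecycle. As a minimal sketch of how a PCI probe/remove pair would be expected to drive them (bnge_probe_one() and bnge_remove_one() are illustrative names; the actual core wiring is not part of this excerpt):

	static int bnge_probe_one(struct pci_dev *pdev,
				  const struct pci_device_id *ent)
	{
		struct bnge_dev *bd;

		/* devlink_alloc() plus drvdata, DSN and VPD setup */
		bd = bnge_devlink_alloc(pdev);
		if (!bd)
			return -ENOMEM;

		/* ... bring up the HWRM channel, query caps, add the netdev ... */

		bnge_devlink_register(bd);	/* exposes "devlink dev info" */
		return 0;
	}

	static void bnge_remove_one(struct pci_dev *pdev)
	{
		struct bnge_dev *bd = pci_get_drvdata(pdev);

		bnge_devlink_unregister(bd);
		/* ... tear down the netdev and HWRM resources ... */
		bnge_devlink_free(bd);
	}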
+ +#include <linux/unaligned.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <net/devlink.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> +#include <linux/ethtool_netlink.h> + +#include "bnge.h" +#include "bnge_ethtool.h" + +static void bnge_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->fw_version, bd->fw_ver_str, sizeof(info->fw_version)); + strscpy(info->bus_info, pci_name(bd->pdev), sizeof(info->bus_info)); +} + +static const struct ethtool_ops bnge_ethtool_ops = { + .get_drvinfo = bnge_get_drvinfo, +}; + +void bnge_set_ethtool_ops(struct net_device *dev) +{ + dev->ethtool_ops = &bnge_ethtool_ops; +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.h b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.h new file mode 100644 index 000000000000..21e96a0976d5 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_ETHTOOL_H_ +#define _BNGE_ETHTOOL_H_ + +void bnge_set_ethtool_ops(struct net_device *dev); + +#endif /* _BNGE_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.c new file mode 100644 index 000000000000..0f971af24142 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <asm/byteorder.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/pci.h> + +#include "bnge.h" +#include "bnge_hwrm.h" + +static u64 bnge_cal_sentinel(struct bnge_hwrm_ctx *ctx, u16 req_type) +{ + return (((uintptr_t)ctx) + req_type) ^ BNGE_HWRM_SENTINEL; +} + +int bnge_hwrm_req_create(struct bnge_dev *bd, void **req, u16 req_type, + u32 req_len) +{ + struct bnge_hwrm_ctx *ctx; + dma_addr_t dma_handle; + u8 *req_addr; + + if (req_len > BNGE_HWRM_CTX_OFFSET) + return -E2BIG; + + req_addr = dma_pool_alloc(bd->hwrm_dma_pool, GFP_KERNEL | __GFP_ZERO, + &dma_handle); + if (!req_addr) + return -ENOMEM; + + ctx = (struct bnge_hwrm_ctx *)(req_addr + BNGE_HWRM_CTX_OFFSET); + /* safety first, sentinel used to check for invalid requests */ + ctx->sentinel = bnge_cal_sentinel(ctx, req_type); + ctx->req_len = req_len; + ctx->req = (struct input *)req_addr; + ctx->resp = (struct output *)(req_addr + BNGE_HWRM_RESP_OFFSET); + ctx->dma_handle = dma_handle; + ctx->flags = 0; /* __GFP_ZERO, but be explicit regarding ownership */ + ctx->timeout = bd->hwrm_cmd_timeout ?: BNGE_DFLT_HWRM_CMD_TIMEOUT; + ctx->allocated = BNGE_HWRM_DMA_SIZE - BNGE_HWRM_CTX_OFFSET; + ctx->gfp = GFP_KERNEL; + ctx->slice_addr = NULL; + + /* initialize common request fields */ + ctx->req->req_type = cpu_to_le16(req_type); + ctx->req->resp_addr = cpu_to_le64(dma_handle + BNGE_HWRM_RESP_OFFSET); + ctx->req->cmpl_ring = cpu_to_le16(BNGE_HWRM_NO_CMPL_RING); + ctx->req->target_id = cpu_to_le16(BNGE_HWRM_TARGET); + *req = ctx->req; + + return 0; +} + +static struct bnge_hwrm_ctx *__hwrm_ctx_get(struct bnge_dev *bd, u8 *req_addr) +{ + void *ctx_addr = req_addr + BNGE_HWRM_CTX_OFFSET; + struct input *req = (struct input *)req_addr; + struct bnge_hwrm_ctx *ctx = ctx_addr; + u64 sentinel; + + if (!req) { + dev_err(bd->dev, "null 
HWRM request"); + dump_stack(); + return NULL; + } + + /* HWRM API has no type safety, verify sentinel to validate address */ + sentinel = bnge_cal_sentinel(ctx, le16_to_cpu(req->req_type)); + if (ctx->sentinel != sentinel) { + dev_err(bd->dev, "HWRM sentinel mismatch, req_type = %u\n", + (u32)le16_to_cpu(req->req_type)); + dump_stack(); + return NULL; + } + + return ctx; +} + +void bnge_hwrm_req_timeout(struct bnge_dev *bd, + void *req, unsigned int timeout) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + + if (ctx) + ctx->timeout = timeout; +} + +void bnge_hwrm_req_alloc_flags(struct bnge_dev *bd, void *req, gfp_t gfp) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + + if (ctx) + ctx->gfp = gfp; +} + +void bnge_hwrm_req_flags(struct bnge_dev *bd, void *req, + enum bnge_hwrm_ctx_flags flags) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + + if (ctx) + ctx->flags |= (flags & BNGE_HWRM_API_FLAGS); +} + +void *bnge_hwrm_req_hold(struct bnge_dev *bd, void *req) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + struct input *input = (struct input *)req; + + if (!ctx) + return NULL; + + if (ctx->flags & BNGE_HWRM_INTERNAL_CTX_OWNED) { + dev_err(bd->dev, "HWRM context already owned, req_type = %u\n", + (u32)le16_to_cpu(input->req_type)); + dump_stack(); + return NULL; + } + + ctx->flags |= BNGE_HWRM_INTERNAL_CTX_OWNED; + return ((u8 *)req) + BNGE_HWRM_RESP_OFFSET; +} + +static void __hwrm_ctx_invalidate(struct bnge_dev *bd, + struct bnge_hwrm_ctx *ctx) +{ + void *addr = ((u8 *)ctx) - BNGE_HWRM_CTX_OFFSET; + dma_addr_t dma_handle = ctx->dma_handle; /* save before invalidate */ + + /* unmap any auxiliary DMA slice */ + if (ctx->slice_addr) + dma_free_coherent(bd->dev, ctx->slice_size, + ctx->slice_addr, ctx->slice_handle); + + /* invalidate, ensure ownership, sentinel and dma_handle are cleared */ + memset(ctx, 0, sizeof(struct bnge_hwrm_ctx)); + + /* return the buffer to the DMA pool */ + if (dma_handle) + dma_pool_free(bd->hwrm_dma_pool, addr, dma_handle); +} + +void bnge_hwrm_req_drop(struct bnge_dev *bd, void *req) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + + if (ctx) + __hwrm_ctx_invalidate(bd, ctx); +} + +static int bnge_map_hwrm_error(u32 hwrm_err) +{ + switch (hwrm_err) { + case HWRM_ERR_CODE_SUCCESS: + return 0; + case HWRM_ERR_CODE_RESOURCE_LOCKED: + return -EROFS; + case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED: + return -EACCES; + case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR: + return -ENOSPC; + case HWRM_ERR_CODE_INVALID_PARAMS: + case HWRM_ERR_CODE_INVALID_FLAGS: + case HWRM_ERR_CODE_INVALID_ENABLES: + case HWRM_ERR_CODE_UNSUPPORTED_TLV: + case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR: + return -EINVAL; + case HWRM_ERR_CODE_NO_BUFFER: + return -ENOMEM; + case HWRM_ERR_CODE_HOT_RESET_PROGRESS: + case HWRM_ERR_CODE_BUSY: + return -EAGAIN; + case HWRM_ERR_CODE_CMD_NOT_SUPPORTED: + return -EOPNOTSUPP; + case HWRM_ERR_CODE_PF_UNAVAILABLE: + return -ENODEV; + default: + return -EIO; + } +} + +static struct bnge_hwrm_wait_token * +bnge_hwrm_create_token(struct bnge_dev *bd, enum bnge_hwrm_chnl dst) +{ + struct bnge_hwrm_wait_token *token; + + token = kzalloc(sizeof(*token), GFP_KERNEL); + if (!token) + return NULL; + + mutex_lock(&bd->hwrm_cmd_lock); + + token->dst = dst; + token->state = BNGE_HWRM_PENDING; + if (dst == BNGE_HWRM_CHNL_CHIMP) { + token->seq_id = bd->hwrm_cmd_seq++; + hlist_add_head_rcu(&token->node, &bd->hwrm_pending_list); + } else { + token->seq_id = bd->hwrm_cmd_kong_seq++; + } + + return token; +} + +static void 
+bnge_hwrm_destroy_token(struct bnge_dev *bd, struct bnge_hwrm_wait_token *token) +{ + if (token->dst == BNGE_HWRM_CHNL_CHIMP) { + hlist_del_rcu(&token->node); + kfree_rcu(token, rcu); + } else { + kfree(token); + } + mutex_unlock(&bd->hwrm_cmd_lock); +} + +static void bnge_hwrm_req_dbg(struct bnge_dev *bd, struct input *req) +{ + u32 ring = le16_to_cpu(req->cmpl_ring); + u32 type = le16_to_cpu(req->req_type); + u32 tgt = le16_to_cpu(req->target_id); + u32 seq = le16_to_cpu(req->seq_id); + char opt[32] = "\n"; + + if (unlikely(ring != (u16)BNGE_HWRM_NO_CMPL_RING)) + snprintf(opt, 16, " ring %d\n", ring); + + if (unlikely(tgt != BNGE_HWRM_TARGET)) + snprintf(opt + strlen(opt) - 1, 16, " tgt 0x%x\n", tgt); + + dev_dbg(bd->dev, "sent hwrm req_type 0x%x seq id 0x%x%s", + type, seq, opt); +} + +#define bnge_hwrm_err(bd, ctx, fmt, ...) \ + do { \ + if ((ctx)->flags & BNGE_HWRM_CTX_SILENT) \ + dev_dbg((bd)->dev, fmt, __VA_ARGS__); \ + else \ + dev_err((bd)->dev, fmt, __VA_ARGS__); \ + } while (0) + +static int __hwrm_send_ctx(struct bnge_dev *bd, struct bnge_hwrm_ctx *ctx) +{ + u32 doorbell_offset = BNGE_GRCPF_REG_CHIMP_COMM_TRIGGER; + enum bnge_hwrm_chnl dst = BNGE_HWRM_CHNL_CHIMP; + u32 bar_offset = BNGE_GRCPF_REG_CHIMP_COMM; + struct bnge_hwrm_wait_token *token = NULL; + u16 max_req_len = BNGE_HWRM_MAX_REQ_LEN; + unsigned int i, timeout, tmo_count; + u32 *data = (u32 *)ctx->req; + u32 msg_len = ctx->req_len; + int rc = -EBUSY; + u32 req_type; + u16 len = 0; + u8 *valid; + + if (ctx->flags & BNGE_HWRM_INTERNAL_RESP_DIRTY) + memset(ctx->resp, 0, PAGE_SIZE); + + req_type = le16_to_cpu(ctx->req->req_type); + + if (msg_len > BNGE_HWRM_MAX_REQ_LEN && + msg_len > bd->hwrm_max_ext_req_len) { + dev_warn(bd->dev, "oversized hwrm request, req_type 0x%x", + req_type); + rc = -E2BIG; + goto exit; + } + + token = bnge_hwrm_create_token(bd, dst); + if (!token) { + rc = -ENOMEM; + goto exit; + } + ctx->req->seq_id = cpu_to_le16(token->seq_id); + + /* Ensure any associated DMA buffers are written before doorbell */ + wmb(); + + /* Write request msg to hwrm channel */ + __iowrite32_copy(bd->bar0 + bar_offset, data, msg_len / 4); + + for (i = msg_len; i < max_req_len; i += 4) + writel(0, bd->bar0 + bar_offset + i); + + /* Ring channel doorbell */ + writel(1, bd->bar0 + doorbell_offset); + + bnge_hwrm_req_dbg(bd, ctx->req); + + /* Limit timeout to an upper limit */ + timeout = min(ctx->timeout, + bd->hwrm_cmd_max_timeout ?: BNGE_HWRM_CMD_MAX_TIMEOUT); + /* convert timeout to usec */ + timeout *= 1000; + + i = 0; + /* Short timeout for the first few iterations: + * number of loops = number of loops for short timeout + + * number of loops for standard timeout. 
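+ * For example, with the default 500 ms timeout (scaled to 500000 usec above): 5 short sleeps of 3-10 usec each, then up to + * DIV_ROUND_UP(500000 - 5 * 3, 25) = 20000 sleeps of 25-40 usec.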
+ */ + tmo_count = BNGE_HWRM_SHORT_TIMEOUT_COUNTER; + timeout = timeout - BNGE_HWRM_SHORT_MIN_TIMEOUT * + BNGE_HWRM_SHORT_TIMEOUT_COUNTER; + tmo_count += DIV_ROUND_UP(timeout, BNGE_HWRM_MIN_TIMEOUT); + + if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) { + /* Wait until hwrm response cmpl interrupt is processed */ + while (READ_ONCE(token->state) < BNGE_HWRM_COMPLETE && + i++ < tmo_count) { + /* on first few passes, just barely sleep */ + if (i < BNGE_HWRM_SHORT_TIMEOUT_COUNTER) { + usleep_range(BNGE_HWRM_SHORT_MIN_TIMEOUT, + BNGE_HWRM_SHORT_MAX_TIMEOUT); + } else { + usleep_range(BNGE_HWRM_MIN_TIMEOUT, + BNGE_HWRM_MAX_TIMEOUT); + } + } + + if (READ_ONCE(token->state) != BNGE_HWRM_COMPLETE) { + bnge_hwrm_err(bd, ctx, "No hwrm cmpl received: 0x%x\n", + req_type); + goto exit; + } + len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len)); + valid = ((u8 *)ctx->resp) + len - 1; + } else { + __le16 seen_out_of_seq = ctx->req->seq_id; /* will never see */ + int j; + + /* Check if response len is updated */ + for (i = 0; i < tmo_count; i++) { + if (token && + READ_ONCE(token->state) == BNGE_HWRM_DEFERRED) { + bnge_hwrm_destroy_token(bd, token); + token = NULL; + } + + len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len)); + if (len) { + __le16 resp_seq = READ_ONCE(ctx->resp->seq_id); + + if (resp_seq == ctx->req->seq_id) + break; + if (resp_seq != seen_out_of_seq) { + dev_warn(bd->dev, "Discarding out of seq response: 0x%x for msg {0x%x 0x%x}\n", + le16_to_cpu(resp_seq), req_type, le16_to_cpu(ctx->req->seq_id)); + seen_out_of_seq = resp_seq; + } + } + + /* on first few passes, just barely sleep */ + if (i < BNGE_HWRM_SHORT_TIMEOUT_COUNTER) { + usleep_range(BNGE_HWRM_SHORT_MIN_TIMEOUT, + BNGE_HWRM_SHORT_MAX_TIMEOUT); + } else { + usleep_range(BNGE_HWRM_MIN_TIMEOUT, + BNGE_HWRM_MAX_TIMEOUT); + } + } + + if (i >= tmo_count) { + bnge_hwrm_err(bd, ctx, + "Error (timeout: %u) msg {0x%x 0x%x} len:%d\n", + bnge_hwrm_timeout(i), req_type, + le16_to_cpu(ctx->req->seq_id), len); + goto exit; + } + + /* Last byte of resp contains valid bit */ + valid = ((u8 *)ctx->resp) + len - 1; + for (j = 0; j < BNGE_HWRM_FIN_WAIT_USEC; ) { + /* make sure we read from updated DMA memory */ + dma_rmb(); + if (*valid) + break; + if (j < 10) { + udelay(1); + j++; + } else { + usleep_range(20, 30); + j += 20; + } + } + + if (j >= BNGE_HWRM_FIN_WAIT_USEC) { + bnge_hwrm_err(bd, ctx, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n", + bnge_hwrm_timeout(i) + j, req_type, + le16_to_cpu(ctx->req->seq_id), len, *valid); + goto exit; + } + } + + /* Zero valid bit for compatibility. Valid bit in an older spec + * may become a new field in a newer spec. We must make sure that + * a new field not implemented by old spec will read zero. 
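+ * For example, if a later spec turns this trailing byte into a flags + * field, zeroing it here guarantees that a response from older + * firmware reads as "no flags set" rather than as a stale valid byte.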
+ */ + *valid = 0; + rc = le16_to_cpu(ctx->resp->error_code); + if (rc == HWRM_ERR_CODE_BUSY && !(ctx->flags & BNGE_HWRM_CTX_SILENT)) + dev_warn(bd->dev, "FW returned busy, hwrm req_type 0x%x\n", + req_type); + else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) + bnge_hwrm_err(bd, ctx, "hwrm req_type 0x%x seq id 0x%x error %d\n", + req_type, le16_to_cpu(ctx->req->seq_id), rc); + rc = bnge_map_hwrm_error(rc); + +exit: + if (token) + bnge_hwrm_destroy_token(bd, token); + if (ctx->flags & BNGE_HWRM_INTERNAL_CTX_OWNED) + ctx->flags |= BNGE_HWRM_INTERNAL_RESP_DIRTY; + else + __hwrm_ctx_invalidate(bd, ctx); + return rc; +} + +int bnge_hwrm_req_send(struct bnge_dev *bd, void *req) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + + if (!ctx) + return -EINVAL; + + return __hwrm_send_ctx(bd, ctx); +} + +int bnge_hwrm_req_send_silent(struct bnge_dev *bd, void *req) +{ + bnge_hwrm_req_flags(bd, req, BNGE_HWRM_CTX_SILENT); + return bnge_hwrm_req_send(bd, req); +} + +void * +bnge_hwrm_req_dma_slice(struct bnge_dev *bd, void *req, u32 size, + dma_addr_t *dma_handle) +{ + struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req); + u8 *end = ((u8 *)req) + BNGE_HWRM_DMA_SIZE; + struct input *input = req; + u8 *addr, *req_addr = req; + u32 max_offset, offset; + + if (!ctx) + return NULL; + + max_offset = BNGE_HWRM_DMA_SIZE - ctx->allocated; + offset = max_offset - size; + offset = ALIGN_DOWN(offset, BNGE_HWRM_DMA_ALIGN); + addr = req_addr + offset; + + if (addr < req_addr + max_offset && req_addr + ctx->req_len <= addr) { + ctx->allocated = end - addr; + *dma_handle = ctx->dma_handle + offset; + return addr; + } + + if (ctx->slice_addr) { + dev_err(bd->dev, "HWRM refusing to reallocate DMA slice, req_type = %u\n", + (u32)le16_to_cpu(input->req_type)); + dump_stack(); + return NULL; + } + + addr = dma_alloc_coherent(bd->dev, size, dma_handle, ctx->gfp); + if (!addr) + return NULL; + + ctx->slice_addr = addr; + ctx->slice_size = size; + ctx->slice_handle = *dma_handle; + + return addr; +} + +void bnge_cleanup_hwrm_resources(struct bnge_dev *bd) +{ + struct bnge_hwrm_wait_token *token; + + dma_pool_destroy(bd->hwrm_dma_pool); + bd->hwrm_dma_pool = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(token, &bd->hwrm_pending_list, node) + WRITE_ONCE(token->state, BNGE_HWRM_CANCELLED); + rcu_read_unlock(); +} + +int bnge_init_hwrm_resources(struct bnge_dev *bd) +{ + bd->hwrm_dma_pool = dma_pool_create("bnge_hwrm", bd->dev, + BNGE_HWRM_DMA_SIZE, + BNGE_HWRM_DMA_ALIGN, 0); + if (!bd->hwrm_dma_pool) + return -ENOMEM; + + INIT_HLIST_HEAD(&bd->hwrm_pending_list); + mutex_init(&bd->hwrm_cmd_lock); + + return 0; +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.h b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.h new file mode 100644 index 000000000000..012aa4fa5aa9 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_HWRM_H_ +#define _BNGE_HWRM_H_ + +#include "../bnxt/bnxt_hsi.h" + +enum bnge_hwrm_ctx_flags { + BNGE_HWRM_INTERNAL_CTX_OWNED = BIT(0), + BNGE_HWRM_INTERNAL_RESP_DIRTY = BIT(1), + BNGE_HWRM_CTX_SILENT = BIT(2), + BNGE_HWRM_FULL_WAIT = BIT(3), +}; + +#define BNGE_HWRM_API_FLAGS (BNGE_HWRM_CTX_SILENT | BNGE_HWRM_FULL_WAIT) + +struct bnge_hwrm_ctx { + u64 sentinel; + dma_addr_t dma_handle; + struct output *resp; + struct input *req; + dma_addr_t slice_handle; + void *slice_addr; + u32 slice_size; + u32 req_len; + enum bnge_hwrm_ctx_flags flags; + unsigned int timeout; + 
u32 allocated; + gfp_t gfp; +}; + +enum bnge_hwrm_wait_state { + BNGE_HWRM_PENDING, + BNGE_HWRM_DEFERRED, + BNGE_HWRM_COMPLETE, + BNGE_HWRM_CANCELLED, +}; + +enum bnge_hwrm_chnl { BNGE_HWRM_CHNL_CHIMP, BNGE_HWRM_CHNL_KONG }; + +struct bnge_hwrm_wait_token { + struct rcu_head rcu; + struct hlist_node node; + enum bnge_hwrm_wait_state state; + enum bnge_hwrm_chnl dst; + u16 seq_id; +}; + +#define BNGE_DFLT_HWRM_CMD_TIMEOUT 500 + +#define BNGE_GRCPF_REG_CHIMP_COMM 0x0 +#define BNGE_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100 + +#define BNGE_HWRM_MAX_REQ_LEN (bd->hwrm_max_req_len) +#define BNGE_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) +#define BNGE_HWRM_CMD_MAX_TIMEOUT 40000U +#define BNGE_SHORT_HWRM_CMD_TIMEOUT 20 +#define BNGE_HWRM_CMD_TIMEOUT (bd->hwrm_cmd_timeout) +#define BNGE_HWRM_RESET_TIMEOUT ((BNGE_HWRM_CMD_TIMEOUT) * 4) +#define BNGE_HWRM_TARGET 0xffff +#define BNGE_HWRM_NO_CMPL_RING -1 +#define BNGE_HWRM_REQ_MAX_SIZE 128 +#define BNGE_HWRM_DMA_SIZE (2 * PAGE_SIZE) /* space for req+resp */ +#define BNGE_HWRM_RESP_RESERVED PAGE_SIZE +#define BNGE_HWRM_RESP_OFFSET (BNGE_HWRM_DMA_SIZE - \ + BNGE_HWRM_RESP_RESERVED) +#define BNGE_HWRM_CTX_OFFSET (BNGE_HWRM_RESP_OFFSET - \ + sizeof(struct bnge_hwrm_ctx)) +#define BNGE_HWRM_DMA_ALIGN 16 +#define BNGE_HWRM_SENTINEL 0xb6e1f68a12e9a7eb /* arbitrary value */ +#define BNGE_HWRM_SHORT_MIN_TIMEOUT 3 +#define BNGE_HWRM_SHORT_MAX_TIMEOUT 10 +#define BNGE_HWRM_SHORT_TIMEOUT_COUNTER 5 + +#define BNGE_HWRM_MIN_TIMEOUT 25 +#define BNGE_HWRM_MAX_TIMEOUT 40 + +static inline unsigned int bnge_hwrm_timeout(unsigned int n) +{ + return n <= BNGE_HWRM_SHORT_TIMEOUT_COUNTER ? + n * BNGE_HWRM_SHORT_MIN_TIMEOUT : + BNGE_HWRM_SHORT_TIMEOUT_COUNTER * + BNGE_HWRM_SHORT_MIN_TIMEOUT + + (n - BNGE_HWRM_SHORT_TIMEOUT_COUNTER) * + BNGE_HWRM_MIN_TIMEOUT; +} + +#define BNGE_HWRM_FIN_WAIT_USEC 50000 + +void bnge_cleanup_hwrm_resources(struct bnge_dev *bd); +int bnge_init_hwrm_resources(struct bnge_dev *bd); + +int bnge_hwrm_req_create(struct bnge_dev *bd, void **req, u16 req_type, + u32 req_len); +#define bnge_hwrm_req_init(bd, req, req_type) \ + bnge_hwrm_req_create((bd), (void **)&(req), (req_type), \ + sizeof(*(req))) +void *bnge_hwrm_req_hold(struct bnge_dev *bd, void *req); +void bnge_hwrm_req_drop(struct bnge_dev *bd, void *req); +void bnge_hwrm_req_flags(struct bnge_dev *bd, void *req, + enum bnge_hwrm_ctx_flags flags); +void bnge_hwrm_req_timeout(struct bnge_dev *bd, void *req, + unsigned int timeout); +int bnge_hwrm_req_send(struct bnge_dev *bd, void *req); +int bnge_hwrm_req_send_silent(struct bnge_dev *bd, void *req); +void bnge_hwrm_req_alloc_flags(struct bnge_dev *bd, void *req, gfp_t flags); +void *bnge_hwrm_req_dma_slice(struct bnge_dev *bd, void *req, u32 size, + dma_addr_t *dma); +#endif /* _BNGE_HWRM_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c new file mode 100644 index 000000000000..19091318cfdd --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -0,0 +1,703 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. 
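bnge_hwrm.h above is the entire request API surface, and the library code below applies one pattern throughout: init the request, hold it if the response must be read, send, then drop. A minimal sketch of that pattern (bnge_example_qcfg() is an illustrative helper, not part of the patch; HWRM_FUNC_QCFG is just a convenient message):

	static int bnge_example_qcfg(struct bnge_dev *bd)
	{
		struct hwrm_func_qcfg_output *resp;
		struct hwrm_func_qcfg_input *req;
		int rc;

		rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCFG);
		if (rc)
			return rc;

		req->fid = cpu_to_le16(0xffff);	/* 0xffff means "this function" */

		/* hold keeps the DMA buffer (and thus resp) valid after the send */
		resp = bnge_hwrm_req_hold(bd, req);
		rc = bnge_hwrm_req_send(bd, req);
		if (!rc)
			dev_dbg(bd->dev, "fid 0x%x\n", le16_to_cpu(resp->fid));
		bnge_hwrm_req_drop(bd, req);	/* releases both req and resp */
		return rc;
	}

Without the hold/drop pair, bnge_hwrm_req_send() invalidates the request context itself, which is why one-shot commands such as bnge_hwrm_func_reset() below send without holding.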
+ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> + +#include "bnge.h" +#include "../bnxt/bnxt_hsi.h" +#include "bnge_hwrm.h" +#include "bnge_hwrm_lib.h" +#include "bnge_rmem.h" +#include "bnge_resc.h" + +int bnge_hwrm_ver_get(struct bnge_dev *bd) +{ + u32 dev_caps_cfg, hwrm_ver, hwrm_spec_code; + u16 fw_maj, fw_min, fw_bld, fw_rsv; + struct hwrm_ver_get_output *resp; + struct hwrm_ver_get_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VER_GET); + if (rc) + return rc; + + bnge_hwrm_req_flags(bd, req, BNGE_HWRM_FULL_WAIT); + bd->hwrm_max_req_len = HWRM_MAX_REQ_LEN; + req->hwrm_intf_maj = HWRM_VERSION_MAJOR; + req->hwrm_intf_min = HWRM_VERSION_MINOR; + req->hwrm_intf_upd = HWRM_VERSION_UPDATE; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto hwrm_ver_get_exit; + + memcpy(&bd->ver_resp, resp, sizeof(struct hwrm_ver_get_output)); + + hwrm_spec_code = resp->hwrm_intf_maj_8b << 16 | + resp->hwrm_intf_min_8b << 8 | + resp->hwrm_intf_upd_8b; + hwrm_ver = HWRM_VERSION_MAJOR << 16 | HWRM_VERSION_MINOR << 8 | + HWRM_VERSION_UPDATE; + + if (hwrm_spec_code > hwrm_ver) + snprintf(bd->hwrm_ver_supp, FW_VER_STR_LEN, "%d.%d.%d", + HWRM_VERSION_MAJOR, HWRM_VERSION_MINOR, + HWRM_VERSION_UPDATE); + else + snprintf(bd->hwrm_ver_supp, FW_VER_STR_LEN, "%d.%d.%d", + resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b, + resp->hwrm_intf_upd_8b); + + fw_maj = le16_to_cpu(resp->hwrm_fw_major); + fw_min = le16_to_cpu(resp->hwrm_fw_minor); + fw_bld = le16_to_cpu(resp->hwrm_fw_build); + fw_rsv = le16_to_cpu(resp->hwrm_fw_patch); + + bd->fw_ver_code = BNGE_FW_VER_CODE(fw_maj, fw_min, fw_bld, fw_rsv); + snprintf(bd->fw_ver_str, FW_VER_STR_LEN, "%d.%d.%d.%d", + fw_maj, fw_min, fw_bld, fw_rsv); + + if (strlen(resp->active_pkg_name)) { + int fw_ver_len = strlen(bd->fw_ver_str); + + snprintf(bd->fw_ver_str + fw_ver_len, + FW_VER_STR_LEN - fw_ver_len - 1, "/pkg %s", + resp->active_pkg_name); + bd->fw_cap |= BNGE_FW_CAP_PKG_VER; + } + + bd->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout); + if (!bd->hwrm_cmd_timeout) + bd->hwrm_cmd_timeout = BNGE_DFLT_HWRM_CMD_TIMEOUT; + bd->hwrm_cmd_max_timeout = le16_to_cpu(resp->max_req_timeout) * 1000; + if (!bd->hwrm_cmd_max_timeout) + bd->hwrm_cmd_max_timeout = BNGE_HWRM_CMD_MAX_TIMEOUT; + else if (bd->hwrm_cmd_max_timeout > BNGE_HWRM_CMD_MAX_TIMEOUT) + dev_warn(bd->dev, "Default HWRM commands max timeout increased to %d seconds\n", + bd->hwrm_cmd_max_timeout / 1000); + + bd->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); + bd->hwrm_max_ext_req_len = le16_to_cpu(resp->max_ext_req_len); + + if (bd->hwrm_max_ext_req_len < HWRM_MAX_REQ_LEN) + bd->hwrm_max_ext_req_len = HWRM_MAX_REQ_LEN; + + bd->chip_num = le16_to_cpu(resp->chip_num); + bd->chip_rev = resp->chip_rev; + + dev_caps_cfg = le32_to_cpu(resp->dev_caps_cfg); + if ((dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED) && + (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED)) + bd->fw_cap |= BNGE_FW_CAP_SHORT_CMD; + + if (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED) + bd->fw_cap |= BNGE_FW_CAP_KONG_MB_CHNL; + + if (dev_caps_cfg & + VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED) + bd->fw_cap |= BNGE_FW_CAP_CFA_ADV_FLOW; + +hwrm_ver_get_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int +bnge_hwrm_nvm_dev_info(struct bnge_dev *bd, + struct hwrm_nvm_get_dev_info_output *nvm_info) +{ + struct hwrm_nvm_get_dev_info_output *resp; + struct 
hwrm_nvm_get_dev_info_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_NVM_GET_DEV_INFO); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) + memcpy(nvm_info, resp, sizeof(*resp)); + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_func_reset(struct bnge_dev *bd) +{ + struct hwrm_func_reset_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_RESET); + if (rc) + return rc; + + req->enables = 0; + bnge_hwrm_req_timeout(bd, req, BNGE_HWRM_RESET_TIMEOUT); + return bnge_hwrm_req_send(bd, req); +} + +int bnge_hwrm_fw_set_time(struct bnge_dev *bd) +{ + struct hwrm_fw_set_time_input *req; + struct tm tm; + int rc; + + time64_to_tm(ktime_get_real_seconds(), 0, &tm); + + rc = bnge_hwrm_req_init(bd, req, HWRM_FW_SET_TIME); + if (rc) + return rc; + + req->year = cpu_to_le16(1900 + tm.tm_year); + req->month = 1 + tm.tm_mon; + req->day = tm.tm_mday; + req->hour = tm.tm_hour; + req->minute = tm.tm_min; + req->second = tm.tm_sec; + return bnge_hwrm_req_send(bd, req); +} + +int bnge_hwrm_func_drv_rgtr(struct bnge_dev *bd) +{ + struct hwrm_func_drv_rgtr_output *resp; + struct hwrm_func_drv_rgtr_input *req; + u32 flags; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_DRV_RGTR); + if (rc) + return rc; + + req->enables = cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE | + FUNC_DRV_RGTR_REQ_ENABLES_VER | + FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD); + + req->os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX); + flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE; + + req->flags = cpu_to_le32(flags); + req->ver_maj_8b = DRV_VER_MAJ; + req->ver_min_8b = DRV_VER_MIN; + req->ver_upd_8b = DRV_VER_UPD; + req->ver_maj = cpu_to_le16(DRV_VER_MAJ); + req->ver_min = cpu_to_le16(DRV_VER_MIN); + req->ver_upd = cpu_to_le16(DRV_VER_UPD); + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) { + set_bit(BNGE_STATE_DRV_REGISTERED, &bd->state); + if (resp->flags & + cpu_to_le32(FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED)) + bd->fw_cap |= BNGE_FW_CAP_IF_CHANGE; + } + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_func_drv_unrgtr(struct bnge_dev *bd) +{ + struct hwrm_func_drv_unrgtr_input *req; + int rc; + + if (!test_and_clear_bit(BNGE_STATE_DRV_REGISTERED, &bd->state)) + return 0; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_DRV_UNRGTR); + if (rc) + return rc; + return bnge_hwrm_req_send(bd, req); +} + +static void bnge_init_ctx_initializer(struct bnge_ctx_mem_type *ctxm, + u8 init_val, u8 init_offset, + bool init_mask_set) +{ + ctxm->init_value = init_val; + ctxm->init_offset = BNGE_CTX_INIT_INVALID_OFFSET; + if (init_mask_set) + ctxm->init_offset = init_offset * 4; + else + ctxm->init_value = 0; +} + +static int bnge_alloc_all_ctx_pg_info(struct bnge_dev *bd, int ctx_max) +{ + struct bnge_ctx_mem_info *ctx = bd->ctx; + u16 type; + + for (type = 0; type < ctx_max; type++) { + struct bnge_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + int n = 1; + + if (!ctxm->max_entries) + continue; + + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + ctxm->pg_info = kcalloc(n, sizeof(*ctxm->pg_info), GFP_KERNEL); + if (!ctxm->pg_info) + return -ENOMEM; + } + + return 0; +} + +#define BNGE_CTX_INIT_VALID(flags) \ + (!!((flags) & \ + FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT)) + +int bnge_hwrm_func_backing_store_qcaps(struct bnge_dev *bd) +{ + struct hwrm_func_backing_store_qcaps_v2_output *resp; + struct hwrm_func_backing_store_qcaps_v2_input *req; + 
struct bnge_ctx_mem_info *ctx; + u16 type; + int rc; + + if (bd->ctx) + return 0; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); + if (rc) + return rc; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + bd->ctx = ctx; + + resp = bnge_hwrm_req_hold(bd, req); + + for (type = 0; type < BNGE_CTX_V2_MAX; ) { + struct bnge_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + u8 init_val, init_off, i; + __le32 *p; + u32 flags; + + req->type = cpu_to_le16(type); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto ctx_done; + flags = le32_to_cpu(resp->flags); + type = le16_to_cpu(resp->next_valid_type); + if (!(flags & + FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID)) + continue; + + ctxm->type = le16_to_cpu(resp->type); + ctxm->entry_size = le16_to_cpu(resp->entry_size); + ctxm->flags = flags; + ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); + ctxm->entry_multiple = resp->entry_multiple; + ctxm->max_entries = le32_to_cpu(resp->max_num_entries); + ctxm->min_entries = le32_to_cpu(resp->min_num_entries); + init_val = resp->ctx_init_value; + init_off = resp->ctx_init_offset; + bnge_init_ctx_initializer(ctxm, init_val, init_off, + BNGE_CTX_INIT_VALID(flags)); + ctxm->split_entry_cnt = min_t(u8, resp->subtype_valid_cnt, + BNGE_MAX_SPLIT_ENTRY); + for (i = 0, p = &resp->split_entry_0; i < ctxm->split_entry_cnt; + i++, p++) + ctxm->split[i] = le32_to_cpu(*p); + } + rc = bnge_alloc_all_ctx_pg_info(bd, BNGE_CTX_V2_MAX); + +ctx_done: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +static void bnge_hwrm_set_pg_attr(struct bnge_ring_mem_info *rmem, u8 *pg_attr, + __le64 *pg_dir) +{ + if (!rmem->nr_pages) + return; + + BNGE_SET_CTX_PAGE_ATTR(*pg_attr); + if (rmem->depth >= 1) { + if (rmem->depth == 2) + *pg_attr |= 2; + else + *pg_attr |= 1; + *pg_dir = cpu_to_le64(rmem->dma_pg_tbl); + } else { + *pg_dir = cpu_to_le64(rmem->dma_arr[0]); + } +} + +int bnge_hwrm_func_backing_store(struct bnge_dev *bd, + struct bnge_ctx_mem_type *ctxm, + bool last) +{ + struct hwrm_func_backing_store_cfg_v2_input *req; + u32 instance_bmap = ctxm->instance_bmap; + int i, j, rc = 0, n = 1; + __le32 *p; + + if (!(ctxm->flags & BNGE_CTX_MEM_TYPE_VALID) || !ctxm->pg_info) + return 0; + + if (instance_bmap) + n = hweight32(ctxm->instance_bmap); + else + instance_bmap = 1; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_BACKING_STORE_CFG_V2); + if (rc) + return rc; + bnge_hwrm_req_hold(bd, req); + req->type = cpu_to_le16(ctxm->type); + req->entry_size = cpu_to_le16(ctxm->entry_size); + req->subtype_valid_cnt = ctxm->split_entry_cnt; + for (i = 0, p = &req->split_entry_0; i < ctxm->split_entry_cnt; i++) + p[i] = cpu_to_le32(ctxm->split[i]); + for (i = 0, j = 0; j < n && !rc; i++) { + struct bnge_ctx_pg_info *ctx_pg; + + if (!(instance_bmap & (1 << i))) + continue; + req->instance = cpu_to_le16(i); + ctx_pg = &ctxm->pg_info[j++]; + if (!ctx_pg->entries) + continue; + req->num_entries = cpu_to_le32(ctx_pg->entries); + bnge_hwrm_set_pg_attr(&ctx_pg->ring_mem, + &req->page_size_pbl_level, + &req->page_dir); + if (last && j == n) + req->flags = + cpu_to_le32(BNGE_BS_CFG_ALL_DONE); + rc = bnge_hwrm_req_send(bd, req); + } + bnge_hwrm_req_drop(bd, req); + + return rc; +} + +static int bnge_hwrm_get_rings(struct bnge_dev *bd) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + struct hwrm_func_qcfg_output *resp; + struct hwrm_func_qcfg_input *req; + u16 cp, stats; + u16 rx, tx; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCFG); + if (rc) + return rc; + + req->fid = 
cpu_to_le16(0xffff); + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) { + bnge_hwrm_req_drop(bd, req); + return rc; + } + + hw_resc->resv_tx_rings = le16_to_cpu(resp->alloc_tx_rings); + hw_resc->resv_rx_rings = le16_to_cpu(resp->alloc_rx_rings); + hw_resc->resv_hw_ring_grps = + le32_to_cpu(resp->alloc_hw_ring_grps); + hw_resc->resv_vnics = le16_to_cpu(resp->alloc_vnics); + hw_resc->resv_rsscos_ctxs = le16_to_cpu(resp->alloc_rsscos_ctx); + cp = le16_to_cpu(resp->alloc_cmpl_rings); + stats = le16_to_cpu(resp->alloc_stat_ctx); + hw_resc->resv_irqs = cp; + rx = hw_resc->resv_rx_rings; + tx = hw_resc->resv_tx_rings; + if (bnge_is_agg_reqd(bd)) + rx >>= 1; + if (cp < (rx + tx)) { + rc = bnge_fix_rings_count(&rx, &tx, cp, false); + if (rc) + goto get_rings_exit; + if (bnge_is_agg_reqd(bd)) + rx <<= 1; + hw_resc->resv_rx_rings = rx; + hw_resc->resv_tx_rings = tx; + } + hw_resc->resv_irqs = le16_to_cpu(resp->alloc_msix); + hw_resc->resv_hw_ring_grps = rx; + hw_resc->resv_cp_rings = cp; + hw_resc->resv_stat_ctxs = stats; + +get_rings_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +static struct hwrm_func_cfg_input * +__bnge_hwrm_reserve_pf_rings(struct bnge_dev *bd, struct bnge_hw_rings *hwr) +{ + struct hwrm_func_cfg_input *req; + u32 enables = 0; + + if (bnge_hwrm_req_init(bd, req, HWRM_FUNC_CFG)) + return NULL; + + req->fid = cpu_to_le16(0xffff); + enables |= hwr->tx ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; + req->num_tx_rings = cpu_to_le16(hwr->tx); + + enables |= hwr->rx ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= hwr->stat ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + enables |= hwr->nq ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; + enables |= hwr->cmpl ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; + enables |= hwr->vnic ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; + enables |= hwr->rss_ctx ? 
FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; + + req->num_rx_rings = cpu_to_le16(hwr->rx); + req->num_rsscos_ctxs = cpu_to_le16(hwr->rss_ctx); + req->num_cmpl_rings = cpu_to_le16(hwr->cmpl); + req->num_msix = cpu_to_le16(hwr->nq); + req->num_stat_ctxs = cpu_to_le16(hwr->stat); + req->num_vnics = cpu_to_le16(hwr->vnic); + req->enables = cpu_to_le32(enables); + + return req; +} + +static int +bnge_hwrm_reserve_pf_rings(struct bnge_dev *bd, struct bnge_hw_rings *hwr) +{ + struct hwrm_func_cfg_input *req; + int rc; + + req = __bnge_hwrm_reserve_pf_rings(bd, hwr); + if (!req) + return -ENOMEM; + + if (!req->enables) { + bnge_hwrm_req_drop(bd, req); + return 0; + } + + rc = bnge_hwrm_req_send(bd, req); + if (rc) + return rc; + + return bnge_hwrm_get_rings(bd); +} + +int bnge_hwrm_reserve_rings(struct bnge_dev *bd, struct bnge_hw_rings *hwr) +{ + return bnge_hwrm_reserve_pf_rings(bd, hwr); +} + +int bnge_hwrm_func_qcfg(struct bnge_dev *bd) +{ + struct hwrm_func_qcfg_output *resp; + struct hwrm_func_qcfg_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCFG); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto func_qcfg_exit; + + bd->max_mtu = le16_to_cpu(resp->max_mtu_configured); + if (!bd->max_mtu) + bd->max_mtu = BNGE_MAX_MTU; + + if (bd->db_size) + goto func_qcfg_exit; + + bd->db_offset = le16_to_cpu(resp->legacy_l2_db_size_kb) * 1024; + bd->db_size = PAGE_ALIGN(le16_to_cpu(resp->l2_doorbell_bar_size_kb) * + 1024); + if (!bd->db_size || bd->db_size > pci_resource_len(bd->pdev, 2) || + bd->db_size <= bd->db_offset) + bd->db_size = pci_resource_len(bd->pdev, 2); + +func_qcfg_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_func_resc_qcaps(struct bnge_dev *bd) +{ + struct hwrm_func_resource_qcaps_output *resp; + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + struct hwrm_func_resource_qcaps_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_RESOURCE_QCAPS); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send_silent(bd, req); + if (rc) + goto hwrm_func_resc_qcaps_exit; + + hw_resc->max_tx_sch_inputs = le16_to_cpu(resp->max_tx_scheduler_inputs); + hw_resc->min_rsscos_ctxs = le16_to_cpu(resp->min_rsscos_ctx); + hw_resc->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); + hw_resc->min_cp_rings = le16_to_cpu(resp->min_cmpl_rings); + hw_resc->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); + hw_resc->min_tx_rings = le16_to_cpu(resp->min_tx_rings); + hw_resc->max_tx_rings = le16_to_cpu(resp->max_tx_rings); + hw_resc->min_rx_rings = le16_to_cpu(resp->min_rx_rings); + hw_resc->max_rx_rings = le16_to_cpu(resp->max_rx_rings); + hw_resc->min_hw_ring_grps = le16_to_cpu(resp->min_hw_ring_grps); + hw_resc->max_hw_ring_grps = le16_to_cpu(resp->max_hw_ring_grps); + hw_resc->min_l2_ctxs = le16_to_cpu(resp->min_l2_ctxs); + hw_resc->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs); + hw_resc->min_vnics = le16_to_cpu(resp->min_vnics); + hw_resc->max_vnics = le16_to_cpu(resp->max_vnics); + hw_resc->min_stat_ctxs = le16_to_cpu(resp->min_stat_ctx); + hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); + + hw_resc->max_nqs = le16_to_cpu(resp->max_msix); + hw_resc->max_hw_ring_grps = hw_resc->max_rx_rings; + +hwrm_func_resc_qcaps_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_func_qcaps(struct bnge_dev *bd) +{ + struct hwrm_func_qcaps_output *resp; + struct 
hwrm_func_qcaps_input *req; + struct bnge_pf_info *pf = &bd->pf; + u32 flags; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCAPS); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto hwrm_func_qcaps_exit; + + flags = le32_to_cpu(resp->flags); + if (flags & FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED) + bd->flags |= BNGE_EN_ROCE_V1; + if (flags & FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED) + bd->flags |= BNGE_EN_ROCE_V2; + + pf->fw_fid = le16_to_cpu(resp->fid); + pf->port_id = le16_to_cpu(resp->port_id); + memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN); + + bd->tso_max_segs = le16_to_cpu(resp->max_tso_segs); + +hwrm_func_qcaps_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_vnic_qcaps(struct bnge_dev *bd) +{ + struct hwrm_vnic_qcaps_output *resp; + struct hwrm_vnic_qcaps_input *req; + int rc; + + bd->hw_ring_stats_size = sizeof(struct ctx_hw_stats); + bd->rss_cap &= ~BNGE_RSS_CAP_NEW_RSS_CAP; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_QCAPS); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) { + u32 flags = le32_to_cpu(resp->flags); + + if (flags & VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP) + bd->fw_cap |= BNGE_FW_CAP_VLAN_RX_STRIP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP) + bd->rss_cap |= BNGE_RSS_CAP_RSS_HASH_TYPE_DELTA; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_PROF_TCAM_MODE_ENABLED) + bd->rss_cap |= BNGE_RSS_CAP_RSS_TCAM; + bd->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); + if (bd->max_tpa_v2) + bd->hw_ring_stats_size = BNGE_RING_STATS_SIZE; + if (flags & VNIC_QCAPS_RESP_FLAGS_HW_TUNNEL_TPA_CAP) + bd->fw_cap |= BNGE_FW_CAP_VNIC_TUNNEL_TPA; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV4_CAP) + bd->rss_cap |= BNGE_RSS_CAP_AH_V4_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_AH_SPI_IPV6_CAP) + bd->rss_cap |= BNGE_RSS_CAP_AH_V6_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV4_CAP) + bd->rss_cap |= BNGE_RSS_CAP_ESP_V4_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP) + bd->rss_cap |= BNGE_RSS_CAP_ESP_V6_RSS_CAP; + } + bnge_hwrm_req_drop(bd, req); + + return rc; +} + +#define BNGE_CNPQ(q_profile) \ + ((q_profile) == \ + QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP) + +int bnge_hwrm_queue_qportcfg(struct bnge_dev *bd) +{ + struct hwrm_queue_qportcfg_output *resp; + struct hwrm_queue_qportcfg_input *req; + u8 i, j, *qptr; + bool no_rdma; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_QUEUE_QPORTCFG); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto qportcfg_exit; + + if (!resp->max_configurable_queues) { + rc = -EINVAL; + goto qportcfg_exit; + } + bd->max_tc = resp->max_configurable_queues; + bd->max_lltc = resp->max_configurable_lossless_queues; + if (bd->max_tc > BNGE_MAX_QUEUE) + bd->max_tc = BNGE_MAX_QUEUE; + + no_rdma = !bnge_is_roce_en(bd); + qptr = &resp->queue_id0; + for (i = 0, j = 0; i < bd->max_tc; i++) { + bd->q_info[j].queue_id = *qptr; + bd->q_ids[i] = *qptr++; + bd->q_info[j].queue_profile = *qptr++; + bd->tc_to_qidx[j] = j; + if (!BNGE_CNPQ(bd->q_info[j].queue_profile) || no_rdma) + j++; + } + bd->max_q = bd->max_tc; + bd->max_tc = max_t(u8, j, 1); + + if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG) + bd->max_tc = 1; + + if (bd->max_lltc > bd->max_tc) + bd->max_lltc = bd->max_tc; + +qportcfg_exit: + 
bnge_hwrm_req_drop(bd, req); + return rc; +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h new file mode 100644 index 000000000000..6c03923eb559 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_HWRM_LIB_H_ +#define _BNGE_HWRM_LIB_H_ + +int bnge_hwrm_ver_get(struct bnge_dev *bd); +int bnge_hwrm_func_reset(struct bnge_dev *bd); +int bnge_hwrm_fw_set_time(struct bnge_dev *bd); +int bnge_hwrm_func_drv_rgtr(struct bnge_dev *bd); +int bnge_hwrm_func_drv_unrgtr(struct bnge_dev *bd); +int bnge_hwrm_vnic_qcaps(struct bnge_dev *bd); +int bnge_hwrm_nvm_dev_info(struct bnge_dev *bd, + struct hwrm_nvm_get_dev_info_output *nvm_dev_info); +int bnge_hwrm_func_backing_store(struct bnge_dev *bd, + struct bnge_ctx_mem_type *ctxm, + bool last); +int bnge_hwrm_func_backing_store_qcaps(struct bnge_dev *bd); +int bnge_hwrm_reserve_rings(struct bnge_dev *bd, + struct bnge_hw_rings *hwr); +int bnge_hwrm_func_qcaps(struct bnge_dev *bd); +int bnge_hwrm_func_qcfg(struct bnge_dev *bd); +int bnge_hwrm_func_resc_qcaps(struct bnge_dev *bd); +int bnge_hwrm_queue_qportcfg(struct bnge_dev *bd); + +#endif /* _BNGE_HWRM_LIB_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c new file mode 100644 index 000000000000..02254934f3d0 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <asm/byteorder.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if.h> +#include <net/ip.h> +#include <linux/skbuff.h> + +#include "bnge.h" +#include "bnge_hwrm_lib.h" +#include "bnge_ethtool.h" + +static netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + +static int bnge_open(struct net_device *dev) +{ + return 0; +} + +static int bnge_close(struct net_device *dev) +{ + return 0; +} + +static const struct net_device_ops bnge_netdev_ops = { + .ndo_open = bnge_open, + .ndo_stop = bnge_close, + .ndo_start_xmit = bnge_start_xmit, +}; + +static void bnge_init_mac_addr(struct bnge_dev *bd) +{ + eth_hw_addr_set(bd->netdev, bd->pf.mac_addr); +} + +static void bnge_set_tpa_flags(struct bnge_dev *bd) +{ + struct bnge_net *bn = netdev_priv(bd->netdev); + + bn->priv_flags &= ~BNGE_NET_EN_TPA; + + if (bd->netdev->features & NETIF_F_LRO) + bn->priv_flags |= BNGE_NET_EN_LRO; + else if (bd->netdev->features & NETIF_F_GRO_HW) + bn->priv_flags |= BNGE_NET_EN_GRO; +} + +static void bnge_init_l2_fltr_tbl(struct bnge_net *bn) +{ + int i; + + for (i = 0; i < BNGE_L2_FLTR_HASH_SIZE; i++) + INIT_HLIST_HEAD(&bn->l2_fltr_hash_tbl[i]); + get_random_bytes(&bn->hash_seed, sizeof(bn->hash_seed)); +} + +void bnge_set_ring_params(struct bnge_dev *bd) +{ + struct bnge_net *bn = netdev_priv(bd->netdev); + u32 ring_size, rx_size, rx_space, max_rx_cmpl; + u32 agg_factor = 0, agg_ring_size = 0; + + /* 8 for CRC and VLAN */ + rx_size = SKB_DATA_ALIGN(bn->netdev->mtu + ETH_HLEN + NET_IP_ALIGN + 8); + + rx_space = rx_size + ALIGN(NET_SKB_PAD, 8) + + 
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + bn->rx_copy_thresh = BNGE_RX_COPY_THRESH; + ring_size = bn->rx_ring_size; + bn->rx_agg_ring_size = 0; + bn->rx_agg_nr_pages = 0; + + if (bn->priv_flags & BNGE_NET_EN_TPA) + agg_factor = min_t(u32, 4, 65536 / BNGE_RX_PAGE_SIZE); + + bn->priv_flags &= ~BNGE_NET_EN_JUMBO; + if (rx_space > PAGE_SIZE) { + u32 jumbo_factor; + + bn->priv_flags |= BNGE_NET_EN_JUMBO; + jumbo_factor = PAGE_ALIGN(bn->netdev->mtu - 40) >> PAGE_SHIFT; + if (jumbo_factor > agg_factor) + agg_factor = jumbo_factor; + } + if (agg_factor) { + if (ring_size > BNGE_MAX_RX_DESC_CNT_JUM_ENA) { + ring_size = BNGE_MAX_RX_DESC_CNT_JUM_ENA; + netdev_warn(bn->netdev, "RX ring size reduced from %d to %d due to jumbo ring\n", + bn->rx_ring_size, ring_size); + bn->rx_ring_size = ring_size; + } + agg_ring_size = ring_size * agg_factor; + + bn->rx_agg_nr_pages = bnge_adjust_pow_two(agg_ring_size, + RX_DESC_CNT); + if (bn->rx_agg_nr_pages > MAX_RX_AGG_PAGES) { + u32 tmp = agg_ring_size; + + bn->rx_agg_nr_pages = MAX_RX_AGG_PAGES; + agg_ring_size = MAX_RX_AGG_PAGES * RX_DESC_CNT - 1; + netdev_warn(bn->netdev, "RX agg ring size %d reduced to %d.\n", + tmp, agg_ring_size); + } + bn->rx_agg_ring_size = agg_ring_size; + bn->rx_agg_ring_mask = (bn->rx_agg_nr_pages * RX_DESC_CNT) - 1; + + rx_size = SKB_DATA_ALIGN(BNGE_RX_COPY_THRESH + NET_IP_ALIGN); + rx_space = rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + + bn->rx_buf_use_size = rx_size; + bn->rx_buf_size = rx_space; + + bn->rx_nr_pages = bnge_adjust_pow_two(ring_size, RX_DESC_CNT); + bn->rx_ring_mask = (bn->rx_nr_pages * RX_DESC_CNT) - 1; + + ring_size = bn->tx_ring_size; + bn->tx_nr_pages = bnge_adjust_pow_two(ring_size, TX_DESC_CNT); + bn->tx_ring_mask = (bn->tx_nr_pages * TX_DESC_CNT) - 1; + + max_rx_cmpl = bn->rx_ring_size; + + if (bn->priv_flags & BNGE_NET_EN_TPA) + max_rx_cmpl += bd->max_tpa_v2; + ring_size = max_rx_cmpl * 2 + agg_ring_size + bn->tx_ring_size; + bn->cp_ring_size = ring_size; + + bn->cp_nr_pages = bnge_adjust_pow_two(ring_size, CP_DESC_CNT); + if (bn->cp_nr_pages > MAX_CP_PAGES) { + bn->cp_nr_pages = MAX_CP_PAGES; + bn->cp_ring_size = MAX_CP_PAGES * CP_DESC_CNT - 1; + netdev_warn(bn->netdev, "completion ring size %d reduced to %d.\n", + ring_size, bn->cp_ring_size); + } + bn->cp_bit = bn->cp_nr_pages * CP_DESC_CNT; + bn->cp_ring_mask = bn->cp_bit - 1; +} + +int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) +{ + struct net_device *netdev; + struct bnge_net *bn; + int rc; + + netdev = alloc_etherdev_mqs(sizeof(*bn), max_irqs * BNGE_MAX_QUEUE, + max_irqs); + if (!netdev) + return -ENOMEM; + + SET_NETDEV_DEV(netdev, bd->dev); + bd->netdev = netdev; + + netdev->netdev_ops = &bnge_netdev_ops; + + bnge_set_ethtool_ops(netdev); + + bn = netdev_priv(netdev); + bn->netdev = netdev; + bn->bd = bd; + + netdev->min_mtu = ETH_ZLEN; + netdev->max_mtu = bd->max_mtu; + + netdev->hw_features = NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | + NETIF_F_GRO; + + if (bd->flags & BNGE_EN_UDP_GSO_SUPP) + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + + if (BNGE_SUPPORTS_TPA(bd)) + netdev->hw_features |= NETIF_F_LRO; + + netdev->hw_enc_features = NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | + 
NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_PARTIAL; + + if (bd->flags & BNGE_EN_UDP_GSO_SUPP) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_L4; + + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM; + + netdev->vlan_features = netdev->hw_features | NETIF_F_HIGHDMA; + if (bd->fw_cap & BNGE_FW_CAP_VLAN_RX_STRIP) + netdev->hw_features |= BNGE_HW_FEATURE_VLAN_ALL_RX; + if (bd->fw_cap & BNGE_FW_CAP_VLAN_TX_INSERT) + netdev->hw_features |= BNGE_HW_FEATURE_VLAN_ALL_TX; + + if (BNGE_SUPPORTS_TPA(bd)) + netdev->hw_features |= NETIF_F_GRO_HW; + + netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA; + + if (netdev->features & NETIF_F_GRO_HW) + netdev->features &= ~NETIF_F_LRO; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); + if (bd->tso_max_segs) + netif_set_tso_max_segs(netdev, bd->tso_max_segs); + + bn->rx_ring_size = BNGE_DEFAULT_RX_RING_SIZE; + bn->tx_ring_size = BNGE_DEFAULT_TX_RING_SIZE; + + bnge_set_tpa_flags(bd); + bnge_set_ring_params(bd); + + bnge_init_l2_fltr_tbl(bn); + bnge_init_mac_addr(bd); + + rc = register_netdev(netdev); + if (rc) { + dev_err(bd->dev, "Register netdev failed rc: %d\n", rc); + goto err_netdev; + } + + return 0; + +err_netdev: + free_netdev(netdev); + return rc; +} + +void bnge_netdev_free(struct bnge_dev *bd) +{ + struct net_device *netdev = bd->netdev; + + unregister_netdev(netdev); + free_netdev(netdev); + bd->netdev = NULL; +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h new file mode 100644 index 000000000000..96b77e44b552 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_NETDEV_H_ +#define _BNGE_NETDEV_H_ + +#include "../bnxt/bnxt_hsi.h" + +struct tx_bd { + __le32 tx_bd_len_flags_type; + #define TX_BD_TYPE (0x3f << 0) + #define TX_BD_TYPE_SHORT_TX_BD (0x00 << 0) + #define TX_BD_TYPE_LONG_TX_BD (0x10 << 0) + #define TX_BD_FLAGS_PACKET_END (1 << 6) + #define TX_BD_FLAGS_NO_CMPL (1 << 7) + #define TX_BD_FLAGS_BD_CNT (0x1f << 8) + #define TX_BD_FLAGS_BD_CNT_SHIFT 8 + #define TX_BD_FLAGS_LHINT (3 << 13) + #define TX_BD_FLAGS_LHINT_SHIFT 13 + #define TX_BD_FLAGS_LHINT_512_AND_SMALLER (0 << 13) + #define TX_BD_FLAGS_LHINT_512_TO_1023 (1 << 13) + #define TX_BD_FLAGS_LHINT_1024_TO_2047 (2 << 13) + #define TX_BD_FLAGS_LHINT_2048_AND_LARGER (3 << 13) + #define TX_BD_FLAGS_COAL_NOW (1 << 15) + #define TX_BD_LEN (0xffff << 16) + #define TX_BD_LEN_SHIFT 16 + u32 tx_bd_opaque; + __le64 tx_bd_haddr; +} __packed; + +struct rx_bd { + __le32 rx_bd_len_flags_type; + #define RX_BD_TYPE (0x3f << 0) + #define RX_BD_TYPE_RX_PACKET_BD 0x4 + #define RX_BD_TYPE_RX_BUFFER_BD 0x5 + #define RX_BD_TYPE_RX_AGG_BD 0x6 + #define RX_BD_TYPE_16B_BD_SIZE (0 << 4) + #define RX_BD_TYPE_32B_BD_SIZE (1 << 4) + #define RX_BD_TYPE_48B_BD_SIZE (2 << 4) + #define RX_BD_TYPE_64B_BD_SIZE (3 << 4) + #define RX_BD_FLAGS_SOP (1 << 6) + #define RX_BD_FLAGS_EOP (1 << 7) + #define RX_BD_FLAGS_BUFFERS (3 << 8) + #define RX_BD_FLAGS_1_BUFFER_PACKET (0 << 8) + #define RX_BD_FLAGS_2_BUFFER_PACKET (1 << 8) + #define RX_BD_FLAGS_3_BUFFER_PACKET (2 << 8) + #define RX_BD_FLAGS_4_BUFFER_PACKET (3 << 8) + #define RX_BD_LEN (0xffff << 16) + #define RX_BD_LEN_SHIFT 16 + u32 rx_bd_opaque; + __le64 rx_bd_haddr; +}; + +struct tx_cmp { + __le32 tx_cmp_flags_type; + #define CMP_TYPE (0x3f << 0) + #define 
CMP_TYPE_TX_L2_CMP 0 + #define CMP_TYPE_TX_L2_COAL_CMP 2 + #define CMP_TYPE_TX_L2_PKT_TS_CMP 4 + #define CMP_TYPE_RX_L2_CMP 17 + #define CMP_TYPE_RX_AGG_CMP 18 + #define CMP_TYPE_RX_L2_TPA_START_CMP 19 + #define CMP_TYPE_RX_L2_TPA_END_CMP 21 + #define CMP_TYPE_RX_TPA_AGG_CMP 22 + #define CMP_TYPE_RX_L2_V3_CMP 23 + #define CMP_TYPE_RX_L2_TPA_START_V3_CMP 25 + #define CMP_TYPE_STATUS_CMP 32 + #define CMP_TYPE_REMOTE_DRIVER_REQ 34 + #define CMP_TYPE_REMOTE_DRIVER_RESP 36 + #define CMP_TYPE_ERROR_STATUS 48 + #define CMPL_BASE_TYPE_STAT_EJECT 0x1aUL + #define CMPL_BASE_TYPE_HWRM_DONE 0x20UL + #define CMPL_BASE_TYPE_HWRM_FWD_REQ 0x22UL + #define CMPL_BASE_TYPE_HWRM_FWD_RESP 0x24UL + #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define TX_CMP_FLAGS_ERROR (1 << 6) + #define TX_CMP_FLAGS_PUSH (1 << 7) + u32 tx_cmp_opaque; + __le32 tx_cmp_errors_v; + #define TX_CMP_V (1 << 0) + #define TX_CMP_ERRORS_BUFFER_ERROR (7 << 1) + #define TX_CMP_ERRORS_BUFFER_ERROR_NO_ERROR 0 + #define TX_CMP_ERRORS_BUFFER_ERROR_BAD_FORMAT 2 + #define TX_CMP_ERRORS_BUFFER_ERROR_INVALID_STAG 4 + #define TX_CMP_ERRORS_BUFFER_ERROR_STAG_BOUNDS 5 + #define TX_CMP_ERRORS_ZERO_LENGTH_PKT (1 << 4) + #define TX_CMP_ERRORS_EXCESSIVE_BD_LEN (1 << 5) + #define TX_CMP_ERRORS_DMA_ERROR (1 << 6) + #define TX_CMP_ERRORS_HINT_TOO_SHORT (1 << 7) + __le32 sq_cons_idx; + #define TX_CMP_SQ_CONS_IDX_MASK 0x00ffffff +}; + +struct bnge_sw_tx_bd { + struct sk_buff *skb; + DEFINE_DMA_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_LEN(len); + struct page *page; + u8 is_ts_pkt; + u8 is_push; + u8 action; + unsigned short nr_frags; + union { + u16 rx_prod; + u16 txts_prod; + }; +}; + +struct bnge_sw_rx_bd { + void *data; + u8 *data_ptr; + dma_addr_t mapping; +}; + +struct bnge_sw_rx_agg_bd { + struct page *page; + unsigned int offset; + dma_addr_t mapping; +}; + +#define BNGE_RX_COPY_THRESH 256 + +#define BNGE_HW_FEATURE_VLAN_ALL_RX \ + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX) +#define BNGE_HW_FEATURE_VLAN_ALL_TX \ + (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX) + +enum { + BNGE_NET_EN_GRO = BIT(0), + BNGE_NET_EN_LRO = BIT(1), + BNGE_NET_EN_JUMBO = BIT(2), +}; + +#define BNGE_NET_EN_TPA (BNGE_NET_EN_GRO | BNGE_NET_EN_LRO) + +struct bnge_net { + struct bnge_dev *bd; + struct net_device *netdev; + + u32 priv_flags; + + u32 rx_ring_size; + u32 rx_buf_size; + u32 rx_buf_use_size; /* usable size */ + u32 rx_agg_ring_size; + u32 rx_copy_thresh; + u32 rx_ring_mask; + u32 rx_agg_ring_mask; + u16 rx_nr_pages; + u16 rx_agg_nr_pages; + + u32 tx_ring_size; + u32 tx_ring_mask; + u16 tx_nr_pages; + + /* NQs and Completion rings */ + u32 cp_ring_size; + u32 cp_ring_mask; + u32 cp_bit; + u16 cp_nr_pages; + +#define BNGE_L2_FLTR_HASH_SIZE 32 +#define BNGE_L2_FLTR_HASH_MASK (BNGE_L2_FLTR_HASH_SIZE - 1) + struct hlist_head l2_fltr_hash_tbl[BNGE_L2_FLTR_HASH_SIZE]; + u32 hash_seed; + u64 toeplitz_prefix; +}; + +#define BNGE_DEFAULT_RX_RING_SIZE 511 +#define BNGE_DEFAULT_TX_RING_SIZE 511 + +int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs); +void bnge_netdev_free(struct bnge_dev *bd); +void bnge_set_ring_params(struct bnge_dev *bd); + +#if (BNGE_PAGE_SHIFT == 16) +#define MAX_RX_PAGES_AGG_ENA 1 +#define MAX_RX_PAGES 4 +#define MAX_RX_AGG_PAGES 4 +#define MAX_TX_PAGES 1 +#define MAX_CP_PAGES 16 +#else +#define MAX_RX_PAGES_AGG_ENA 8 +#define MAX_RX_PAGES 32 +#define MAX_RX_AGG_PAGES 32 +#define MAX_TX_PAGES 8 +#define MAX_CP_PAGES 128 +#endif + +#define BNGE_RX_PAGE_SIZE (1 << BNGE_RX_PAGE_SHIFT) + +#define RX_DESC_CNT (BNGE_PAGE_SIZE / sizeof(struct 
rx_bd)) +#define TX_DESC_CNT (BNGE_PAGE_SIZE / sizeof(struct tx_bd)) +#define CP_DESC_CNT (BNGE_PAGE_SIZE / sizeof(struct tx_cmp)) +#define SW_RXBD_RING_SIZE (sizeof(struct bnge_sw_rx_bd) * RX_DESC_CNT) +#define HW_RXBD_RING_SIZE (sizeof(struct rx_bd) * RX_DESC_CNT) +#define SW_RXBD_AGG_RING_SIZE (sizeof(struct bnge_sw_rx_agg_bd) * RX_DESC_CNT) +#define SW_TXBD_RING_SIZE (sizeof(struct bnge_sw_tx_bd) * TX_DESC_CNT) +#define HW_TXBD_RING_SIZE (sizeof(struct tx_bd) * TX_DESC_CNT) +#define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT) +#define BNGE_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1) +#define BNGE_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1) +#define BNGE_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) +#define BNGE_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) + +#endif /* _BNGE_NETDEV_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_resc.c b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c new file mode 100644 index 000000000000..c79a3607a1b7 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> + +#include "bnge.h" +#include "bnge_hwrm.h" +#include "bnge_hwrm_lib.h" +#include "bnge_resc.h" + +static u16 bnge_num_tx_to_cp(struct bnge_dev *bd, u16 tx) +{ + u16 tcs = bd->num_tc; + + if (!tcs) + tcs = 1; + + return tx / tcs; +} + +static u16 bnge_get_max_func_irqs(struct bnge_dev *bd) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + + return min_t(u16, hw_resc->max_irqs, hw_resc->max_nqs); +} + +static unsigned int bnge_get_max_func_stat_ctxs(struct bnge_dev *bd) +{ + return bd->hw_resc.max_stat_ctxs; +} + +static unsigned int bnge_get_max_func_cp_rings(struct bnge_dev *bd) +{ + return bd->hw_resc.max_cp_rings; +} + +static int bnge_aux_get_dflt_msix(struct bnge_dev *bd) +{ + int roce_msix = BNGE_MAX_ROCE_MSIX; + + return min_t(int, roce_msix, num_online_cpus() + 1); +} + +static u16 bnge_aux_get_msix(struct bnge_dev *bd) +{ + if (bnge_is_roce_en(bd)) + return bd->aux_num_msix; + + return 0; +} + +static void bnge_aux_set_msix_num(struct bnge_dev *bd, u16 num) +{ + if (bnge_is_roce_en(bd)) + bd->aux_num_msix = num; +} + +static u16 bnge_aux_get_stat_ctxs(struct bnge_dev *bd) +{ + if (bnge_is_roce_en(bd)) + return bd->aux_num_stat_ctxs; + + return 0; +} + +static void bnge_aux_set_stat_ctxs(struct bnge_dev *bd, u16 num_aux_ctx) +{ + if (bnge_is_roce_en(bd)) + bd->aux_num_stat_ctxs = num_aux_ctx; +} + +static u16 bnge_func_stat_ctxs_demand(struct bnge_dev *bd) +{ + return bd->nq_nr_rings + bnge_aux_get_stat_ctxs(bd); +} + +static int bnge_get_dflt_aux_stat_ctxs(struct bnge_dev *bd) +{ + int stat_ctx = 0; + + if (bnge_is_roce_en(bd)) { + stat_ctx = BNGE_MIN_ROCE_STAT_CTXS; + + if (!bd->pf.port_id && bd->port_count > 1) + stat_ctx++; + } + + return stat_ctx; +} + +static u16 bnge_nqs_demand(struct bnge_dev *bd) +{ + return bd->nq_nr_rings + bnge_aux_get_msix(bd); +} + +static u16 bnge_cprs_demand(struct bnge_dev *bd) +{ + return bd->tx_nr_rings + bd->rx_nr_rings; +} + +static u16 bnge_get_avail_msix(struct bnge_dev *bd, int num) +{ + u16 max_irq = bnge_get_max_func_irqs(bd); + u16 total_demand = bd->nq_nr_rings + num; + + if (max_irq < total_demand) { + num = max_irq - bd->nq_nr_rings; + if (num <= 0) + return 0; + } + + return num; +} + +static u16 bnge_num_cp_to_tx(struct bnge_dev *bd, u16 
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_resc.c b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c new file mode 100644 index 000000000000..c79a3607a1b7 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> + +#include "bnge.h" +#include "bnge_hwrm.h" +#include "bnge_hwrm_lib.h" +#include "bnge_resc.h" + +static u16 bnge_num_tx_to_cp(struct bnge_dev *bd, u16 tx) +{ + u16 tcs = bd->num_tc; + + if (!tcs) + tcs = 1; + + return tx / tcs; +} + +static u16 bnge_get_max_func_irqs(struct bnge_dev *bd) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + + return min_t(u16, hw_resc->max_irqs, hw_resc->max_nqs); +} + +static unsigned int bnge_get_max_func_stat_ctxs(struct bnge_dev *bd) +{ + return bd->hw_resc.max_stat_ctxs; +} + +static unsigned int bnge_get_max_func_cp_rings(struct bnge_dev *bd) +{ + return bd->hw_resc.max_cp_rings; +} + +static int bnge_aux_get_dflt_msix(struct bnge_dev *bd) +{ + int roce_msix = BNGE_MAX_ROCE_MSIX; + + return min_t(int, roce_msix, num_online_cpus() + 1); +} + +static u16 bnge_aux_get_msix(struct bnge_dev *bd) +{ + if (bnge_is_roce_en(bd)) + return bd->aux_num_msix; + + return 0; +} + +static void bnge_aux_set_msix_num(struct bnge_dev *bd, u16 num) +{ + if (bnge_is_roce_en(bd)) + bd->aux_num_msix = num; +} + +static u16 bnge_aux_get_stat_ctxs(struct bnge_dev *bd) +{ + if (bnge_is_roce_en(bd)) + return bd->aux_num_stat_ctxs; + + return 0; +} + +static void bnge_aux_set_stat_ctxs(struct bnge_dev *bd, u16 num_aux_ctx) +{ + if (bnge_is_roce_en(bd)) + bd->aux_num_stat_ctxs = num_aux_ctx; +} + +static u16 bnge_func_stat_ctxs_demand(struct bnge_dev *bd) +{ + return bd->nq_nr_rings + bnge_aux_get_stat_ctxs(bd); +} + +static int bnge_get_dflt_aux_stat_ctxs(struct bnge_dev *bd) +{ + int stat_ctx = 0; + + if (bnge_is_roce_en(bd)) { + stat_ctx = BNGE_MIN_ROCE_STAT_CTXS; + + if (!bd->pf.port_id && bd->port_count > 1) + stat_ctx++; + } + + return stat_ctx; +} + +static u16 bnge_nqs_demand(struct bnge_dev *bd) +{ + return bd->nq_nr_rings + bnge_aux_get_msix(bd); +} + +static u16 bnge_cprs_demand(struct bnge_dev *bd) +{ + return bd->tx_nr_rings + bd->rx_nr_rings; +} + +static u16 bnge_get_avail_msix(struct bnge_dev *bd, int num) +{ + u16 max_irq = bnge_get_max_func_irqs(bd); + u16 total_demand = bd->nq_nr_rings + num; + + if (max_irq < total_demand) { + num = max_irq - bd->nq_nr_rings; + if (num <= 0) + return 0; + } + + return num; +} + +static u16 bnge_num_cp_to_tx(struct bnge_dev *bd, u16 tx_chunks) +{ + return tx_chunks * bd->num_tc; +} + +int bnge_fix_rings_count(u16 *rx, u16 *tx, u16 max, bool shared) +{ + u16 _rx = *rx, _tx = *tx; + + if (shared) { + *rx = min_t(u16, _rx, max); + *tx = min_t(u16, _tx, max); + } else { + if (max < 2) + return -ENOMEM; + while (_rx + _tx > max) { + if (_rx > _tx && _rx > 1) + _rx--; + else if (_tx > 1) + _tx--; + } + *rx = _rx; + *tx = _tx; + } + + return 0; +} + +static int bnge_adjust_rings(struct bnge_dev *bd, u16 *rx, + u16 *tx, u16 max_nq, bool sh) +{ + u16 tx_chunks = bnge_num_tx_to_cp(bd, *tx); + + if (tx_chunks != *tx) { + u16 tx_saved = tx_chunks, rc; + + rc = bnge_fix_rings_count(rx, &tx_chunks, max_nq, sh); + if (rc) + return rc; + if (tx_chunks != tx_saved) + *tx = bnge_num_cp_to_tx(bd, tx_chunks); + return 0; + } + + return bnge_fix_rings_count(rx, tx, max_nq, sh); +} + +static int bnge_cal_nr_rss_ctxs(u16 rx_rings) +{ + if (!rx_rings) + return 0; + + return bnge_adjust_pow_two(rx_rings - 1, + BNGE_RSS_TABLE_ENTRIES); +} + +static u16 bnge_rss_ctxs_in_use(struct bnge_dev *bd, + struct bnge_hw_rings *hwr) +{ + return bnge_cal_nr_rss_ctxs(hwr->grp); +} + +static u16 bnge_get_total_vnics(struct bnge_dev *bd, u16 rx_rings) +{ + return 1; +} + +static u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd) +{ + return bnge_cal_nr_rss_ctxs(bd->rx_nr_rings) * + BNGE_RSS_TABLE_ENTRIES; +} + +static void bnge_set_dflt_rss_indir_tbl(struct bnge_dev *bd) +{ + u16 max_entries, pad; + u32 *rss_indir_tbl; + int i; + + max_entries = bnge_get_rxfh_indir_size(bd); + rss_indir_tbl = &bd->rss_indir_tbl[0]; + + for (i = 0; i < max_entries; i++) + rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, + bd->rx_nr_rings); + + pad = bd->rss_indir_tbl_entries - max_entries; + if (pad) + memset(&rss_indir_tbl[i], 0, pad * sizeof(*rss_indir_tbl)); +} + +static void bnge_copy_reserved_rings(struct bnge_dev *bd, + struct bnge_hw_rings *hwr) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + + hwr->tx = hw_resc->resv_tx_rings; + hwr->rx = hw_resc->resv_rx_rings; + hwr->nq = hw_resc->resv_irqs; + hwr->cmpl = hw_resc->resv_cp_rings; + hwr->grp = hw_resc->resv_hw_ring_grps; + hwr->vnic = hw_resc->resv_vnics; + hwr->stat = hw_resc->resv_stat_ctxs; + hwr->rss_ctx = hw_resc->resv_rsscos_ctxs; +} + +static bool bnge_rings_ok(struct bnge_hw_rings *hwr) +{ + return hwr->tx && hwr->rx && hwr->nq && hwr->grp && hwr->vnic && + hwr->stat && hwr->cmpl; +} + +static bool bnge_need_reserve_rings(struct bnge_dev *bd) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + u16 cprs = bnge_cprs_demand(bd); + u16 rx = bd->rx_nr_rings, stat; + u16 nqs = bnge_nqs_demand(bd); + u16 vnic; + + if (hw_resc->resv_tx_rings != bd->tx_nr_rings) + return true; + + vnic = bnge_get_total_vnics(bd, rx); + + if (bnge_is_agg_reqd(bd)) + rx <<= 1; + stat = bnge_func_stat_ctxs_demand(bd); + if (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cprs || + hw_resc->resv_vnics != vnic || hw_resc->resv_stat_ctxs != stat) + return true; + if (hw_resc->resv_irqs != nqs) + return true; + + return false; +} + +int bnge_reserve_rings(struct bnge_dev *bd) +{ + u16 aux_dflt_msix = bnge_aux_get_dflt_msix(bd); + struct bnge_hw_rings hwr = {0}; + u16 rx_rings, old_rx_rings; + u16 nq = bd->nq_nr_rings; + u16 aux_msix = 0; + bool sh = false; + u16 tx_cp; + int rc; + + if (!bnge_need_reserve_rings(bd)) + return 0; + + if (!bnge_aux_registered(bd)) { + aux_msix = bnge_get_avail_msix(bd, aux_dflt_msix); + if (!aux_msix) + bnge_aux_set_stat_ctxs(bd, 0); + + if (aux_msix > aux_dflt_msix) + aux_msix = aux_dflt_msix; +
hwr.nq = nq + aux_msix; + } else { + hwr.nq = bnge_nqs_demand(bd); + } + + hwr.tx = bd->tx_nr_rings; + hwr.rx = bd->rx_nr_rings; + if (bd->flags & BNGE_EN_SHARED_CHNL) + sh = true; + hwr.cmpl = hwr.rx + hwr.tx; + + hwr.vnic = bnge_get_total_vnics(bd, hwr.rx); + + if (bnge_is_agg_reqd(bd)) + hwr.rx <<= 1; + hwr.grp = bd->rx_nr_rings; + hwr.rss_ctx = bnge_rss_ctxs_in_use(bd, &hwr); + hwr.stat = bnge_func_stat_ctxs_demand(bd); + old_rx_rings = bd->hw_resc.resv_rx_rings; + + rc = bnge_hwrm_reserve_rings(bd, &hwr); + if (rc) + return rc; + + bnge_copy_reserved_rings(bd, &hwr); + + rx_rings = hwr.rx; + if (bnge_is_agg_reqd(bd)) { + if (hwr.rx >= 2) + rx_rings = hwr.rx >> 1; + else + return -ENOMEM; + } + + rx_rings = min_t(u16, rx_rings, hwr.grp); + hwr.nq = min_t(u16, hwr.nq, bd->nq_nr_rings); + if (hwr.stat > bnge_aux_get_stat_ctxs(bd)) + hwr.stat -= bnge_aux_get_stat_ctxs(bd); + hwr.nq = min_t(u16, hwr.nq, hwr.stat); + + /* Adjust the rings */ + rc = bnge_adjust_rings(bd, &rx_rings, &hwr.tx, hwr.nq, sh); + if (bnge_is_agg_reqd(bd)) + hwr.rx = rx_rings << 1; + tx_cp = hwr.tx; + hwr.nq = sh ? max_t(u16, tx_cp, rx_rings) : tx_cp + rx_rings; + bd->tx_nr_rings = hwr.tx; + + if (rx_rings != bd->rx_nr_rings) + dev_warn(bd->dev, "RX rings resv reduced to %d from earlier %d requested\n", + rx_rings, bd->rx_nr_rings); + + bd->rx_nr_rings = rx_rings; + bd->nq_nr_rings = hwr.nq; + + if (!bnge_rings_ok(&hwr)) + return -ENOMEM; + + if (old_rx_rings != bd->hw_resc.resv_rx_rings) + bnge_set_dflt_rss_indir_tbl(bd); + + if (!bnge_aux_registered(bd)) { + u16 resv_msix, resv_ctx, aux_ctxs; + struct bnge_hw_resc *hw_resc; + + hw_resc = &bd->hw_resc; + resv_msix = hw_resc->resv_irqs - bd->nq_nr_rings; + aux_msix = min_t(u16, resv_msix, aux_msix); + bnge_aux_set_msix_num(bd, aux_msix); + resv_ctx = hw_resc->resv_stat_ctxs - bd->nq_nr_rings; + aux_ctxs = min(resv_ctx, bnge_aux_get_stat_ctxs(bd)); + bnge_aux_set_stat_ctxs(bd, aux_ctxs); + } + + return rc; +} + +int bnge_alloc_irqs(struct bnge_dev *bd) +{ + u16 aux_msix, tx_cp, num_entries; + int i, irqs_demand, rc; + u16 max, min = 1; + + irqs_demand = bnge_nqs_demand(bd); + max = bnge_get_max_func_irqs(bd); + if (irqs_demand > max) + irqs_demand = max; + + if (!(bd->flags & BNGE_EN_SHARED_CHNL)) + min = 2; + + irqs_demand = pci_alloc_irq_vectors(bd->pdev, min, irqs_demand, + PCI_IRQ_MSIX); + aux_msix = bnge_aux_get_msix(bd); + if (irqs_demand < 0 || irqs_demand < aux_msix) { + rc = -ENODEV; + goto err_free_irqs; + } + + num_entries = irqs_demand; + if (pci_msix_can_alloc_dyn(bd->pdev)) + num_entries = max; + bd->irq_tbl = kcalloc(num_entries, sizeof(*bd->irq_tbl), GFP_KERNEL); + if (!bd->irq_tbl) { + rc = -ENOMEM; + goto err_free_irqs; + } + + for (i = 0; i < irqs_demand; i++) + bd->irq_tbl[i].vector = pci_irq_vector(bd->pdev, i); + + bd->irqs_acquired = irqs_demand; + /* Reduce rings based upon num of vectors allocated. + * We don't need to consider NQs as they have been calculated + * and must be more than irqs_demand. + */ + rc = bnge_adjust_rings(bd, &bd->rx_nr_rings, + &bd->tx_nr_rings, + irqs_demand - aux_msix, min == 1); + if (rc) + goto err_free_irqs; + + tx_cp = bnge_num_tx_to_cp(bd, bd->tx_nr_rings); + bd->nq_nr_rings = (min == 1) ?
+ max_t(u16, tx_cp, bd->rx_nr_rings) : + tx_cp + bd->rx_nr_rings; + + /* Readjust tx_nr_rings_per_tc */ + if (!bd->num_tc) + bd->tx_nr_rings_per_tc = bd->tx_nr_rings; + + return 0; + +err_free_irqs: + dev_err(bd->dev, "Failed to allocate IRQs err = %d\n", rc); + bnge_free_irqs(bd); + return rc; +} + +void bnge_free_irqs(struct bnge_dev *bd) +{ + pci_free_irq_vectors(bd->pdev); + kfree(bd->irq_tbl); + bd->irq_tbl = NULL; +} + +static void _bnge_get_max_rings(struct bnge_dev *bd, u16 *max_rx, + u16 *max_tx, u16 *max_nq) +{ + struct bnge_hw_resc *hw_resc = &bd->hw_resc; + u16 max_ring_grps = 0, max_cp; + int rc; + + *max_tx = hw_resc->max_tx_rings; + *max_rx = hw_resc->max_rx_rings; + *max_nq = min_t(int, bnge_get_max_func_irqs(bd), + hw_resc->max_stat_ctxs); + max_ring_grps = hw_resc->max_hw_ring_grps; + if (bnge_is_agg_reqd(bd)) + *max_rx >>= 1; + + max_cp = bnge_get_max_func_cp_rings(bd); + + /* Fix RX and TX rings according to number of CPs available */ + rc = bnge_fix_rings_count(max_rx, max_tx, max_cp, false); + if (rc) { + *max_rx = 0; + *max_tx = 0; + } + + *max_rx = min_t(int, *max_rx, max_ring_grps); +} + +static int bnge_get_max_rings(struct bnge_dev *bd, u16 *max_rx, + u16 *max_tx, bool shared) +{ + u16 rx, tx, nq; + + _bnge_get_max_rings(bd, &rx, &tx, &nq); + *max_rx = rx; + *max_tx = tx; + if (!rx || !tx || !nq) + return -ENOMEM; + + return bnge_fix_rings_count(max_rx, max_tx, nq, shared); +} + +static int bnge_get_dflt_rings(struct bnge_dev *bd, u16 *max_rx, u16 *max_tx, + bool shared) +{ + int rc; + + rc = bnge_get_max_rings(bd, max_rx, max_tx, shared); + if (rc) { + dev_info(bd->dev, "Not enough rings available\n"); + return rc; + } + + if (bnge_is_roce_en(bd)) { + int max_cp, max_stat, max_irq; + + /* Reserve minimum resources for RoCE */ + max_cp = bnge_get_max_func_cp_rings(bd); + max_stat = bnge_get_max_func_stat_ctxs(bd); + max_irq = bnge_get_max_func_irqs(bd); + if (max_cp <= BNGE_MIN_ROCE_CP_RINGS || + max_irq <= BNGE_MIN_ROCE_CP_RINGS || + max_stat <= BNGE_MIN_ROCE_STAT_CTXS) + return 0; + + max_cp -= BNGE_MIN_ROCE_CP_RINGS; + max_irq -= BNGE_MIN_ROCE_CP_RINGS; + max_stat -= BNGE_MIN_ROCE_STAT_CTXS; + max_cp = min_t(u16, max_cp, max_irq); + max_cp = min_t(u16, max_cp, max_stat); + rc = bnge_adjust_rings(bd, max_rx, max_tx, max_cp, shared); + if (rc) + rc = 0; + } + + return rc; +} + +/* In initial default shared ring setting, each shared ring must have a + * RX/TX ring pair. 
+ */ +static void bnge_trim_dflt_sh_rings(struct bnge_dev *bd) +{ + bd->nq_nr_rings = min_t(u16, bd->tx_nr_rings_per_tc, bd->rx_nr_rings); + bd->rx_nr_rings = bd->nq_nr_rings; + bd->tx_nr_rings_per_tc = bd->nq_nr_rings; + bd->tx_nr_rings = bd->tx_nr_rings_per_tc; +} + +static int bnge_net_init_dflt_rings(struct bnge_dev *bd, bool sh) +{ + u16 dflt_rings, max_rx_rings, max_tx_rings; + int rc; + + if (sh) + bd->flags |= BNGE_EN_SHARED_CHNL; + + dflt_rings = netif_get_num_default_rss_queues(); + + rc = bnge_get_dflt_rings(bd, &max_rx_rings, &max_tx_rings, sh); + if (rc) + return rc; + bd->rx_nr_rings = min_t(u16, dflt_rings, max_rx_rings); + bd->tx_nr_rings_per_tc = min_t(u16, dflt_rings, max_tx_rings); + if (sh) + bnge_trim_dflt_sh_rings(bd); + else + bd->nq_nr_rings = bd->tx_nr_rings_per_tc + bd->rx_nr_rings; + bd->tx_nr_rings = bd->tx_nr_rings_per_tc; + + rc = bnge_reserve_rings(bd); + if (rc && rc != -ENODEV) + dev_warn(bd->dev, "Unable to reserve tx rings\n"); + bd->tx_nr_rings_per_tc = bd->tx_nr_rings; + if (sh) + bnge_trim_dflt_sh_rings(bd); + + /* Rings may have been reduced, re-reserve them again */ + if (bnge_need_reserve_rings(bd)) { + rc = bnge_reserve_rings(bd); + if (rc && rc != -ENODEV) + dev_warn(bd->dev, "Failed to reserve reduced rings\n"); + bd->tx_nr_rings_per_tc = bd->tx_nr_rings; + } + if (rc) { + bd->tx_nr_rings = 0; + bd->rx_nr_rings = 0; + } + + return rc; +} + +static int bnge_alloc_rss_indir_tbl(struct bnge_dev *bd) +{ + u16 entries; + + entries = BNGE_MAX_RSS_TABLE_ENTRIES; + + bd->rss_indir_tbl_entries = entries; + bd->rss_indir_tbl = + kmalloc_array(entries, sizeof(*bd->rss_indir_tbl), GFP_KERNEL); + if (!bd->rss_indir_tbl) + return -ENOMEM; + + return 0; +} + +int bnge_net_init_dflt_config(struct bnge_dev *bd) +{ + struct bnge_hw_resc *hw_resc; + int rc; + + rc = bnge_alloc_rss_indir_tbl(bd); + if (rc) + return rc; + + rc = bnge_net_init_dflt_rings(bd, true); + if (rc) + goto err_free_tbl; + + hw_resc = &bd->hw_resc; + bd->max_fltr = hw_resc->max_rx_em_flows + hw_resc->max_rx_wm_flows + + BNGE_L2_FLTR_MAX_FLTR; + + return 0; + +err_free_tbl: + kfree(bd->rss_indir_tbl); + bd->rss_indir_tbl = NULL; + return rc; +} + +void bnge_net_uninit_dflt_config(struct bnge_dev *bd) +{ + kfree(bd->rss_indir_tbl); + bd->rss_indir_tbl = NULL; +} + +void bnge_aux_init_dflt_config(struct bnge_dev *bd) +{ + bd->aux_num_msix = bnge_aux_get_dflt_msix(bd); + bd->aux_num_stat_ctxs = bnge_get_dflt_aux_stat_ctxs(bd); +}
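[Editor's note: bnge_fix_rings_count(), defined in bnge_resc.c above, is the workhorse that squeezes an RX/TX request under the completion-ring budget: in shared mode each count is simply clamped to max, while in dedicated mode the larger side is decremented first until rx + tx fits. A standalone restatement of the dedicated-mode loop with sample numbers, for illustration only:]

    /* Userspace restatement of the bnge_fix_rings_count() balancing loop. */
    #include <stdio.h>

    static void fix_rings(unsigned int rx, unsigned int tx, unsigned int max)
    {
        while (rx + tx > max) {        /* shrink the larger side first */
            if (rx > tx && rx > 1)
                rx--;
            else if (tx > 1)
                tx--;
        }
        printf("max=%u -> rx=%u tx=%u\n", max, rx, tx);
    }

    int main(void)
    {
        fix_rings(8, 8, 10);   /* prints rx=5 tx=5 */
        fix_rings(12, 4, 10);  /* prints rx=6 tx=4 */
        return 0;
    }

[bnge_adjust_rings() runs the same balancing in units of TX "chunks" (TX rings divided by the number of traffic classes), so a multi-TC configuration shrinks by whole TC groups rather than by single rings.]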
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_resc.h b/drivers/net/ethernet/broadcom/bnge/bnge_resc.h new file mode 100644 index 000000000000..54ef1c7d8822 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_resc.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_RESC_H_ +#define _BNGE_RESC_H_ + +#include "bnge_netdev.h" +#include "bnge_rmem.h" + +struct bnge_hw_resc { + u16 min_rsscos_ctxs; + u16 max_rsscos_ctxs; + u16 resv_rsscos_ctxs; + u16 min_cp_rings; + u16 max_cp_rings; + u16 resv_cp_rings; + u16 min_tx_rings; + u16 max_tx_rings; + u16 resv_tx_rings; + u16 max_tx_sch_inputs; + u16 min_rx_rings; + u16 max_rx_rings; + u16 resv_rx_rings; + u16 min_hw_ring_grps; + u16 max_hw_ring_grps; + u16 resv_hw_ring_grps; + u16 min_l2_ctxs; + u16 max_l2_ctxs; + u16 min_vnics; + u16 max_vnics; + u16 resv_vnics; + u16 min_stat_ctxs; + u16 max_stat_ctxs; + u16 resv_stat_ctxs; + u16 max_nqs; + u16 max_irqs; + u16 resv_irqs; + u32 max_encap_records; + u32 max_decap_records; + u32 max_tx_em_flows; + u32 max_tx_wm_flows; + u32 max_rx_em_flows; + u32 max_rx_wm_flows; +}; + +struct bnge_hw_rings { + u16 tx; + u16 rx; + u16 grp; + u16 nq; + u16 cmpl; + u16 stat; + u16 vnic; + u16 rss_ctx; +}; + +/* "TXRX", 2 hyphens, plus maximum integer */ +#define BNGE_IRQ_NAME_EXTRA 17 +struct bnge_irq { + irq_handler_t handler; + unsigned int vector; + u8 requested:1; + u8 have_cpumask:1; + char name[IFNAMSIZ + BNGE_IRQ_NAME_EXTRA]; + cpumask_var_t cpu_mask; +}; + +int bnge_reserve_rings(struct bnge_dev *bd); +int bnge_fix_rings_count(u16 *rx, u16 *tx, u16 max, bool shared); +int bnge_alloc_irqs(struct bnge_dev *bd); +void bnge_free_irqs(struct bnge_dev *bd); +int bnge_net_init_dflt_config(struct bnge_dev *bd); +void bnge_net_uninit_dflt_config(struct bnge_dev *bd); +void bnge_aux_init_dflt_config(struct bnge_dev *bd); + +static inline u32 +bnge_adjust_pow_two(u32 total_ent, u16 ent_per_blk) +{ + u32 blks = total_ent / ent_per_blk; + + if (blks == 0 || blks == 1) + return ++blks; + + if (!is_power_of_2(blks)) + blks = roundup_pow_of_two(blks); + + return blks; +} + +#define BNGE_MAX_ROCE_MSIX 64 +#define BNGE_MIN_ROCE_CP_RINGS 2 +#define BNGE_MIN_ROCE_STAT_CTXS 1 + +#endif /* _BNGE_RESC_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c new file mode 100644 index 000000000000..0e935cc46da6 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2025 Broadcom. + +#include <linux/etherdevice.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/crash_dump.h> + +#include "bnge.h" +#include "../bnxt/bnxt_hsi.h" +#include "bnge_hwrm_lib.h" +#include "bnge_rmem.h" + +static void bnge_init_ctx_mem(struct bnge_ctx_mem_type *ctxm, + void *p, int len) +{ + u8 init_val = ctxm->init_value; + u16 offset = ctxm->init_offset; + u8 *p2 = p; + int i; + + if (!init_val) + return; + if (offset == BNGE_CTX_INIT_INVALID_OFFSET) { + memset(p, init_val, len); + return; + } + for (i = 0; i < len; i += ctxm->entry_size) + *(p2 + i + offset) = init_val; +} + +void bnge_free_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem) +{ + struct pci_dev *pdev = bd->pdev; + int i; + + if (!rmem->pg_arr) + goto skip_pages; + + for (i = 0; i < rmem->nr_pages; i++) { + if (!rmem->pg_arr[i]) + continue; + + dma_free_coherent(&pdev->dev, rmem->page_size, + rmem->pg_arr[i], rmem->dma_arr[i]); + + rmem->pg_arr[i] = NULL; + } +skip_pages: + if (rmem->pg_tbl) { + size_t pg_tbl_size = rmem->nr_pages * 8; + + if (rmem->flags & BNGE_RMEM_USE_FULL_PAGE_FLAG) + pg_tbl_size = rmem->page_size; + dma_free_coherent(&pdev->dev, pg_tbl_size, + rmem->pg_tbl, rmem->dma_pg_tbl); + rmem->pg_tbl = NULL; + } + if (rmem->vmem_size && *rmem->vmem) { + vfree(*rmem->vmem); + *rmem->vmem = NULL; + } +} + +int bnge_alloc_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem) +{ + struct pci_dev *pdev = bd->pdev; + u64 valid_bit = 0; + int i; + + if (rmem->flags & (BNGE_RMEM_VALID_PTE_FLAG | BNGE_RMEM_RING_PTE_FLAG)) + valid_bit = PTU_PTE_VALID; + + if ((rmem->nr_pages > 1 || rmem->depth > 0) && !rmem->pg_tbl) { + size_t pg_tbl_size = rmem->nr_pages * 8; + + if (rmem->flags & BNGE_RMEM_USE_FULL_PAGE_FLAG) + pg_tbl_size = rmem->page_size; + rmem->pg_tbl = dma_alloc_coherent(&pdev->dev, pg_tbl_size, + &rmem->dma_pg_tbl, + GFP_KERNEL); + if (!rmem->pg_tbl) + return -ENOMEM; + } + + for (i = 0; i <
rmem->nr_pages; i++) { + u64 extra_bits = valid_bit; + + rmem->pg_arr[i] = dma_alloc_coherent(&pdev->dev, + rmem->page_size, + &rmem->dma_arr[i], + GFP_KERNEL); + if (!rmem->pg_arr[i]) + return -ENOMEM; + + if (rmem->ctx_mem) + bnge_init_ctx_mem(rmem->ctx_mem, rmem->pg_arr[i], + rmem->page_size); + + if (rmem->nr_pages > 1 || rmem->depth > 0) { + if (i == rmem->nr_pages - 2 && + (rmem->flags & BNGE_RMEM_RING_PTE_FLAG)) + extra_bits |= PTU_PTE_NEXT_TO_LAST; + else if (i == rmem->nr_pages - 1 && + (rmem->flags & BNGE_RMEM_RING_PTE_FLAG)) + extra_bits |= PTU_PTE_LAST; + rmem->pg_tbl[i] = + cpu_to_le64(rmem->dma_arr[i] | extra_bits); + } + } + + if (rmem->vmem_size) { + *rmem->vmem = vzalloc(rmem->vmem_size); + if (!(*rmem->vmem)) + return -ENOMEM; + } + + return 0; +} + +static int bnge_alloc_ctx_one_lvl(struct bnge_dev *bd, + struct bnge_ctx_pg_info *ctx_pg) +{ + struct bnge_ring_mem_info *rmem = &ctx_pg->ring_mem; + + rmem->page_size = BNGE_PAGE_SIZE; + rmem->pg_arr = ctx_pg->ctx_pg_arr; + rmem->dma_arr = ctx_pg->ctx_dma_arr; + rmem->flags = BNGE_RMEM_VALID_PTE_FLAG; + if (rmem->depth >= 1) + rmem->flags |= BNGE_RMEM_USE_FULL_PAGE_FLAG; + return bnge_alloc_ring(bd, rmem); +} + +static int bnge_alloc_ctx_pg_tbls(struct bnge_dev *bd, + struct bnge_ctx_pg_info *ctx_pg, u32 mem_size, + u8 depth, struct bnge_ctx_mem_type *ctxm) +{ + struct bnge_ring_mem_info *rmem = &ctx_pg->ring_mem; + int rc; + + if (!mem_size) + return -EINVAL; + + ctx_pg->nr_pages = DIV_ROUND_UP(mem_size, BNGE_PAGE_SIZE); + if (ctx_pg->nr_pages > MAX_CTX_TOTAL_PAGES) { + ctx_pg->nr_pages = 0; + return -EINVAL; + } + if (ctx_pg->nr_pages > MAX_CTX_PAGES || depth > 1) { + int nr_tbls, i; + + rmem->depth = 2; + ctx_pg->ctx_pg_tbl = kcalloc(MAX_CTX_PAGES, sizeof(ctx_pg), + GFP_KERNEL); + if (!ctx_pg->ctx_pg_tbl) + return -ENOMEM; + nr_tbls = DIV_ROUND_UP(ctx_pg->nr_pages, MAX_CTX_PAGES); + rmem->nr_pages = nr_tbls; + rc = bnge_alloc_ctx_one_lvl(bd, ctx_pg); + if (rc) + return rc; + for (i = 0; i < nr_tbls; i++) { + struct bnge_ctx_pg_info *pg_tbl; + + pg_tbl = kzalloc(sizeof(*pg_tbl), GFP_KERNEL); + if (!pg_tbl) + return -ENOMEM; + ctx_pg->ctx_pg_tbl[i] = pg_tbl; + rmem = &pg_tbl->ring_mem; + rmem->pg_tbl = ctx_pg->ctx_pg_arr[i]; + rmem->dma_pg_tbl = ctx_pg->ctx_dma_arr[i]; + rmem->depth = 1; + rmem->nr_pages = MAX_CTX_PAGES; + rmem->ctx_mem = ctxm; + if (i == (nr_tbls - 1)) { + int rem = ctx_pg->nr_pages % MAX_CTX_PAGES; + + if (rem) + rmem->nr_pages = rem; + } + rc = bnge_alloc_ctx_one_lvl(bd, pg_tbl); + if (rc) + break; + } + } else { + rmem->nr_pages = DIV_ROUND_UP(mem_size, BNGE_PAGE_SIZE); + if (rmem->nr_pages > 1 || depth) + rmem->depth = 1; + rmem->ctx_mem = ctxm; + rc = bnge_alloc_ctx_one_lvl(bd, ctx_pg); + } + + return rc; +} + +static void bnge_free_ctx_pg_tbls(struct bnge_dev *bd, + struct bnge_ctx_pg_info *ctx_pg) +{ + struct bnge_ring_mem_info *rmem = &ctx_pg->ring_mem; + + if (rmem->depth > 1 || ctx_pg->nr_pages > MAX_CTX_PAGES || + ctx_pg->ctx_pg_tbl) { + int i, nr_tbls = rmem->nr_pages; + + for (i = 0; i < nr_tbls; i++) { + struct bnge_ctx_pg_info *pg_tbl; + struct bnge_ring_mem_info *rmem2; + + pg_tbl = ctx_pg->ctx_pg_tbl[i]; + if (!pg_tbl) + continue; + rmem2 = &pg_tbl->ring_mem; + bnge_free_ring(bd, rmem2); + ctx_pg->ctx_pg_arr[i] = NULL; + kfree(pg_tbl); + ctx_pg->ctx_pg_tbl[i] = NULL; + } + kfree(ctx_pg->ctx_pg_tbl); + ctx_pg->ctx_pg_tbl = NULL; + } + bnge_free_ring(bd, rmem); + ctx_pg->nr_pages = 0; +} + +static int bnge_setup_ctxm_pg_tbls(struct bnge_dev *bd, + struct bnge_ctx_mem_type *ctxm, u32 entries, 
+ u8 pg_lvl) +{ + struct bnge_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, rc = 0, n = 1; + u32 mem_size; + + if (!ctxm->entry_size || !ctx_pg) + return -EINVAL; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + if (ctxm->entry_multiple) + entries = roundup(entries, ctxm->entry_multiple); + entries = clamp_t(u32, entries, ctxm->min_entries, ctxm->max_entries); + mem_size = entries * ctxm->entry_size; + for (i = 0; i < n && !rc; i++) { + ctx_pg[i].entries = entries; + rc = bnge_alloc_ctx_pg_tbls(bd, &ctx_pg[i], mem_size, pg_lvl, + ctxm->init_value ? ctxm : NULL); + } + + return rc; +} + +static int bnge_backing_store_cfg(struct bnge_dev *bd, u32 ena) +{ + struct bnge_ctx_mem_info *ctx = bd->ctx; + struct bnge_ctx_mem_type *ctxm; + u16 last_type; + int rc = 0; + u16 type; + + if (!ena) + return 0; + else if (ena & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) + last_type = BNGE_CTX_MAX - 1; + else + last_type = BNGE_CTX_L2_MAX - 1; + ctx->ctx_arr[last_type].last = 1; + + for (type = 0 ; type < BNGE_CTX_V2_MAX; type++) { + ctxm = &ctx->ctx_arr[type]; + + rc = bnge_hwrm_func_backing_store(bd, ctxm, ctxm->last); + if (rc) + return rc; + } + + return 0; +} + +void bnge_free_ctx_mem(struct bnge_dev *bd) +{ + struct bnge_ctx_mem_info *ctx = bd->ctx; + u16 type; + + if (!ctx) + return; + + for (type = 0; type < BNGE_CTX_V2_MAX; type++) { + struct bnge_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + struct bnge_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, n = 1; + + if (!ctx_pg) + continue; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + for (i = 0; i < n; i++) + bnge_free_ctx_pg_tbls(bd, &ctx_pg[i]); + + kfree(ctx_pg); + ctxm->pg_info = NULL; + } + + ctx->flags &= ~BNGE_CTX_FLAG_INITED; + kfree(ctx); + bd->ctx = NULL; +} + +#define FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES \ + (FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP | \ + FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ | \ + FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ | \ + FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC | \ + FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) + +int bnge_alloc_ctx_mem(struct bnge_dev *bd) +{ + struct bnge_ctx_mem_type *ctxm; + struct bnge_ctx_mem_info *ctx; + u32 l2_qps, qp1_qps, max_qps; + u32 ena, entries_sp, entries; + u32 srqs, max_srqs, min; + u32 num_mr, num_ah; + u32 extra_srqs = 0; + u32 extra_qps = 0; + u32 fast_qpmd_qps; + u8 pg_lvl = 1; + int i, rc; + + rc = bnge_hwrm_func_backing_store_qcaps(bd); + if (rc) { + dev_err(bd->dev, "Failed querying ctx mem caps, rc: %d\n", rc); + return rc; + } + + ctx = bd->ctx; + if (!ctx || (ctx->flags & BNGE_CTX_FLAG_INITED)) + return 0; + + ctxm = &ctx->ctx_arr[BNGE_CTX_QP]; + l2_qps = ctxm->qp_l2_entries; + qp1_qps = ctxm->qp_qp1_entries; + fast_qpmd_qps = ctxm->qp_fast_qpmd_entries; + max_qps = ctxm->max_entries; + ctxm = &ctx->ctx_arr[BNGE_CTX_SRQ]; + srqs = ctxm->srq_l2_entries; + max_srqs = ctxm->max_entries; + ena = 0; + if (bnge_is_roce_en(bd) && !is_kdump_kernel()) { + pg_lvl = 2; + extra_qps = min_t(u32, 65536, max_qps - l2_qps - qp1_qps); + /* allocate extra qps if fast qp destroy feature enabled */ + extra_qps += fast_qpmd_qps; + extra_srqs = min_t(u32, 8192, max_srqs - srqs); + if (fast_qpmd_qps) + ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP_FAST_QPMD; + } + + ctxm = &ctx->ctx_arr[BNGE_CTX_QP]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, l2_qps + qp1_qps + extra_qps, + pg_lvl); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNGE_CTX_SRQ]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, srqs + extra_srqs, pg_lvl); + if (rc) + return rc; + + ctxm = 
&ctx->ctx_arr[BNGE_CTX_CQ]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, ctxm->cq_l2_entries + + extra_qps * 2, pg_lvl); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNGE_CTX_VNIC]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNGE_CTX_STAT]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; + + if (!bnge_is_roce_en(bd)) + goto skip_rdma; + + ctxm = &ctx->ctx_arr[BNGE_CTX_MRAV]; + /* 128K extra is needed to accommodate static AH context + * allocation by f/w. + */ + num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); + num_ah = min_t(u32, num_mr, 1024 * 128); + ctxm->split_entry_cnt = BNGE_CTX_MRAV_AV_SPLIT_ENTRY + 1; + if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) + ctxm->mrav_av_entries = num_ah; + + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, num_mr + num_ah, 2); + if (rc) + return rc; + ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV; + + ctxm = &ctx->ctx_arr[BNGE_CTX_TIM]; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, l2_qps + qp1_qps + extra_qps, 1); + if (rc) + return rc; + ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM; + +skip_rdma: + ctxm = &ctx->ctx_arr[BNGE_CTX_STQM]; + min = ctxm->min_entries; + entries_sp = ctx->ctx_arr[BNGE_CTX_VNIC].vnic_entries + l2_qps + + 2 * (extra_qps + qp1_qps) + min; + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, entries_sp, 2); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNGE_CTX_FTQM]; + entries = l2_qps + 2 * (extra_qps + qp1_qps); + rc = bnge_setup_ctxm_pg_tbls(bd, ctxm, entries, 2); + if (rc) + return rc; + for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) + ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i; + ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES; + + rc = bnge_backing_store_cfg(bd, ena); + if (rc) { + dev_err(bd->dev, "Failed configuring ctx mem, rc: %d\n", rc); + return rc; + } + ctx->flags |= BNGE_CTX_FLAG_INITED; + + return 0; +}
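[Editor's note: the two-level page-table layout used by bnge_alloc_ctx_pg_tbls() in bnge_rmem.c above is easiest to size with concrete numbers. With 4 KiB BNGE pages and 8-byte PTEs, MAX_CTX_PAGES = 4096 / 8 = 512, so one table page addresses 512 data pages (2 MiB), and a full two-level tree addresses MAX_CTX_TOTAL_PAGES = 512 * 512 = 262,144 pages, i.e. 1 GiB of context memory. A standalone sketch of the arithmetic, assuming 4 KiB pages:]

    /* Capacity math for the two-level context paging (illustration only). */
    #include <stdio.h>

    int main(void)
    {
        long page_size = 4096;
        long pte_size = 8;
        long max_ctx_pages = page_size / pte_size;            /* 512 */
        long one_level_bytes = max_ctx_pages * page_size;     /* 2 MiB */
        long two_level_pages = max_ctx_pages * max_ctx_pages; /* 262144 */
        long two_level_bytes = two_level_pages * page_size;   /* 1 GiB */

        printf("%ld PTEs/table page, 1-level: %ld MiB, 2-level: %ld GiB\n",
               max_ctx_pages, one_level_bytes >> 20, two_level_bytes >> 30);
        return 0;
    }

[This is why bnge_alloc_ctx_pg_tbls() rejects requests above MAX_CTX_TOTAL_PAGES and only promotes to depth 2 when nr_pages exceeds MAX_CTX_PAGES.]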
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h new file mode 100644 index 000000000000..300f1d8268ef --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_RMEM_H_ +#define _BNGE_RMEM_H_ + +struct bnge_ctx_mem_type; +struct bnge_dev; + +#define PTU_PTE_VALID 0x1UL +#define PTU_PTE_LAST 0x2UL +#define PTU_PTE_NEXT_TO_LAST 0x4UL + +struct bnge_ring_mem_info { + /* Number of pages to next level */ + int nr_pages; + int page_size; + u16 flags; +#define BNGE_RMEM_VALID_PTE_FLAG 1 +#define BNGE_RMEM_RING_PTE_FLAG 2 +#define BNGE_RMEM_USE_FULL_PAGE_FLAG 4 + + u16 depth; + + void **pg_arr; + dma_addr_t *dma_arr; + + __le64 *pg_tbl; + dma_addr_t dma_pg_tbl; + + int vmem_size; + void **vmem; + + struct bnge_ctx_mem_type *ctx_mem; +}; + +/* The hardware supports certain page sizes. + * Use the supported page sizes to allocate the rings. + */ +#if (PAGE_SHIFT < 12) +#define BNGE_PAGE_SHIFT 12 +#elif (PAGE_SHIFT <= 13) +#define BNGE_PAGE_SHIFT PAGE_SHIFT +#elif (PAGE_SHIFT < 16) +#define BNGE_PAGE_SHIFT 13 +#else +#define BNGE_PAGE_SHIFT 16 +#endif +#define BNGE_PAGE_SIZE (1 << BNGE_PAGE_SHIFT) +/* The RXBD length is 16-bit so we can only support page sizes < 64K */ +#if (PAGE_SHIFT > 15) +#define BNGE_RX_PAGE_SHIFT 15 +#else +#define BNGE_RX_PAGE_SHIFT PAGE_SHIFT +#endif +#define MAX_CTX_PAGES (BNGE_PAGE_SIZE / 8) +#define MAX_CTX_TOTAL_PAGES (MAX_CTX_PAGES * MAX_CTX_PAGES) + +struct bnge_ctx_pg_info { + u32 entries; + u32 nr_pages; + void *ctx_pg_arr[MAX_CTX_PAGES]; + dma_addr_t ctx_dma_arr[MAX_CTX_PAGES]; + struct bnge_ring_mem_info ring_mem; + struct bnge_ctx_pg_info **ctx_pg_tbl; +}; + +#define BNGE_MAX_TQM_SP_RINGS 1 +#define BNGE_MAX_TQM_FP_RINGS 8 +#define BNGE_MAX_TQM_RINGS \ + (BNGE_MAX_TQM_SP_RINGS + BNGE_MAX_TQM_FP_RINGS) +#define BNGE_BACKING_STORE_CFG_LEGACY_LEN 256 +#define BNGE_SET_CTX_PAGE_ATTR(attr) \ +do { \ + if (BNGE_PAGE_SIZE == 0x2000) \ + attr = FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8K; \ + else if (BNGE_PAGE_SIZE == 0x10000) \ + attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_64K; \ + else \ + attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K; \ +} while (0) + +#define BNGE_CTX_MRAV_AV_SPLIT_ENTRY 0 + +#define BNGE_CTX_QP \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP +#define BNGE_CTX_SRQ \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ +#define BNGE_CTX_CQ \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ +#define BNGE_CTX_VNIC \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC +#define BNGE_CTX_STAT \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT +#define BNGE_CTX_STQM \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING +#define BNGE_CTX_FTQM \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING +#define BNGE_CTX_MRAV \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV +#define BNGE_CTX_TIM \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM +#define BNGE_CTX_TCK \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TX_CK +#define BNGE_CTX_RCK \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RX_CK +#define BNGE_CTX_MTQM \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING +#define BNGE_CTX_SQDBS \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW +#define BNGE_CTX_RQDBS \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW +#define BNGE_CTX_SRQDBS \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW +#define BNGE_CTX_CQDBS \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW +#define BNGE_CTX_SRT_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE +#define BNGE_CTX_SRT2_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE +#define BNGE_CTX_CRT_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE +#define BNGE_CTX_CRT2_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT2_TRACE +#define BNGE_CTX_RIGP0_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP0_TRACE +#define BNGE_CTX_L2_HWRM_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_L2_HWRM_TRACE +#define BNGE_CTX_ROCE_HWRM_TRACE \ + FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE + +#define BNGE_CTX_MAX (BNGE_CTX_TIM + 1) +#define BNGE_CTX_L2_MAX (BNGE_CTX_FTQM + 1) +#define BNGE_CTX_INV ((u16)-1) + +#define BNGE_CTX_V2_MAX \ + (FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE + 1) + +#define BNGE_BS_CFG_ALL_DONE \ + FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_CFG_ALL_DONE + +struct bnge_ctx_mem_type { + u16 type; + u16 entry_size; + u32 flags; +#define BNGE_CTX_MEM_TYPE_VALID \ + FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID + u32 instance_bmap; + u8 init_value; + u8
entry_multiple; + u16 init_offset; +#define BNGE_CTX_INIT_INVALID_OFFSET 0xffff + u32 max_entries; + u32 min_entries; + u8 last:1; + u8 split_entry_cnt; +#define BNGE_MAX_SPLIT_ENTRY 4 + union { + struct { + u32 qp_l2_entries; + u32 qp_qp1_entries; + u32 qp_fast_qpmd_entries; + }; + u32 srq_l2_entries; + u32 cq_l2_entries; + u32 vnic_entries; + struct { + u32 mrav_av_entries; + u32 mrav_num_entries_units; + }; + u32 split[BNGE_MAX_SPLIT_ENTRY]; + }; + struct bnge_ctx_pg_info *pg_info; +}; + +struct bnge_ctx_mem_info { + u8 tqm_fp_rings_count; + u32 flags; +#define BNGE_CTX_FLAG_INITED 0x01 + struct bnge_ctx_mem_type ctx_arr[BNGE_CTX_V2_MAX]; +}; + +int bnge_alloc_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem); +void bnge_free_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem); +int bnge_alloc_ctx_mem(struct bnge_dev *bd); +void bnge_free_ctx_mem(struct bnge_dev *bd); + +#endif /* _BNGE_RMEM_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f621a5bab1ea..6bbe875132b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11616,11 +11616,9 @@ static void bnxt_free_irq(struct bnxt *bp) static int bnxt_request_irq(struct bnxt *bp) { + struct cpu_rmap *rmap = NULL; int i, j, rc = 0; unsigned long flags = 0; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif rc = bnxt_setup_int_mode(bp); if (rc) { @@ -11641,15 +11639,15 @@ static int bnxt_request_irq(struct bnxt *bp) int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; -#ifdef CONFIG_RFS_ACCEL - if (rmap && bp->bnapi[i]->rx_ring) { + if (IS_ENABLED(CONFIG_RFS_ACCEL) && + rmap && bp->bnapi[i]->rx_ring) { rc = irq_cpu_rmap_add(rmap, irq->vector); if (rc) netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n", j); j++; } -#endif + rc = request_irq(irq->vector, irq->handler, flags, irq->name, bp->bnapi[i]); if (rc) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 4f40f6afe88f..98971ae4f87d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4092,6 +4092,12 @@ static int bcmgenet_probe(struct platform_device *pdev) for (i = 0; i <= priv->hw_params->rx_queues; i++) priv->rx_rings[i].rx_max_coalesced_frames = 1; + /* Initialize u64 stats seq counter for 32bit machines */ + for (i = 0; i <= priv->hw_params->rx_queues; i++) + u64_stats_init(&priv->rx_rings[i].stats64.syncp); + for (i = 0; i <= priv->hw_params->tx_queues; i++) + u64_stats_init(&priv->tx_rings[i].stats64.syncp); + /* libphy will determine the link state */ netif_carrier_off(dev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aebb9fef3f6e..1be2dc40a1a6 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1578,7 +1578,6 @@ napi_del: static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - int orig_mtu = netdev->mtu; /* For now just support only the usual MTU sized frames, * plus some headroom for VLAN, QinQ. 
@@ -1589,15 +1588,10 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - WRITE_ONCE(netdev->mtu, new_mtu); - - if (!netif_running(netdev)) - return 0; - - if (nicvf_update_hw_max_frs(nic, new_mtu)) { - netdev->mtu = orig_mtu; + if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - } + + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index d567e42e1760..465fa8077964 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1096,8 +1096,7 @@ new_buf: copy = size; if (msg->msg_flags & MSG_SPLICE_PAGES) { - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) goto new_buf; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 773f5ad972a2..6bc8dfdb3d4b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (netdev->mtu > enic->port_mtu) + if (new_mtu > enic->port_mtu) netdev_warn(netdev, "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + new_mtu, enic->port_mtu); return _enic_change_mtu(netdev, new_mtu); } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index a98d5af3f9e3..05b8e3743a79 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/clk.h> +#include <linux/reset.h> #include <linux/dma-mapping.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -101,6 +102,8 @@ struct ftgmac100 { /* AST2500/AST2600 RMII ref clock gate */ struct clk *rclk; + /* Aspeed reset control */ + struct reset_control *rst; /* Link management */ int cur_speed; @@ -148,6 +151,23 @@ static int ftgmac100_reset_and_config_mac(struct ftgmac100 *priv) { u32 maccr = 0; + /* Aspeed RMII needs SCU reset to clear status */ + if (priv->is_aspeed && priv->netdev->phydev->interface == PHY_INTERFACE_MODE_RMII) { + int err; + + err = reset_control_assert(priv->rst); + if (err) { + dev_err(priv->dev, "Failed to reset mac (%d)\n", err); + return err; + } + usleep_range(10000, 20000); + err = reset_control_deassert(priv->rst); + if (err) { + dev_err(priv->dev, "Failed to deassert mac reset (%d)\n", err); + return err; + } + } + switch (priv->cur_speed) { case SPEED_10: case 0: /* no link */ @@ -1968,6 +1988,12 @@ static int ftgmac100_probe(struct platform_device *pdev) } + priv->rst = devm_reset_control_get_optional_exclusive(priv->dev, NULL); + if (IS_ERR(priv->rst)) { + err = PTR_ERR(priv->rst); + goto err_phy_connect; + } + if (priv->is_aspeed) { err = ftgmac100_setup_clk(priv); if (err) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ec2c3dab250..b82f121cadad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv, 
MEM_TYPE_PAGE_ORDER0, NULL); if (err) { dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n"); + xdp_rxq_info_unreg(&fq->channel->xdp_rxq); return err; } @@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) return -EINVAL; } if (err) - return err; + goto out; } err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX, &priv->tx_qdid); if (err) { dev_err(dev, "dpni_get_qdid() failed\n"); - return err; + goto out; } return 0; + +out: + while (i--) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } + return err; } /* Allocate rings for storing incoming frame descriptors */ @@ -4825,6 +4834,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv) } } +static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_fqs; i++) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } +} + static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) { struct device *dev; @@ -5028,6 +5048,7 @@ err_alloc_percpu_extras: free_percpu(priv->percpu_stats); err_alloc_percpu_stats: dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); err_bind: dpaa2_eth_free_dpbps(priv); err_dpbp_setup: @@ -5080,6 +5101,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) free_percpu(priv->percpu_extras); dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); dpaa2_eth_free_dpbps(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index cf91195d5f39..53899096e89e 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -804,6 +804,7 @@ struct gve_priv { struct gve_tx_queue_config tx_cfg; struct gve_rx_queue_config rx_cfg; u32 num_ntfy_blks; /* split between TX and RX so must be even */ + int numa_node; struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ __be32 __iomem *db_bar2; /* "array" of doorbells */ diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index a71883e1d920..6c3c459a1b5e 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -246,6 +246,7 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .nid = priv->numa_node, .dev = &priv->pdev->dev, .netdev = priv->dev, .napi = &priv->ntfy_blocks[ntfy_id].napi, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 27f97a1d2957..be461751ff31 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -461,10 +461,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) return work_done; } +static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) +{ + if (priv->numa_node == NUMA_NO_NODE) + return cpu_all_mask; + else + return cpumask_of_node(priv->numa_node); +} + static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; - unsigned int active_cpus; + const struct cpumask *node_mask; + unsigned int cur_cpu; int vecs_enabled; int i, j; int err; @@ 
-503,8 +512,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; } - /* Half the notification blocks go to TX and half to RX */ - active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); /* Setup Management Vector - the last vector */ snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", @@ -533,6 +540,8 @@ } /* Setup the other blocks - the first n-1 vectors */ + node_mask = gve_get_node_mask(priv); + cur_cpu = cpumask_first(node_mask); for (i = 0; i < priv->num_ntfy_blks; i++) { struct gve_notify_block *block = &priv->ntfy_blocks[i]; int msix_idx = i; @@ -549,9 +558,17 @@ goto abort_with_some_ntfy_blocks; } block->irq = priv->msix_vectors[msix_idx].vector; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - get_cpu_mask(i % active_cpus)); + irq_set_affinity_and_hint(block->irq, + cpumask_of(cur_cpu)); block->irq_db_index = &priv->irq_db_indices[i].index; + + cur_cpu = cpumask_next(cur_cpu, node_mask); + /* Wrap once CPUs in the node have been exhausted, or when + * starting RX queue affinities. TX and RX queues of the same + * index share affinity. + */ + if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) + cur_cpu = cpumask_first(node_mask); } return 0; abort_with_some_ntfy_blocks: @@ -1040,7 +1057,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(gfp_flags); + *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -2322,6 +2339,7 @@ */ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; priv->mgmt_msix_idx = priv->num_ntfy_blks; + priv->numa_node = dev_to_node(&priv->pdev->dev); priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
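[Editor's note: the gve_main.c hunks above replace the old modulo spreading of IRQ affinities with a walk over the device's NUMA-node cpumask. The wrap condition is worth restating: the cursor resets either when the node's CPUs run out or when the TX blocks end, so RX block i lands on the same CPU as TX block i. A plain-int sketch follows, with arrays standing in for the kernel cpumask API; the 4-CPU node, 8 notify blocks and tx_cfg.max_queues == 4 are assumed values for illustration.]

    /* Sketch of gve's NUMA-local, TX/RX-paired IRQ affinity walk. */
    #include <stdio.h>

    int main(void)
    {
        int node_cpus[] = {0, 1, 2, 3}; /* CPUs in the device's NUMA node */
        int nr_node_cpus = 4;
        int max_tx_queues = 4;          /* first half of the blocks are TX */
        int num_blocks = 8;
        int idx = 0;

        for (int i = 0; i < num_blocks; i++) {
            printf("block %d (%s queue %d) -> CPU %d\n", i,
                   i < max_tx_queues ? "TX" : "RX",
                   i < max_tx_queues ? i : i - max_tx_queues,
                   node_cpus[idx]);
            idx++;
            /* wrap at the end of the node, or when the RX blocks begin,
             * so TX queue i and RX queue i share a CPU */
            if (idx == nr_node_cpus || (i + 1) == max_tx_queues)
                idx = 0;
        }
        return 0;
    }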
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 90e875c1832f..ec424d2f4f57 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -192,8 +192,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, */ slots = rx->mask + 1; - rx->data.page_info = kvzalloc(slots * - sizeof(*rx->data.page_info), GFP_KERNEL); + rx->data.page_info = kvcalloc_node(slots, sizeof(*rx->data.page_info), + GFP_KERNEL, priv->numa_node); if (!rx->data.page_info) return -ENOMEM; @@ -216,7 +216,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, if (!rx->data.raw_addressing) { for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) { - struct page *page = alloc_page(GFP_KERNEL); + struct page *page = alloc_pages_node(priv->numa_node, + GFP_KERNEL, 0); if (!page) { err = -ENOMEM; @@ -303,10 +304,9 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1; rx->qpl_copy_pool_head = 0; - rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1, - sizeof(rx->qpl_copy_pool[0]), - GFP_KERNEL); - + rx->qpl_copy_pool = kvcalloc_node(rx->qpl_copy_pool_mask + 1, + sizeof(rx->qpl_copy_pool[0]), + GFP_KERNEL, priv->numa_node); if (!rx->qpl_copy_pool) { err = -ENOMEM; goto abort_with_slots; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 96743e1d80f3..afaa822b1227 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -237,9 +237,9 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); - rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, - sizeof(rx->dqo.buf_states[0]), - GFP_KERNEL); + rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states, + sizeof(rx->dqo.buf_states[0]), + GFP_KERNEL, priv->numa_node); if (!rx->dqo.buf_states) return -ENOMEM; @@ -488,7 +488,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state, u16 buf_len) { - struct page *page = alloc_page(GFP_ATOMIC); + struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0); int num_frags; if (!page) @@ -671,7 +671,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, buf_len = compl_desc->packet_len; hdr_len = compl_desc->header_len; - /* Page might have not been used for awhile and was likely last written + /* Page might have not been used for a while and was likely last written * by a different thread. */ if (rx->dqo.page_pool) { diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 9b65eef62b3f..8cca8316ba40 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -18,6 +18,13 @@ #define HBG_ENDIAN_CTRL_LE_DATA_BE 0x0 #define HBG_PCU_FRAME_LEN_PLUS 4 +#define HBG_FIFO_TX_FULL_THRSLD 0x3F0 +#define HBG_FIFO_TX_EMPTY_THRSLD 0x1F0 +#define HBG_FIFO_RX_FULL_THRSLD 0x240 +#define HBG_FIFO_RX_EMPTY_THRSLD 0x190 +#define HBG_CFG_FIFO_FULL_THRSLD 0x10 +#define HBG_CFG_FIFO_EMPTY_THRSLD 0x01 + static bool hbg_hw_spec_is_valid(struct hbg_priv *priv) { return hbg_reg_read(priv, HBG_REG_SPEC_VALID_ADDR) && @@ -168,6 +175,11 @@ static void hbg_hw_set_mac_max_frame_len(struct hbg_priv *priv, void hbg_hw_set_mtu(struct hbg_priv *priv, u16 mtu) { + /* Setting burst_len BIT(29) to 1 can improve TX performance, + * but packet drops occur when mtu > 2000, + * so clear BIT(29) when mtu > 2000. + */ + u32 burst_len_bit = (mtu > 2000) ?
0 : 1; u32 frame_len; frame_len = mtu + VLAN_HLEN * priv->dev_specs.vlan_layers + @@ -175,6 +187,9 @@ void hbg_hw_set_mtu(struct hbg_priv *priv, u16 mtu) hbg_hw_set_pcu_max_frame_len(priv, frame_len); hbg_hw_set_mac_max_frame_len(priv, frame_len); + + hbg_reg_write_field(priv, HBG_REG_BRUST_LENGTH_ADDR, + HBG_REG_BRUST_LENGTH_B, burst_len_bit); } void hbg_hw_mac_enable(struct hbg_priv *priv, u32 enable) @@ -264,6 +279,41 @@ void hbg_hw_set_rx_pause_mac_addr(struct hbg_priv *priv, u64 mac_addr) hbg_reg_write64(priv, HBG_REG_FD_FC_ADDR_LOW_ADDR, mac_addr); } +static void hbg_hw_set_fifo_thrsld(struct hbg_priv *priv, + u32 full, u32 empty, enum hbg_dir dir) +{ + u32 value = 0; + + value |= FIELD_PREP(HBG_REG_FIFO_THRSLD_FULL_M, full); + value |= FIELD_PREP(HBG_REG_FIFO_THRSLD_EMPTY_M, empty); + + if (dir & HBG_DIR_TX) + hbg_reg_write(priv, HBG_REG_TX_FIFO_THRSLD_ADDR, value); + + if (dir & HBG_DIR_RX) + hbg_reg_write(priv, HBG_REG_RX_FIFO_THRSLD_ADDR, value); +} + +static void hbg_hw_set_cfg_fifo_thrsld(struct hbg_priv *priv, + u32 full, u32 empty, enum hbg_dir dir) +{ + u32 value; + + value = hbg_reg_read(priv, HBG_REG_CFG_FIFO_THRSLD_ADDR); + + if (dir & HBG_DIR_TX) { + value |= FIELD_PREP(HBG_REG_CFG_FIFO_THRSLD_TX_FULL_M, full); + value |= FIELD_PREP(HBG_REG_CFG_FIFO_THRSLD_TX_EMPTY_M, empty); + } + + if (dir & HBG_DIR_RX) { + value |= FIELD_PREP(HBG_REG_CFG_FIFO_THRSLD_RX_FULL_M, full); + value |= FIELD_PREP(HBG_REG_CFG_FIFO_THRSLD_RX_EMPTY_M, empty); + } + + hbg_reg_write(priv, HBG_REG_CFG_FIFO_THRSLD_ADDR, value); +} + static void hbg_hw_init_transmit_ctrl(struct hbg_priv *priv) { u32 ctrl = 0; @@ -324,5 +374,12 @@ int hbg_hw_init(struct hbg_priv *priv) hbg_hw_init_rx_control(priv); hbg_hw_init_transmit_ctrl(priv); + + hbg_hw_set_fifo_thrsld(priv, HBG_FIFO_TX_FULL_THRSLD, + HBG_FIFO_TX_EMPTY_THRSLD, HBG_DIR_TX); + hbg_hw_set_fifo_thrsld(priv, HBG_FIFO_RX_FULL_THRSLD, + HBG_FIFO_RX_EMPTY_THRSLD, HBG_DIR_RX); + hbg_hw_set_cfg_fifo_thrsld(priv, HBG_CFG_FIFO_FULL_THRSLD, + HBG_CFG_FIFO_EMPTY_THRSLD, HBG_DIR_TX_RX); return 0; }
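[Editor's note: hbg_hw_set_fifo_thrsld() above packs both watermarks of one FIFO into a single register with FIELD_PREP(); per the hbg_reg.h hunk below, the full threshold occupies bits 25:16 and the empty threshold bits 9:0. With the TX constants just introduced, the register value can be computed by hand -- both 0x3F0 (1008) and 0x1F0 (496) fit their 10-bit fields. A userspace restatement, for illustration only:]

    /* Restatement of the FIELD_PREP() packing for HBG_REG_TX_FIFO_THRSLD_ADDR. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t full = 0x3F0;   /* HBG_FIFO_TX_FULL_THRSLD, bits 25:16 */
        uint32_t empty = 0x1F0;  /* HBG_FIFO_TX_EMPTY_THRSLD, bits 9:0 */
        uint32_t value = (full << 16) | empty;

        printf("TX FIFO threshold register = 0x%08X\n", (unsigned int)value);
        /* prints 0x03F001F0 */
        return 0;
    }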
diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 42b0083c9193..8b7b476ed7fb 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Hisilicon Limited. #include <linux/phy.h> +#include <linux/phy_fixed.h> #include <linux/rtnetlink.h> #include "hbg_common.h" #include "hbg_hw.h" @@ -19,6 +20,7 @@ #define HBG_MDIO_OP_INTERVAL_US (5 * 1000) #define HBG_NP_LINK_FAIL_RETRY_TIMES 5 +#define HBG_NO_PHY 0xFF static void hbg_mdio_set_command(struct hbg_mac *mac, u32 cmd) { @@ -229,6 +231,39 @@ void hbg_phy_stop(struct hbg_priv *priv) phy_stop(priv->mac.phydev); } +static void hbg_fixed_phy_uninit(void *data) +{ + fixed_phy_unregister((struct phy_device *)data); +} + +static int hbg_fixed_phy_init(struct hbg_priv *priv) +{ + struct fixed_phy_status hbg_fixed_phy_status = { + .link = 1, + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + .pause = 1, + .asym_pause = 1, + }; + struct device *dev = &priv->pdev->dev; + struct phy_device *phydev; + int ret; + + phydev = fixed_phy_register(&hbg_fixed_phy_status, NULL); + if (IS_ERR(phydev)) { + dev_err_probe(dev, PTR_ERR(phydev), + "failed to register fixed PHY device\n"); + return PTR_ERR(phydev); + } + + ret = devm_add_action_or_reset(dev, hbg_fixed_phy_uninit, phydev); + if (ret) + return ret; + + priv->mac.phydev = phydev; + return hbg_phy_connect(priv); +} + int hbg_mdio_init(struct hbg_priv *priv) { struct device *dev = &priv->pdev->dev; @@ -238,6 +273,9 @@ int hbg_mdio_init(struct hbg_priv *priv) int ret; mac->phy_addr = priv->dev_specs.phy_addr; + if (mac->phy_addr == HBG_NO_PHY) + return hbg_fixed_phy_init(priv); + mdio_bus = devm_mdiobus_alloc(dev); if (!mdio_bus) return dev_err_probe(dev, -ENOMEM, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index a6e7f5e62b48..a39d1e796e4a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -141,7 +141,13 @@ /* PCU */ #define HBG_REG_TX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0420) #define HBG_REG_RX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0424) +#define HBG_REG_FIFO_THRSLD_FULL_M GENMASK(25, 16) +#define HBG_REG_FIFO_THRSLD_EMPTY_M GENMASK(9, 0) #define HBG_REG_CFG_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0428) +#define HBG_REG_CFG_FIFO_THRSLD_TX_FULL_M GENMASK(31, 24) +#define HBG_REG_CFG_FIFO_THRSLD_TX_EMPTY_M GENMASK(23, 16) +#define HBG_REG_CFG_FIFO_THRSLD_RX_FULL_M GENMASK(15, 8) +#define HBG_REG_CFG_FIFO_THRSLD_RX_EMPTY_M GENMASK(7, 0) #define HBG_REG_CF_INTRPT_MSK_ADDR (HBG_REG_SGMII_BASE + 0x042C) #define HBG_INT_MSK_WE_ERR_B BIT(31) #define HBG_INT_MSK_RBREQ_ERR_B BIT(30) @@ -185,6 +191,8 @@ #define HBG_REG_TX_CFF_ADDR_2_ADDR (HBG_REG_SGMII_BASE + 0x0490) #define HBG_REG_TX_CFF_ADDR_3_ADDR (HBG_REG_SGMII_BASE + 0x0494) #define HBG_REG_RX_CFF_ADDR_ADDR (HBG_REG_SGMII_BASE + 0x04A0) +#define HBG_REG_BRUST_LENGTH_ADDR (HBG_REG_SGMII_BASE + 0x04C4) +#define HBG_REG_BRUST_LENGTH_B BIT(29) #define HBG_REG_RX_BUF_SIZE_ADDR (HBG_REG_SGMII_BASE + 0x04E4) #define HBG_REG_RX_BUF_SIZE_M GENMASK(15, 0) #define HBG_REG_BUS_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x04E8) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 54d5fdc303ca..eea845b22089 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -661,7 +661,7 @@ struct i40e_pf { struct ptp_clock_info ptp_caps; struct sk_buff *ptp_tx_skb; unsigned long ptp_tx_start; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; struct timespec64 ptp_prev_hw_time; struct work_struct ptp_extts0_work; ktime_t ptp_reset_start; @@ -1303,8 +1303,11 @@ void
i40e_ptp_tx_hang(struct i40e_pf *pf); void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf); void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index); void i40e_ptp_set_increment(struct i40e_pf *pf); -int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr); -int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr); +int i40e_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int i40e_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void i40e_ptp_save_hw_time(struct i40e_pf *pf); void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3b4f59d978a5..949b74fbb127 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2955,27 +2955,6 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) } /** - * i40e_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - **/ -int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_pf *pf = np->vsi->back; - - switch (cmd) { - case SIOCGHWTSTAMP: - return i40e_ptp_get_ts_config(pf, ifr); - case SIOCSHWTSTAMP: - return i40e_ptp_set_ts_config(pf, ifr); - default: - return -EOPNOTSUPP; - } -} - -/** * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI * @vsi: the vsi being adjusted **/ @@ -13626,7 +13605,6 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = i40e_set_mac, .ndo_change_mtu = i40e_change_mtu, - .ndo_eth_ioctl = i40e_ioctl, .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, @@ -13654,6 +13632,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_xsk_wakeup = i40e_xsk_wakeup, .ndo_dfwd_add_station = i40e_fwd_add, .ndo_dfwd_del_station = i40e_fwd_del, + .ndo_hwtstamp_get = i40e_ptp_hwtstamp_get, + .ndo_hwtstamp_set = i40e_ptp_hwtstamp_set, }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index b72a4b5d76b9..1d04ea7df552 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -912,23 +912,26 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) } /** - * i40e_ptp_get_ts_config - ioctl interface to read the HW timestamping - * @pf: Board private structure - * @ifr: ioctl data + * i40e_ptp_hwtstamp_get - interface to read the HW timestamping + * @netdev: Network device structure + * @config: Timestamping configuration structure * * Obtain the current hardware timestamping settings as requested. To do this, * keep a shadow copy of the timestamp settings rather than attempting to * deconstruct it from the registers. **/ -int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) +int i40e_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config *config = &pf->tstamp_config; + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
- -EFAULT : 0; + *config = pf->tstamp_config; + + return 0; } /** @@ -1167,7 +1170,7 @@ int i40e_ptp_alloc_pins(struct i40e_pf *pf) * more broad if the specific filter is not directly supported. **/ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct i40e_hw *hw = &pf->hw; u32 tsyntype, regval; @@ -1290,9 +1293,10 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, } /** - * i40e_ptp_set_ts_config - ioctl interface to control the HW timestamping - * @pf: Board private structure - * @ifr: ioctl data + * i40e_ptp_hwtstamp_set - interface to control the HW timestamping + * @netdev: Network device structure + * @config: Timestamping configuration structure + * @extack: Netlink extended ack structure for error reporting * * Respond to the user filter requests and make the appropriate hardware * changes here. The XL710 cannot support splitting of the Tx/Rx timestamping @@ -1303,26 +1307,25 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, * as the user receives the timestamps they care about and the user is notified * the filter has been broadened. **/ -int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) +int i40e_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; int err; if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = i40e_ptp_set_timestamp_mode(pf, &config); + err = i40e_ptp_set_timestamp_mode(pf, config); if (err) return err; /* save these settings for future reference */ - pf->tstamp_config = config; + pf->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; + return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index ea7e8b879b48..e54221fba849 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3591,11 +3591,10 @@ static int ice_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct ice_netdev_priv *np = netdev_priv(netdev); - u32 rss_context = rxfh->rss_context; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; u16 qcount, offset; - int err, num_tc, i; + int err, i; u8 *lut; if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { @@ -3603,24 +3602,8 @@ ice_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) return -EOPNOTSUPP; } - if (rss_context && !ice_is_adq_active(pf)) { - netdev_err(netdev, "RSS context cannot be non-zero when ADQ is not configured.\n"); - return -EINVAL; - } - - qcount = vsi->mqprio_qopt.qopt.count[rss_context]; - offset = vsi->mqprio_qopt.qopt.offset[rss_context]; - - if (rss_context && ice_is_adq_active(pf)) { - num_tc = vsi->mqprio_qopt.qopt.num_tc; - if (rss_context >= num_tc) { - netdev_err(netdev, "RSS context:%d > num_tc:%d\n", - rss_context, num_tc); - return -EINVAL; - } - /* Use channel VSI of given TC */ - vsi = vsi->tc_map_vsi[rss_context]; - } + qcount = vsi->mqprio_qopt.qopt.count[0]; + offset = vsi->mqprio_qopt.qopt.offset[0]; rxfh->hfunc = ETH_RSS_HASH_TOP; if (vsi->rss_hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ) @@ -3680,9 +3663,6 @@ ice_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->rss_context) - return -EOPNOTSUPP; - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { /* RSS not supported return error here */ netdev_warn(netdev, "RSS is not configured on this VSI!\n"); @@ -4750,12 +4730,10 @@ static int ice_repr_ethtool_reset(struct net_device *dev, u32 *flags) } static const struct ethtool_ops ice_ethtool_ops = { - .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_RX_USECS_HIGH, .supported_input_xfrm = RXH_XFRM_SYM_XOR, - .rxfh_per_ctx_key = true, .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f8ef80069e3d..af68869693ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7902,27 +7902,6 @@ int ice_change_mtu(struct net_device *netdev, int new_mtu) } /** - * ice_eth_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - */ -static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; - - switch (cmd) { - case SIOCGHWTSTAMP: - return ice_ptp_get_ts_config(pf, ifr); - case SIOCSHWTSTAMP: - return ice_ptp_set_ts_config(pf, ifr); - default: - return -EOPNOTSUPP; - } -} - -/** * ice_aq_str - convert AQ err code to a string * @aq_err: the AQ error code to convert */ @@ -9761,7 +9740,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, - .ndo_eth_ioctl = ice_eth_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, 
.ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, @@ -9785,4 +9763,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, + .ndo_hwtstamp_get = ice_ptp_hwtstamp_get, + .ndo_hwtstamp_set = ice_ptp_hwtstamp_set, }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e7005d757477..e358eb1d719f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2197,23 +2197,24 @@ static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, } /** - * ice_ptp_get_ts_config - ioctl interface to read the timestamping config - * @pf: Board private structure - * @ifr: ioctl data + * ice_ptp_hwtstamp_get - interface to read the timestamping config + * @netdev: Pointer to network interface device structure + * @config: Timestamping configuration structure * * Copy the timestamping config to user buffer */ -int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) +int ice_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config *config; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; if (pf->ptp.state != ICE_PTP_READY) return -EIO; - config = &pf->ptp.tstamp_config; + *config = pf->ptp.tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; + return 0; } /** @@ -2221,8 +2222,8 @@ int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) * @pf: Board private structure * @config: hwtstamp settings requested or saved */ -static int -ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) +static int ice_ptp_set_timestamp_mode(struct ice_pf *pf, + struct kernel_hwtstamp_config *config) { switch (config->tx_type) { case HWTSTAMP_TX_OFF: @@ -2266,32 +2267,32 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) } /** - * ice_ptp_set_ts_config - ioctl interface to control the timestamping - * @pf: Board private structure - * @ifr: ioctl data + * ice_ptp_hwtstamp_set - interface to control the timestamping + * @netdev: Pointer to network interface device structure + * @config: Timestamping configuration structure + * @extack: Netlink extended ack structure for error reporting * * Get the user config and store it */ -int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) +int ice_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; int err; if (pf->ptp.state != ICE_PTP_READY) return -EAGAIN; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = ice_ptp_set_timestamp_mode(pf, &config); + err = ice_ptp_set_timestamp_mode(pf, config); if (err) return err; /* Return the actual configuration set */ - config = pf->ptp.tstamp_config; + *config = pf->ptp.tstamp_config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; + return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index c8dac5a5bcd9..137f2070a2d9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -259,7 +259,7 @@ struct ice_ptp { struct ptp_extts_request extts_rqs[GLTSYN_EVNT_H_IDX_MAX]; struct ptp_clock_info info; struct ptp_clock *clock; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; u64 reset_time; u32 tx_hwtstamp_skipped; u32 tx_hwtstamp_timeouts; @@ -291,8 +291,11 @@ struct ice_ptp { #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) int ice_ptp_clock_index(struct ice_pf *pf); struct ice_pf; -int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr); -int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); +int ice_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int ice_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void ice_ptp_restore_timestamp_mode(struct ice_pf *pf); void ice_ptp_extts_event(struct ice_pf *pf); @@ -313,12 +316,16 @@ void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); void ice_ptp_link_change(struct ice_pf *pf, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ -static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) + +static inline int ice_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } -static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) +static inline int ice_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index b28991dd1870..48b8e184f3db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) */ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) { - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* free ring buffers and the ring itself */ idpf_ctlq_dealloc_ring_res(hw, cq); @@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) /* Set ring_size to 0 to indicate uninitialized queue */ cq->ring_size = 0; - mutex_unlock(&cq->cq_lock); - mutex_destroy(&cq->cq_lock); + spin_unlock(&cq->cq_lock); } /** @@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw, idpf_ctlq_init_regs(hw, cq, is_rxq); - mutex_init(&cq->cq_lock); + spin_lock_init(&cq->cq_lock); list_add(&cq->cq_list, &hw->cq_list_head); @@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, int err = 0; int i; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* Ensure there are enough descriptors to send all messages */ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); @@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, cq->reg.tail, cq->next_to_use); err_unlock: - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); return err; } @@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (*clean_count > cq->ring_size) return -EBADR; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -397,7 
+396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* Return number of descriptors actually cleaned */ *clean_count = i; @@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, if (*buff_count > 0) buffs_avail = true; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); if (tbp >= cq->ring_size) tbp = 0; @@ -524,7 +523,7 @@ post_buffs_out: wr32(hw, cq->reg.tail, cq->next_to_post); } - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* return the number of buffers that were not posted */ *buff_count = *buff_count - i; @@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, u16 i; /* take the lock before we start messing with the ring */ - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); *num_q_msg = i; if (*num_q_msg == 0) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index 9642494a67d8..3414c5f9a831 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -99,7 +99,7 @@ struct idpf_ctlq_info { enum idpf_ctlq_type cq_type; int q_id; - struct mutex cq_lock; /* control queue lock */ + spinlock_t cq_lock; /* control queue lock */ /* used for interrupt processing */ u16 next_to_use; u16 next_to_clean; diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 9bdb309b668e..eaf7a2606faa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; @@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 4eb20ec2accb..80382ff4a5fa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) struct idpf_adapter *adapter = hw->back; size_t sz = ALIGN(size, 4096); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, - &mem->pa, GFP_KERNEL); + /* The control queue resources are freed under a spinlock, so contiguous + * pages avoid IOMMU remapping and the use of vmap() (and vunmap() in + * the dma_free_*() path).
+ */ + mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa, + GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = sz; return mem->va; @@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) { struct idpf_adapter *adapter = hw->back; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, mem->pa); + dma_free_attrs(&adapter->pdev->dev, mem->size, + mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = 0; mem->va = NULL; mem->pa = 0; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index f34ead8243e9..c3f4f7cd264e 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -626,7 +626,7 @@ struct igb_adapter { struct delayed_work ptp_overflow_work; struct work_struct ptp_tx_work; struct sk_buff *ptp_tx_skb; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; unsigned long ptp_tx_start; unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; @@ -771,8 +771,11 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter); void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp); -int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); -int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +int igb_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int igb_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); unsigned int igb_get_max_rss_queues(struct igb_adapter *); #ifdef CONFIG_IGB_HWMON diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b76a154e635e..a9a7a94ae61e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3062,6 +3062,8 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_bpf = igb_xdp, .ndo_xdp_xmit = igb_xdp_xmit, .ndo_xsk_wakeup = igb_xsk_wakeup, + .ndo_hwtstamp_get = igb_ptp_hwtstamp_get, + .ndo_hwtstamp_set = igb_ptp_hwtstamp_set, }; /** @@ -9317,10 +9319,6 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); - case SIOCGHWTSTAMP: - return igb_ptp_get_ts_config(netdev, ifr); - case SIOCSHWTSTAMP: - return igb_ptp_set_ts_config(netdev, ifr); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 793c96016288..05d30aba66db 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1094,21 +1094,22 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) } /** - * igb_ptp_get_ts_config - get hardware time stamping config + * igb_ptp_hwtstamp_get - get hardware time stamping config * @netdev: netdev struct - * @ifr: interface struct + * @config: timestamping configuration structure * * Get the hwtstamp_config settings to return to the user. Rather than attempt * to deconstruct the settings from the registers, just return a shadow copy * of the last known settings. 
**/ -int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igb_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { struct igb_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config *config = &adapter->tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? - -EFAULT : 0; + *config = adapter->tstamp_config; + + return 0; } /** @@ -1129,7 +1130,7 @@ int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) * level 2 or 4". */ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct e1000_hw *hw = &adapter->hw; u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; @@ -1275,30 +1276,26 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, } /** - * igb_ptp_set_ts_config - set hardware time stamping config + * igb_ptp_hwtstamp_set - set hardware time stamping config * @netdev: netdev struct - * @ifr: interface struct - * + * @config: timestamping configuration structure + * @extack: netlink extended ack structure for error reporting **/ -int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igb_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct igb_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config config; int err; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = igb_ptp_set_timestamp_mode(adapter, &config); + err = igb_ptp_set_timestamp_mode(adapter, config); if (err) return err; /* save these settings for future reference */ - memcpy(&adapter->tstamp_config, &config, - sizeof(adapter->tstamp_config)); + adapter->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
- -EFAULT : 0; + return 0; } /** diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 83b97989a6bd..773895c663fd 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -33,6 +33,7 @@ static const struct igbvf_stats igbvf_gstrings_stats[] = { { "lbrx_bytes", IGBVF_STAT(stats.gorlbc, stats.base_gorlbc) }, { "lbrx_packets", IGBVF_STAT(stats.gprlbc, stats.base_gprlbc) }, { "tx_restart_queue", IGBVF_STAT(restart_queue, zero_base) }, + { "tx_timeout_count", IGBVF_STAT(tx_timeout_count, zero_base) }, { "rx_long_byte_count", IGBVF_STAT(stats.gorc, stats.base_gorc) }, { "rx_csum_offload_good", IGBVF_STAT(hw_csum_good, zero_base) }, { "rx_csum_offload_errors", IGBVF_STAT(hw_csum_err, zero_base) }, diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h index ca6e44245a7b..ba9c3fee6da7 100644 --- a/drivers/net/ethernet/intel/igbvf/igbvf.h +++ b/drivers/net/ethernet/intel/igbvf/igbvf.h @@ -238,8 +238,6 @@ struct igbvf_adapter { int int_mode; u32 eims_enable_mask; u32 eims_other; - u32 int_counter0; - u32 int_counter1; u32 eeprom_wol; u32 wol; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index e55dd9345833..aed9162afd38 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -855,8 +855,6 @@ static irqreturn_t igbvf_msix_other(int irq, void *data) struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - adapter->int_counter1++; - hw->mac.get_link_status = 1; if (!test_bit(__IGBVF_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@ -899,8 +897,6 @@ static irqreturn_t igbvf_intr_msix_rx(int irq, void *data) struct net_device *netdev = data; struct igbvf_adapter *adapter = netdev_priv(netdev); - adapter->int_counter0++; - /* Write the ITR value calculated at the end of the * previous interrupt. 
*/ diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 1525ae25fd3e..97b1a2c820ee 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -315,7 +315,7 @@ struct igc_adapter { */ spinlock_t ptp_tx_lock; struct igc_tx_timestamp_request tx_tstamp[IGC_MAX_TX_TSTAMP_REGS]; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; unsigned int ptp_flags; /* System time value lock */ spinlock_t tmreg_lock; @@ -773,8 +773,11 @@ void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); -int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); -int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +int igc_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int igc_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2e12915b42a9..32ea4fdd3e2b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6303,24 +6303,6 @@ int igc_close(struct net_device *netdev) return 0; } -/** - * igc_ioctl - Access the hwtstamp interface - * @netdev: network interface device structure - * @ifr: interface request data - * @cmd: ioctl command - **/ -static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGHWTSTAMP: - return igc_ptp_get_ts_config(netdev, ifr); - case SIOCSHWTSTAMP: - return igc_ptp_set_ts_config(netdev, ifr); - default: - return -EOPNOTSUPP; - } -} - static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, bool enable) { @@ -6971,12 +6953,13 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, - .ndo_eth_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, .ndo_xsk_wakeup = igc_xsk_wakeup, .ndo_get_tstamp = igc_get_tstamp, + .ndo_hwtstamp_get = igc_ptp_hwtstamp_get, + .ndo_hwtstamp_set = igc_ptp_hwtstamp_set, }; u32 igc_rd32(struct igc_hw *hw, u32 reg) @@ -7144,6 +7127,10 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + err = pci_save_state(pdev); if (err) goto err_ioremap; @@ -7529,6 +7516,9 @@ static int __igc_resume(struct device *dev, bool rpm) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + if (igc_init_interrupt_scheme(adapter, true)) { netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; @@ -7654,6 +7644,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + 
if (igc_is_device_id_i226(hw)) + pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); + /* In case of PCI error, adapter loses its HW address * so we should re-assign it here. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index f4f5c28615d3..b7b46d863bee 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -626,7 +626,7 @@ static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) * Return: 0 in case of success, negative errno code otherwise. */ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { switch (config->tx_type) { case HWTSTAMP_TX_OFF: @@ -853,48 +853,46 @@ void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) } /** - * igc_ptp_set_ts_config - set hardware time stamping config + * igc_ptp_hwtstamp_set - set hardware time stamping config * @netdev: network interface device structure - * @ifr: interface request data + * @config: timestamping configuration structure + * @extack: netlink extended ack structure for error reporting * **/ -int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igc_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct igc_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config config; int err; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = igc_ptp_set_timestamp_mode(adapter, &config); + err = igc_ptp_set_timestamp_mode(adapter, config); if (err) return err; /* save these settings for future reference */ - memcpy(&adapter->tstamp_config, &config, - sizeof(adapter->tstamp_config)); + adapter->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } /** - * igc_ptp_get_ts_config - get hardware time stamping config + * igc_ptp_hwtstamp_get - get hardware time stamping config * @netdev: network interface device structure - * @ifr: interface request data + * @config: timestamping configuration structure * * Get the hwtstamp_config settings to return to the user. Rather than attempt * to deconstruct the settings from the registers, just return a shadow copy * of the last known settings. **/ -int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +int igc_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { struct igc_adapter *adapter = netdev_priv(netdev); - struct hwtstamp_config *config = &adapter->tstamp_config; - return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? 
- -EFAULT : 0; + *config = adapter->tstamp_config; + + return 0; } /* The two conditions below must be met for cross timestamping via diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index c6772cd2d802..05b36ac3ac29 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -429,6 +429,10 @@ enum ixgbe_ring_f_enum { #define IXGBE_BAD_L2A_QUEUE 3 #define IXGBE_MAX_MACVLANS 63 +#define IXGBE_MAX_TX_QUEUES 128 +#define IXGBE_MAX_TX_DESCRIPTORS 40 +#define IXGBE_MAX_TX_VF_HANGS 4 + DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); struct ixgbe_ring_feature { @@ -785,7 +789,7 @@ struct ixgbe_adapter { struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; struct sk_buff *ptp_tx_skb; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; unsigned long ptp_tx_start; unsigned long last_overflow_check; unsigned long last_rx_ptp_check; @@ -810,6 +814,7 @@ struct ixgbe_adapter { u32 timer_event_accumulator; u32 vferr_refcount; struct ixgbe_mac_addr *mac_table; + u8 tx_hang_count[IXGBE_MAX_TX_QUEUES]; struct kobject *info_kobj; u16 lse_mask; #ifdef CONFIG_IXGBE_HWMON @@ -1080,8 +1085,11 @@ static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, rx_ring->last_rx_timestamp = jiffies; } -int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); -int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); +int ixgbe_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config); +int ixgbe_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 444da982593f..406c15f58034 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -45,7 +45,7 @@ static void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) goto out; /* - * if capababilities version is type 1 we can write the + * if capabilities version is type 1 we can write the * timeout of 10ms to 250ms through the GCR register */ if (!(gcr & IXGBE_GCR_CAP_VER2)) { @@ -751,7 +751,7 @@ mac_reset_top: /* * Store the original AUTOC value if it has not been * stored off yet. Otherwise restore the stored original - * AUTOC value since the reset operation sets back to deaults. + * AUTOC value since the reset operation sets back to defaults. */ autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); if (hw->mac.orig_link_settings_stored == false) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 5784d5d1896e..4ff19426ab74 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -244,7 +244,7 @@ int ixgbe_setup_fc_generic(struct ixgbe_hw *hw) */ if (hw->phy.media_type == ixgbe_media_type_backplane) { /* Need the SW/FW semaphore around AUTOC writes if 82599 and - * LESM is on, likewise reset_pipeline requries the lock as + * LESM is on, likewise reset_pipeline requires the lock as * it also writes AUTOC. 
*/ ret_val = hw->mac.ops.prot_autoc_write(hw, reg_bp, locked); @@ -301,7 +301,7 @@ int ixgbe_start_hw_generic(struct ixgbe_hw *hw) return ret_val; } - /* Cashe bit indicating need for crosstalk fix */ + /* Cache bit indicating need for crosstalk fix */ switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X550EM_x: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 71ea25de1bac..87b03c1992a8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -3965,6 +3965,10 @@ static const struct ixgbe_mac_operations mac_ops_e610 = { .prot_autoc_write = prot_autoc_write_generic, .setup_fc = ixgbe_setup_fc_e610, .fc_autoneg = ixgbe_fc_autoneg_e610, + .enable_mdd = ixgbe_enable_mdd_x550, + .disable_mdd = ixgbe_disable_mdd_x550, + .restore_mdd_vf = ixgbe_restore_mdd_vf_x550, + .handle_mdd = ixgbe_handle_mdd_x550, }; static const struct ixgbe_phy_operations phy_ops_e610 = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 7dcf6ecd157b..011fda9c6193 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -744,7 +744,7 @@ void ixgbe_free_fcoe_ddp_resources(struct ixgbe_adapter *adapter) * ixgbe_setup_fcoe_ddp_resources - setup all fcoe ddp context resources * @adapter: ixgbe adapter * - * Sets up ddp context resouces + * Sets up ddp context resources * * Returns : 0 indicates success or -EINVAL on failure */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 336d47ffb95a..170a29d162c6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -891,7 +891,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS | IXGBE_ITR_ADAPTIVE_LATENCY; - /* intialize ITR */ + /* initialize ITR */ if (txr_count && !rxr_count) { /* tx only vector */ if (adapter->tx_itr_setting == 1) @@ -1293,7 +1293,8 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; /* set bits to identify this as an advanced context descriptor */ - type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + type_tucmd |= IXGBE_TXD_CMD_DEXT | + FIELD_PREP(IXGBE_ADVTXD_DTYP_MASK, IXGBE_ADVTXD_DTYP_CTXT); context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); context_desc->fceof_saidx = cpu_to_le32(fceof_saidx); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6eccfba51fac..6122a0abb41f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9,6 +9,7 @@ #include <linux/string.h> #include <linux/in.h> #include <linux/interrupt.h> +#include <linux/iopoll.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/sctp.h> @@ -1040,6 +1041,48 @@ static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) return ((head <= tail) ? tail : tail + ring->count) - head; } +/** + * ixgbe_get_vf_idx - provide VF index number based on queue index + * @adapter: pointer to the adapter struct + * @queue: Tx queue identifier + * @vf: output VF index + * + * Provide VF index number associated to the input queue. + * + * Returns: 0 if VF provided or error number. 
+ */ +static int ixgbe_get_vf_idx(struct ixgbe_adapter *adapter, u16 queue, u16 *vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + u8 queue_count; + u32 reg; + + if (queue >= adapter->num_tx_queues) + return -EINVAL; + + /* Determine number of queues by checking + * number of virtual functions + */ + reg = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); + switch (reg & IXGBE_GCR_EXT_VT_MODE_MASK) { + case IXGBE_GCR_EXT_VT_MODE_64: + queue_count = IXGBE_64VFS_QUEUES; + break; + case IXGBE_GCR_EXT_VT_MODE_32: + queue_count = IXGBE_32VFS_QUEUES; + break; + case IXGBE_GCR_EXT_VT_MODE_16: + queue_count = IXGBE_16VFS_QUEUES; + break; + default: + return -EINVAL; + } + + *vf = queue / queue_count; + + return 0; +} + static bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring) { u32 tx_done = ixgbe_get_tx_completed(tx_ring); @@ -1159,6 +1202,150 @@ void ixgbe_update_rx_ring_stats(struct ixgbe_ring *rx_ring, } /** + * ixgbe_pf_handle_tx_hang - handle Tx hang on PF + * @tx_ring: tx ring number + * @next: next ring + * + * Prints a message containing details about the tx hang. + */ +static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, + unsigned int next) +{ + struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); + struct ixgbe_hw *hw = &adapter->hw; + + e_err(drv, "Detected Tx Unit Hang%s\n" + " Tx Queue <%d>\n" + " TDH, TDT <%x>, <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "tx_buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " jiffies <%lx>\n", + ring_is_xdp(tx_ring) ? " (XDP)" : "", + tx_ring->queue_index, + IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), + IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), + tx_ring->next_to_use, next, + tx_ring->tx_buffer_info[next].time_stamp, jiffies); + + if (!ring_is_xdp(tx_ring)) + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); +} + +/** + * ixgbe_vf_handle_tx_hang - handle Tx hang on VF + * @adapter: structure containing ring specific data + * @vf: VF index + * + * Print a message containing details about malicious driver detection. + * Set malicious VF link down if the detection happened several times. + */ +static void ixgbe_vf_handle_tx_hang(struct ixgbe_adapter *adapter, u16 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + if (adapter->hw.mac.type != ixgbe_mac_e610) + return; + + e_warn(drv, + "Malicious Driver Detection tx hang detected on PF %d VF %d MAC: %pM", + hw->bus.func, vf, adapter->vfinfo[vf].vf_mac_addresses); + + adapter->tx_hang_count[vf]++; + if (adapter->tx_hang_count[vf] == IXGBE_MAX_TX_VF_HANGS) { + ixgbe_set_vf_link_state(adapter, vf, + IFLA_VF_LINK_STATE_DISABLE); + adapter->tx_hang_count[vf] = 0; + } +} + +static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx) +{ + IXGBE_WRITE_REG(hw, IXGBE_TXDESCIC, queue * idx); + return IXGBE_READ_REG(hw, IXGBE_TXDESCIC); +} + +/** + * ixgbe_check_illegal_queue - search for queue with illegal packet + * @adapter: structure containing ring specific data + * @queue: queue index + * + * Check if tx descriptor connected with input queue + * contains illegal packet. + * + * Returns: true if queue contain illegal packet. + */ +static bool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter, + u16 queue) +{ + u32 hdr_len_reg, mss_len_reg, type_reg; + struct ixgbe_hw *hw = &adapter->hw; + u32 mss_len, header_len, reg; + + for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) { + /* HW will clear bit IXGBE_TXDESCIC_READY when address + * is written to address field. HW will set this bit + * when iCache read is done, and data is ready at TIC_DWx. 
+ * Set descriptor address. + */ + read_poll_timeout(ixgbe_poll_tx_icache, reg, + !(reg & IXGBE_TXDESCIC_READY), 0, 0, false, + hw, queue, i); + + /* read tx descriptor access registers */ + hdr_len_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_VLAN_MACIP_LENS_REG)); + type_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_TYPE_TUCMD_MLHL)); + mss_len_reg = IXGBE_READ_REG(hw, IXGBE_TIC_DW2(IXGBE_MSS_L4LEN_IDX)); + + /* check if Advanced Context Descriptor */ + if (FIELD_GET(IXGBE_ADVTXD_DTYP_MASK, type_reg) != + IXGBE_ADVTXD_DTYP_CTXT) + continue; + + /* check for illegal MSS and Header length */ + mss_len = FIELD_GET(IXGBE_ADVTXD_MSS_MASK, mss_len_reg); + header_len = FIELD_GET(IXGBE_ADVTXD_HEADER_LEN_MASK, + hdr_len_reg); + if ((mss_len + header_len) > SZ_16K) { + e_warn(probe, "mss len + header len too long\n"); + return true; + } + } + + return false; +} + +/** + * ixgbe_handle_mdd_event - handle mdd event + * @adapter: structure containing ring specific data + * @tx_ring: tx descriptor ring to handle + * + * Reset VF driver if malicious vf detected or + * illegal packet in an any queue detected. + */ +static void ixgbe_handle_mdd_event(struct ixgbe_adapter *adapter, + struct ixgbe_ring *tx_ring) +{ + u16 vf, q; + + if (adapter->vfinfo && ixgbe_check_mdd_event(adapter)) { + /* vf mdd info and malicious vf detected */ + if (!ixgbe_get_vf_idx(adapter, tx_ring->queue_index, &vf)) + ixgbe_vf_handle_tx_hang(adapter, vf); + } else { + /* malicious vf not detected */ + for (q = 0; q < IXGBE_MAX_TX_QUEUES; q++) { + if (ixgbe_check_illegal_queue(adapter, q) && + !ixgbe_get_vf_idx(adapter, q, &vf)) + /* illegal queue detected */ + ixgbe_vf_handle_tx_hang(adapter, vf); + } + } +} + +/** * ixgbe_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: structure containing interrupt and ring information * @tx_ring: tx ring to clean @@ -1265,26 +1452,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, adapter->tx_ipsec += total_ipsec; if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { - /* schedule immediate reset if we believe we hung */ - struct ixgbe_hw *hw = &adapter->hw; - e_err(drv, "Detected Tx Unit Hang %s\n" - " Tx Queue <%d>\n" - " TDH, TDT <%x>, <%x>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n" - "tx_buffer_info[next_to_clean]\n" - " time_stamp <%lx>\n" - " jiffies <%lx>\n", - ring_is_xdp(tx_ring) ? "(XDP)" : "", - tx_ring->queue_index, - IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), - IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), - tx_ring->next_to_use, i, - tx_ring->tx_buffer_info[i].time_stamp, jiffies); - - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + if (adapter->hw.mac.type == ixgbe_mac_e610) + ixgbe_handle_mdd_event(adapter, tx_ring); + + ixgbe_pf_handle_tx_hang(tx_ring, i); e_info(probe, "tx hang %d detected on queue %d, resetting adapter\n", @@ -2200,7 +2371,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, struct sk_buff *skb; /* Prefetch first cache line of first page. If xdp->data_meta - * is unused, this points extactly as xdp->data, otherwise we + * is unused, this points exactly as xdp->data, otherwise we * likely have a consumer accessing first few bytes of meta * data, and then actual data. */ @@ -2323,7 +2494,7 @@ static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring, * This function provides a "bounce buffer" approach to Rx interrupt * processing. 
The advantage to this is that on systems that have * expensive overhead for IOMMU access this provides a means of avoiding - * it by maintaining the mapping of the page to the syste. + * it by maintaining the mapping of the page to the system. * * Returns amount of work completed **/ @@ -3933,8 +4104,12 @@ void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) #endif { - int i; bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (hw->mac.ops.disable_mdd) + hw->mac.ops.disable_mdd(hw); if (adapter->ixgbe_ieee_pfc) pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); @@ -3956,6 +4131,9 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]); } + + if (hw->mac.ops.enable_mdd) + hw->mac.ops.enable_mdd(hw); } #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 @@ -4885,7 +5063,7 @@ static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset) /* pull VLAN ID from VLVF */ vid = vlvf & VLAN_VID_MASK; - /* only concern outselves with a certain range */ + /* only concern ourselves with a certain range */ if (vid < vid_start || vid >= vid_end) continue; @@ -7964,6 +8142,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) netif_carrier_on(netdev); ixgbe_check_vf_rate_limit(adapter); + if (adapter->num_vfs && hw->mac.ops.enable_mdd) + hw->mac.ops.enable_mdd(hw); + /* enable transmits */ netif_tx_wake_all_queues(adapter->netdev); @@ -9441,10 +9622,6 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); switch (cmd) { - case SIOCSHWTSTAMP: - return ixgbe_ptp_set_ts_config(adapter, req); - case SIOCGHWTSTAMP: - return ixgbe_ptp_get_ts_config(adapter, req); case SIOCGMIIPHY: if (!adapter->hw.phy.ops.read_reg) return -EOPNOTSUPP; @@ -10908,6 +11085,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, .ndo_xsk_wakeup = ixgbe_xsk_wakeup, + .ndo_hwtstamp_get = ixgbe_ptp_hwtstamp_get, + .ndo_hwtstamp_set = ixgbe_ptp_hwtstamp_set, }; static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter, @@ -11110,7 +11289,7 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) * ixgbe_enumerate_functions - Get the number of ports this device has * @adapter: adapter structure * - * This function enumerates the phsyical functions co-located on a single slot, + * This function enumerates the physical functions co-located on a single slot, * in order to determine how many ports a device has. This is most useful in * determining the required GT/s of PCIe bandwidth necessary for optimal * performance. 
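The conversion repeated across i40e, ice, igb, igc and ixgbe above follows one pattern: the SIOCGHWTSTAMP/SIOCSHWTSTAMP cases are dropped from the driver's ioctl handler, and the core instead dispatches to dedicated ndo_hwtstamp_get()/ndo_hwtstamp_set() callbacks that operate on an in-kernel struct kernel_hwtstamp_config rather than a user pointer. The core performs the copy_from_user()/copy_to_user() on the struct ifreq itself, so the per-driver -EFAULT paths disappear, and each driver keeps a kernel_hwtstamp_config shadow copy of the last applied settings instead of deconstructing them from registers. Below is a minimal sketch of the resulting driver shape; the "foo" driver and its foo_* names are hypothetical illustrations, not code from this series:

	#include <linux/net_tstamp.h>
	#include <linux/netdevice.h>
	#include <linux/netlink.h>

	struct foo_adapter {
		struct kernel_hwtstamp_config tstamp_config;	/* shadow copy */
	};

	/* Stand-in for the per-driver register programming (the role played
	 * by i40e_ptp_set_timestamp_mode() and friends); this sketch only
	 * validates the requested Tx mode.
	 */
	static int foo_apply_timestamp_mode(struct foo_adapter *adapter,
					    struct kernel_hwtstamp_config *config)
	{
		switch (config->tx_type) {
		case HWTSTAMP_TX_OFF:
		case HWTSTAMP_TX_ON:
			return 0;
		default:
			return -ERANGE;
		}
	}

	static int foo_hwtstamp_get(struct net_device *netdev,
				    struct kernel_hwtstamp_config *config)
	{
		struct foo_adapter *adapter = netdev_priv(netdev);

		/* Return the shadow copy; no copy_to_user() needed here. */
		*config = adapter->tstamp_config;
		return 0;
	}

	static int foo_hwtstamp_set(struct net_device *netdev,
				    struct kernel_hwtstamp_config *config,
				    struct netlink_ext_ack *extack)
	{
		struct foo_adapter *adapter = netdev_priv(netdev);
		int err;

		err = foo_apply_timestamp_mode(adapter, config);
		if (err) {
			NL_SET_ERR_MSG(extack, "unsupported timestamping request");
			return err;
		}

		/* Save for future foo_hwtstamp_get() calls; the core copies
		 * the (possibly broadened) config back to user space.
		 */
		adapter->tstamp_config = *config;
		return 0;
	}

	static const struct net_device_ops foo_netdev_ops = {
		/* no SIOC[GS]HWTSTAMP cases left in .ndo_eth_ioctl */
		.ndo_hwtstamp_get = foo_hwtstamp_get,
		.ndo_hwtstamp_set = foo_hwtstamp_set,
	};

A side benefit visible in the hunks above is the extack parameter: where the ioctl path could only return a bare errno, ndo_hwtstamp_set() can attach a human-readable message, which the drivers converted here do not yet exploit beyond accepting the argument.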
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index bf65e82b4c61..4af149b63a39 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -34,7 +34,7 @@ #define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still clear to send requests */ #define IXGBE_VT_MSGINFO_SHIFT 16 -/* bits 23:16 are used for exra info for certain messages */ +/* bits 23:16 are used for extra info for certain messages */ #define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) /* definitions to support mailbox API version negotiation */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 2d54828bdfbb..2449e4cf2679 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1323,7 +1323,7 @@ int ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed, * @hw: pointer to hardware structure * * Restart autonegotiation and PHY and waits for completion. - * This function always returns success, this is nessary since + * This function always returns success, this is necessary since * it is called via a function pointer that could call other * functions that could return an error. **/ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index eef25e11d938..40be99def2ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -936,20 +936,22 @@ void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, } /** - * ixgbe_ptp_get_ts_config - get current hardware timestamping configuration - * @adapter: pointer to adapter structure - * @ifr: ioctl data + * ixgbe_ptp_hwtstamp_get - get current hardware timestamping configuration + * @netdev: pointer to net device structure + * @config: timestamping configuration structure * * This function returns the current timestamping settings. Rather than * attempt to deconstruct registers to fill in the values, simply keep a copy * of the old settings around, and return a copy when requested. */ -int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) +int ixgbe_ptp_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config *config = &adapter->tstamp_config; + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); - return copy_to_user(ifr->ifr_data, config, - sizeof(*config)) ? -EFAULT : 0; + *config = adapter->tstamp_config; + + return 0; } /** @@ -978,7 +980,7 @@ int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) * mode, if required to support the specifically requested mode. */ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct ixgbe_hw *hw = &adapter->hw; u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED; @@ -1129,31 +1131,29 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, } /** - * ixgbe_ptp_set_ts_config - user entry point for timestamp mode - * @adapter: pointer to adapter struct - * @ifr: ioctl data + * ixgbe_ptp_hwtstamp_set - user entry point for timestamp mode + * @netdev: pointer to net device structure + * @config: timestamping configuration structure + * @extack: netlink extended ack structure for error reporting * * Set hardware to requested mode. If unsupported, return an error with no * changes. 
Otherwise, store the mode for future reference. */ -int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) +int ixgbe_ptp_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; + struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); int err; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = ixgbe_ptp_set_timestamp_mode(adapter, &config); + err = ixgbe_ptp_set_timestamp_mode(adapter, config); if (err) return err; /* save these settings for future reference */ - memcpy(&adapter->tstamp_config, &config, - sizeof(adapter->tstamp_config)); + adapter->tstamp_config = *config; - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; + return 0; } static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 0dbbd2befd4d..32ac1e020d91 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -207,6 +207,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; int rss; @@ -237,6 +238,9 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return 0; + if (hw->mac.ops.disable_mdd) + hw->mac.ops.disable_mdd(hw); + #ifdef CONFIG_PCI_IOV /* * If our VFs are assigned we cannot shut down SR-IOV @@ -702,7 +706,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) u32 reg_val; u32 queue; - /* remove VLAN filters beloning to this VF */ + /* remove VLAN filters belonging to this VF */ ixgbe_clear_vf_vlans(adapter, vf); /* add back PF assigned VLAN or VLAN 0 */ @@ -1353,12 +1357,59 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) ixgbe_write_mbx(hw, &msg, 1, vf); } +/** + * ixgbe_check_mdd_event - check for MDD event on all VFs + * @adapter: pointer to ixgbe adapter + * + * Return: true if there is a VF on which MDD event occurred, false otherwise. 
+ */ +bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + DECLARE_BITMAP(vf_bitmap, 64); + bool ret = false; + int i; + + if (!hw->mac.ops.handle_mdd) + return false; + + /* Did we have a malicious event */ + bitmap_zero(vf_bitmap, 64); + hw->mac.ops.handle_mdd(hw, vf_bitmap); + + /* Log any blocked queues and release lock */ + for_each_set_bit(i, vf_bitmap, 64) { + dev_warn(&adapter->pdev->dev, + "Malicious event on VF %d tx:%x rx:%x\n", i, + IXGBE_READ_REG(hw, IXGBE_LVMMC_TX), + IXGBE_READ_REG(hw, IXGBE_LVMMC_RX)); + + if (hw->mac.ops.restore_mdd_vf) { + u32 ping; + + hw->mac.ops.restore_mdd_vf(hw, i); + + /* get the VF to rebuild its queues */ + adapter->vfinfo[i].clear_to_send = 0; + ping = IXGBE_PF_CONTROL_MSG | + IXGBE_VT_MSGTYPE_CTS; + ixgbe_write_mbx(hw, &ping, 1, i); + } + + ret = true; + } + + return ret; +} + void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; u32 vf; + ixgbe_check_mdd_event(adapter); + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 0690ecb8dfa3..bc4cab976bf9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -15,6 +15,7 @@ #ifdef CONFIG_PCI_IOV void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); #endif +bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter); void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 892fa6c1f879..36577091cd9e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -238,7 +238,7 @@ struct ixgbe_thermal_sensor_data { #define NVM_VER_INVALID 0xFFFF #define NVM_ETK_VALID 0x8000 #define NVM_INVALID_PTR 0xFFFF -#define NVM_VER_SIZE 32 /* version sting size */ +#define NVM_VER_SIZE 32 /* version string size */ struct ixgbe_nvm_version { u32 etk_id; @@ -402,6 +402,8 @@ struct ixgbe_nvm_version { #define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4)) #define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4)) #define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4)) +#define IXGBE_LVMMC_RX 0x2FA8 +#define IXGBE_LVMMC_TX 0x8108 #define IXGBE_WQBR_RX(_i) (0x2FB0 + ((_i) * 4)) /* 4 total */ #define IXGBE_WQBR_TX(_i) (0x8130 + ((_i) * 4)) /* 4 total */ #define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/ @@ -1042,6 +1044,7 @@ struct ixgbe_nvm_version { #define IXGBE_GCR_EXT_VT_MODE_16 0x00000001 #define IXGBE_GCR_EXT_VT_MODE_32 0x00000002 #define IXGBE_GCR_EXT_VT_MODE_64 0x00000003 +#define IXGBE_GCR_EXT_VT_MODE_MASK 0x00000003 #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ IXGBE_GCR_EXT_VT_MODE_64) @@ -2021,7 +2024,7 @@ enum { /* EEPROM Addressing bits based on type (0-small, 1-large) */ #define IXGBE_EEC_ADDR_SIZE 0x00000400 #define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */ -#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. */ +#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD allows 14 bits for addr. 
*/ #define IXGBE_EEC_SIZE_SHIFT 11 #define IXGBE_EEPROM_WORD_SIZE_SHIFT 6 @@ -2746,6 +2749,28 @@ enum ixgbe_fdir_pballoc_type { #define FW_PHY_INFO_ID_HI_MASK 0xFFFF0000u #define FW_PHY_INFO_ID_LO_MASK 0x0000FFFFu +/* There are only 3 options for VFs creation on this device: + * 16 VFs pool with 8 queues each + * 32 VFs pool with 4 queues each + * 64 VFs pool with 2 queues each + * + * That means reading some VF registers that map VF to queue depending on + * chosen option. Define values that help dealing with each scenario. + */ +/* Number of queues based on VFs pool */ +#define IXGBE_16VFS_QUEUES 8 +#define IXGBE_32VFS_QUEUES 4 +#define IXGBE_64VFS_QUEUES 2 +/* Mask for getting queues bits based on VFs pool */ +#define IXGBE_16VFS_BITMASK GENMASK(IXGBE_16VFS_QUEUES - 1, 0) +#define IXGBE_32VFS_BITMASK GENMASK(IXGBE_32VFS_QUEUES - 1, 0) +#define IXGBE_64VFS_BITMASK GENMASK(IXGBE_64VFS_QUEUES - 1, 0) +/* Convert queue index to register number. + * We have 4 registers with 32 queues in each. + */ +#define IXGBE_QUEUES_PER_REG 32 +#define IXGBE_QUEUES_REG_AMOUNT 4 + /* Host Interface Command Structures */ struct ixgbe_hic_hdr { u8 cmd; @@ -2911,6 +2936,13 @@ struct ixgbe_adv_tx_context_desc { __le32 mss_l4len_idx; }; +enum { + IXGBE_VLAN_MACIP_LENS_REG = 0, + IXGBE_FCEOF_SAIDX_REG = 1, + IXGBE_TYPE_TUCMD_MLHL = 2, + IXGBE_MSS_L4LEN_IDX = 3, +}; + /* Adv Transmit Descriptor Config Masks */ #define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */ #define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */ @@ -2918,7 +2950,7 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */ #define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */ #define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */ -#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Desc */ +#define IXGBE_ADVTXD_DTYP_CTXT 0x2 /* Advanced Context Desc */ #define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */ #define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */ @@ -2967,6 +2999,8 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_FCOEF_EOF_MASK (3u << 10) /* FC EOF index */ #define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ +#define IXGBE_ADVTXD_MSS_MASK GENMASK(31, IXGBE_ADVTXD_MSS_SHIFT) +#define IXGBE_ADVTXD_HEADER_LEN_MASK GENMASK(8, 0) /* Autonegotiation advertised speeds */ typedef u32 ixgbe_autoneg_advertised; @@ -3539,6 +3573,12 @@ struct ixgbe_mac_operations { int (*dmac_config_tcs)(struct ixgbe_hw *hw); int (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *); int (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32); + + /* MDD events */ + void (*enable_mdd)(struct ixgbe_hw *hw); + void (*disable_mdd)(struct ixgbe_hw *hw); + void (*restore_mdd_vf)(struct ixgbe_hw *hw, u32 vf); + void (*handle_mdd)(struct ixgbe_hw *hw, unsigned long *vf_bitmap); }; struct ixgbe_phy_operations { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index f1ab95aa8c83..c2353aed0120 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -47,7 +47,7 @@ int ixgbe_get_invariants_X540(struct ixgbe_hw *hw) } /** - * ixgbe_setup_mac_link_X540 - Set the auto advertised capabilitires + * ixgbe_setup_mac_link_X540 - Set the auto advertised capabilities * @hw: 
pointer to hardware structure * @speed: new link speed * @autoneg_wait_to_complete: true when waiting for completion is needed diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 7461367a1868..bfa647086c70 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3316,7 +3316,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) return media_type; } -/** ixgbe_init_ext_t_x550em - Start (unstall) the external Base T PHY. +/** ixgbe_init_ext_t_x550em - Start (uninstall) the external Base T PHY. ** @hw: pointer to hardware structure **/ static int ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) @@ -3800,6 +3800,122 @@ static int ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, return status; } +static void ixgbe_set_mdd_x550(struct ixgbe_hw *hw, bool ena) +{ + u32 reg_dma, reg_rdr; + + reg_dma = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); + reg_rdr = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + + if (ena) { + reg_dma |= (IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN); + reg_rdr |= (IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN); + } else { + reg_dma &= ~(IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN); + reg_rdr &= ~(IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN); + } + + IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg_dma); + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_rdr); +} + +/** + * ixgbe_enable_mdd_x550 - enable malicious driver detection + * @hw: pointer to hardware structure + */ +void ixgbe_enable_mdd_x550(struct ixgbe_hw *hw) +{ + ixgbe_set_mdd_x550(hw, true); +} + +/** + * ixgbe_disable_mdd_x550 - disable malicious driver detection + * @hw: pointer to hardware structure + */ +void ixgbe_disable_mdd_x550(struct ixgbe_hw *hw) +{ + ixgbe_set_mdd_x550(hw, false); +} + +/** + * ixgbe_restore_mdd_vf_x550 - restore VF that was disabled during MDD event + * @hw: pointer to hardware structure + * @vf: vf index + */ +void ixgbe_restore_mdd_vf_x550(struct ixgbe_hw *hw, u32 vf) +{ + u32 idx, reg, val, num_qs, start_q, bitmask; + + /* Map VF to queues */ + reg = IXGBE_READ_REG(hw, IXGBE_MRQC); + switch (reg & IXGBE_MRQC_MRQE_MASK) { + case IXGBE_MRQC_VMDQRT8TCEN: + num_qs = IXGBE_16VFS_QUEUES; + bitmask = IXGBE_16VFS_BITMASK; + break; + case IXGBE_MRQC_VMDQRSS32EN: + case IXGBE_MRQC_VMDQRT4TCEN: + num_qs = IXGBE_32VFS_QUEUES; + bitmask = IXGBE_32VFS_BITMASK; + break; + default: + num_qs = IXGBE_64VFS_QUEUES; + bitmask = IXGBE_64VFS_BITMASK; + break; + } + start_q = vf * num_qs; + + /* Release vf's queues by clearing WQBR_TX and WQBR_RX (RW1C) */ + idx = start_q / IXGBE_QUEUES_PER_REG; + val = bitmask << (start_q % IXGBE_QUEUES_PER_REG); + IXGBE_WRITE_REG(hw, IXGBE_WQBR_TX(idx), val); + IXGBE_WRITE_REG(hw, IXGBE_WQBR_RX(idx), val); +} + +/** + * ixgbe_handle_mdd_x550 - handle malicious driver detection event + * @hw: pointer to hardware structure + * @vf_bitmap: output vf bitmap of malicious vfs + */ +void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap) +{ + u32 i, j, reg, q, div, vf; + unsigned long wqbr; + + /* figure out pool size for mapping to vf's */ + reg = IXGBE_READ_REG(hw, IXGBE_MRQC); + switch (reg & IXGBE_MRQC_MRQE_MASK) { + case IXGBE_MRQC_VMDQRT8TCEN: + div = IXGBE_16VFS_QUEUES; + break; + case IXGBE_MRQC_VMDQRSS32EN: + case IXGBE_MRQC_VMDQRT4TCEN: + div = IXGBE_32VFS_QUEUES; + break; + default: + div = IXGBE_64VFS_QUEUES; + break; + } + + /* Read WQBR_TX and WQBR_RX and check for malicious queues */ + for (i = 0; i < 
IXGBE_QUEUES_REG_AMOUNT; i++) { + wqbr = IXGBE_READ_REG(hw, IXGBE_WQBR_TX(i)) | + IXGBE_READ_REG(hw, IXGBE_WQBR_RX(i)); + if (!wqbr) + continue; + + /* Get malicious queue */ + for_each_set_bit(j, (unsigned long *)&wqbr, + IXGBE_QUEUES_PER_REG) { + /* Get queue from bitmask */ + q = j + (i * IXGBE_QUEUES_PER_REG); + /* Map queue to vf */ + vf = q / div; + set_bit(vf, vf_bitmap); + } + } +} + #define X550_COMMON_MAC \ .init_hw = &ixgbe_init_hw_generic, \ .start_hw = &ixgbe_start_hw_X540, \ @@ -3863,6 +3979,10 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { .prot_autoc_write = prot_autoc_write_generic, .setup_fc = ixgbe_setup_fc_generic, .fc_autoneg = ixgbe_fc_autoneg, + .enable_mdd = ixgbe_enable_mdd_x550, + .disable_mdd = ixgbe_disable_mdd_x550, + .restore_mdd_vf = ixgbe_restore_mdd_vf_x550, + .handle_mdd = ixgbe_handle_mdd_x550, }; static const struct ixgbe_mac_operations mac_ops_X550EM_x = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h index 3e4092f8da3e..2a11147fb1bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h @@ -17,4 +17,9 @@ void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, bool enable, int vf); +void ixgbe_enable_mdd_x550(struct ixgbe_hw *hw); +void ixgbe_disable_mdd_x550(struct ixgbe_hw *hw); +void ixgbe_restore_mdd_vf_x550(struct ixgbe_hw *hw, u32 vf); +void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap); + #endif /* _IXGBE_X550_H_ */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 9a10396e7504..f674729124e6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -318,21 +318,20 @@ fail: return err; } -int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id) +int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id, const u32 *ind_tbl) { struct otx2_rss_info *rss = &pfvf->hw.rss_info; const int index = rss->rss_size * ctx_id; struct mbox *mbox = &pfvf->mbox; - struct otx2_rss_ctx *rss_ctx; struct nix_aq_enq_req *aq; int idx, err; mutex_lock(&mbox->lock); - rss_ctx = rss->rss_ctx[ctx_id]; + ind_tbl = ind_tbl ?: rss->ind_tbl; /* Get memory to put this msg */ for (idx = 0; idx < rss->rss_size; idx++) { /* Ignore the queue if AF_XDP zero copy is enabled */ - if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx)) + if (test_bit(ind_tbl[idx], pfvf->af_xdp_zc_qidx)) continue; aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); @@ -352,7 +351,7 @@ int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id) } } - aq->rss.rq = rss_ctx->ind_tbl[idx]; + aq->rss.rq = ind_tbl[idx]; /* Fill AQ info */ aq->qidx = index + idx; @@ -390,30 +389,22 @@ void otx2_set_rss_key(struct otx2_nic *pfvf) int otx2_rss_init(struct otx2_nic *pfvf) { struct otx2_rss_info *rss = &pfvf->hw.rss_info; - struct otx2_rss_ctx *rss_ctx; int idx, ret = 0; - rss->rss_size = sizeof(*rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]); + rss->rss_size = sizeof(*rss->ind_tbl); /* Init RSS key if it is not setup already */ if (!rss->enable) netdev_rss_key_fill(rss->key, sizeof(rss->key)); otx2_set_rss_key(pfvf); - if (!netif_is_rxfh_configured(pfvf->netdev)) { - /* Set RSS group 0 as default indirection table */ - rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP] = kzalloc(rss->rss_size, - GFP_KERNEL); - if (!rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]) - 
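ixgbe_handle_mdd_x550() above boils down to: OR each WQBR_TX word with its WQBR_RX counterpart, then translate every set queue bit back to a VF index by dividing the global queue number by the pool's queues-per-VF. A minimal userspace model of that scan, with the register reads replaced by a plain array (the sample values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define QUEUES_PER_REG 32
#define REG_AMOUNT     4

/* wqbr[] stands in for WQBR_TX | WQBR_RX; div is queues per VF (8/4/2). */
static uint64_t mdd_vf_bitmap(const uint32_t wqbr[REG_AMOUNT], unsigned int div)
{
        uint64_t vf_bitmap = 0;

        for (unsigned int i = 0; i < REG_AMOUNT; i++)
                for (unsigned int j = 0; j < QUEUES_PER_REG; j++)
                        if (wqbr[i] & (1u << j))
                                /* global queue index -> owning VF */
                                vf_bitmap |= 1ull << ((i * QUEUES_PER_REG + j) / div);
        return vf_bitmap;
}

int main(void)
{
        uint32_t wqbr[REG_AMOUNT] = { 0x3, 0, 0x400, 0 }; /* queues 0, 1 and 74 */

        printf("malicious VFs: 0x%llx\n",
               (unsigned long long)mdd_vf_bitmap(wqbr, 2)); /* VFs 0 and 37 */
        return 0;
}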
return -ENOMEM; - - rss_ctx = rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]; + if (!netif_is_rxfh_configured(pfvf->netdev)) for (idx = 0; idx < rss->rss_size; idx++) - rss_ctx->ind_tbl[idx] = + rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, pfvf->hw.rx_queues); - } - ret = otx2_set_rss_table(pfvf, DEFAULT_RSS_CONTEXT_GROUP); + + ret = otx2_set_rss_table(pfvf, DEFAULT_RSS_CONTEXT_GROUP, NULL); if (ret) return ret; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 6b59881f78e0..e3765b73c434 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -93,10 +93,6 @@ struct otx2_lmt_info { u64 lmt_addr; u16 lmt_id; }; -/* RSS configuration */ -struct otx2_rss_ctx { - u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; -}; struct otx2_rss_info { u8 enable; @@ -104,7 +100,7 @@ struct otx2_rss_info { u16 rss_size; #define RSS_HASH_KEY_SIZE 44 /* 352 bit key */ u8 key[RSS_HASH_KEY_SIZE]; - struct otx2_rss_ctx *rss_ctx[MAX_RSS_GROUPS]; + u32 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; }; /* NIX (or NPC) RX errors */ @@ -1067,7 +1063,7 @@ int otx2_set_hw_capabilities(struct otx2_nic *pfvf); int otx2_rss_init(struct otx2_nic *pfvf); int otx2_set_flowkey_cfg(struct otx2_nic *pfvf); void otx2_set_rss_key(struct otx2_nic *pfvf); -int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id); +int otx2_set_rss_table(struct otx2_nic *pfvf, int ctx_id, const u32 *ind_tbl); /* Mbox handlers */ void mbox_handler_msix_offset(struct otx2_nic *pfvf, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 20de517dfb09..998c734ff839 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -796,35 +796,75 @@ static u32 otx2_get_rxfh_indir_size(struct net_device *dev) return MAX_RSS_INDIR_TBL_SIZE; } -static int otx2_rss_ctx_delete(struct otx2_nic *pfvf, int ctx_id) +static int otx2_create_rxfh(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { - struct otx2_rss_info *rss = &pfvf->hw.rss_info; + struct otx2_nic *pfvf = netdev_priv(dev); + struct otx2_rss_info *rss; + unsigned int queues; + u32 *ind_tbl; + int idx; + + rss = &pfvf->hw.rss_info; + queues = pfvf->hw.rx_queues; - otx2_rss_ctx_flow_del(pfvf, ctx_id); - kfree(rss->rss_ctx[ctx_id]); - rss->rss_ctx[ctx_id] = NULL; + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + ctx->hfunc = ETH_RSS_HASH_TOP; + if (!rss->enable) { + netdev_err(dev, "RSS is disabled, cannot change settings\n"); + return -EIO; + } + + ind_tbl = rxfh->indir; + if (!ind_tbl) { + ind_tbl = ethtool_rxfh_context_indir(ctx); + for (idx = 0; idx < rss->rss_size; idx++) + ind_tbl[idx] = ethtool_rxfh_indir_default(idx, queues); + } + + otx2_set_rss_table(pfvf, rxfh->rss_context, ind_tbl); return 0; } -static int otx2_rss_ctx_create(struct otx2_nic *pfvf, - u32 *rss_context) +static int otx2_modify_rxfh(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { - struct otx2_rss_info *rss = &pfvf->hw.rss_info; - u8 ctx; + struct otx2_nic *pfvf = netdev_priv(dev); - for (ctx = 0; ctx < MAX_RSS_GROUPS; ctx++) { - if (!rss->rss_ctx[ctx]) - break; + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + 
return -EOPNOTSUPP; + + if (!pfvf->hw.rss_info.enable) { + netdev_err(dev, "RSS is disabled, cannot change settings\n"); + return -EIO; } - if (ctx == MAX_RSS_GROUPS) - return -EINVAL; - rss->rss_ctx[ctx] = kzalloc(sizeof(*rss->rss_ctx[ctx]), GFP_KERNEL); - if (!rss->rss_ctx[ctx]) - return -ENOMEM; - *rss_context = ctx; + if (rxfh->indir) + otx2_set_rss_table(pfvf, rxfh->rss_context, rxfh->indir); + + return 0; +} + +static int otx2_remove_rxfh(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + if (!pfvf->hw.rss_info.enable) { + netdev_err(dev, "RSS is disabled, cannot change settings\n"); + return -EIO; + } + otx2_rss_ctx_flow_del(pfvf, rss_context); return 0; } @@ -833,23 +873,14 @@ static int otx2_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { - u32 rss_context = DEFAULT_RSS_CONTEXT_GROUP; struct otx2_nic *pfvf = netdev_priv(dev); - struct otx2_rss_ctx *rss_ctx; struct otx2_rss_info *rss; - int ret, idx; + int idx; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->rss_context) - rss_context = rxfh->rss_context; - - if (rss_context != ETH_RXFH_CONTEXT_ALLOC && - rss_context >= MAX_RSS_GROUPS) - return -EINVAL; - rss = &pfvf->hw.rss_info; if (!rss->enable) { @@ -861,21 +892,12 @@ static int otx2_set_rxfh(struct net_device *dev, memcpy(rss->key, rxfh->key, sizeof(rss->key)); otx2_set_rss_key(pfvf); } - if (rxfh->rss_delete) - return otx2_rss_ctx_delete(pfvf, rss_context); - - if (rss_context == ETH_RXFH_CONTEXT_ALLOC) { - ret = otx2_rss_ctx_create(pfvf, &rss_context); - rxfh->rss_context = rss_context; - if (ret) - return ret; - } + if (rxfh->indir) { - rss_ctx = rss->rss_ctx[rss_context]; for (idx = 0; idx < rss->rss_size; idx++) - rss_ctx->ind_tbl[idx] = rxfh->indir[idx]; + rss->ind_tbl[idx] = rxfh->indir[idx]; } - otx2_set_rss_table(pfvf, rss_context); + otx2_set_rss_table(pfvf, DEFAULT_RSS_CONTEXT_GROUP, NULL); return 0; } @@ -884,9 +906,7 @@ static int otx2_set_rxfh(struct net_device *dev, static int otx2_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh) { - u32 rss_context = DEFAULT_RSS_CONTEXT_GROUP; struct otx2_nic *pfvf = netdev_priv(dev); - struct otx2_rss_ctx *rss_ctx; struct otx2_rss_info *rss; u32 *indir = rxfh->indir; int idx, rx_queues; @@ -894,32 +914,21 @@ static int otx2_get_rxfh(struct net_device *dev, rss = &pfvf->hw.rss_info; rxfh->hfunc = ETH_RSS_HASH_TOP; - if (rxfh->rss_context) - rss_context = rxfh->rss_context; - if (!indir) return 0; - if (!rss->enable && rss_context == DEFAULT_RSS_CONTEXT_GROUP) { + if (!rss->enable) { rx_queues = pfvf->hw.rx_queues; for (idx = 0; idx < MAX_RSS_INDIR_TBL_SIZE; idx++) indir[idx] = ethtool_rxfh_indir_default(idx, rx_queues); return 0; } - if (rss_context >= MAX_RSS_GROUPS) - return -ENOENT; - - rss_ctx = rss->rss_ctx[rss_context]; - if (!rss_ctx) - return -ENOENT; - - if (indir) { - for (idx = 0; idx < rss->rss_size; idx++) { - /* Ignore if the rx queue is AF_XDP zero copy enabled */ - if (test_bit(rss_ctx->ind_tbl[idx], pfvf->af_xdp_zc_qidx)) - continue; - indir[idx] = rss_ctx->ind_tbl[idx]; - } + + for (idx = 0; idx < rss->rss_size; idx++) { + /* Ignore if the rx queue is AF_XDP zero copy enabled */ + if (test_bit(rss->ind_tbl[idx], pfvf->af_xdp_zc_qidx)) + continue; + indir[idx] = rss->ind_tbl[idx]; } if (rxfh->key) memcpy(rxfh->key, rss->key, sizeof(rss->key)); @@ -1307,12 +1316,12 @@ 
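The otx2 conversion replaces the driver's private rss_ctx array with the ethtool core's create/modify/remove_rxfh_context callbacks; when a new context arrives without an indirection table, the driver falls back to ethtool_rxfh_indir_default(), which simply spreads table slots round-robin across the RX queues. A sketch of that default fill; the modulo arithmetic matches the real helper, the surrounding code is illustrative:

#include <stdio.h>

/* Same arithmetic as ethtool_rxfh_indir_default(): slot i maps to queue
 * i modulo the number of RX queues.
 */
static unsigned int rxfh_indir_default(unsigned int index, unsigned int n_rx_rings)
{
        return index % n_rx_rings;
}

int main(void)
{
        unsigned int queues = 6;

        for (unsigned int idx = 0; idx < 16; idx++) /* first 16 of the table */
                printf("indir[%u] = %u\n", idx, rxfh_indir_default(idx, queues));
        return 0;
}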
static void otx2_get_fec_stats(struct net_device *netdev, } static const struct ethtool_ops otx2_ethtool_ops = { - .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE, .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | ETHTOOL_RING_USE_CQE_SIZE, + .rxfh_max_num_contexts = MAX_RSS_GROUPS, .get_link = otx2_get_link, .get_drvinfo = otx2_get_drvinfo, .get_strings = otx2_get_strings, @@ -1332,6 +1341,9 @@ static const struct ethtool_ops otx2_ethtool_ops = { .set_rxfh = otx2_set_rxfh, .get_rxfh_fields = otx2_get_rss_hash_opts, .set_rxfh_fields = otx2_set_rss_hash_opts, + .create_rxfh_context = otx2_create_rxfh, + .modify_rxfh_context = otx2_modify_rxfh, + .remove_rxfh_context = otx2_remove_rxfh, .get_msglevel = otx2_get_msglevel, .set_msglevel = otx2_set_msglevel, .get_pauseparam = otx2_get_pauseparam, @@ -1426,12 +1438,12 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, } static const struct ethtool_ops otx2vf_ethtool_ops = { - .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE, .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | ETHTOOL_RING_USE_CQE_SIZE, + .rxfh_max_num_contexts = MAX_RSS_GROUPS, .get_link = otx2_get_link, .get_drvinfo = otx2vf_get_drvinfo, .get_strings = otx2vf_get_strings, @@ -1447,6 +1459,9 @@ static const struct ethtool_ops otx2vf_ethtool_ops = { .set_rxfh = otx2_set_rxfh, .get_rxfh_fields = otx2_get_rss_hash_opts, .set_rxfh_fields = otx2_set_rss_hash_opts, + .create_rxfh_context = otx2_create_rxfh, + .modify_rxfh_context = otx2_modify_rxfh, + .remove_rxfh_context = otx2_remove_rxfh, .get_ringparam = otx2_get_ringparam, .set_ringparam = otx2_set_ringparam, .get_coalesce = otx2_get_coalesce, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 4e2d1206e1b0..b23585c5e5c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2158,7 +2158,6 @@ int otx2_stop(struct net_device *netdev) struct otx2_nic *pf = netdev_priv(netdev); struct otx2_cq_poll *cq_poll = NULL; struct otx2_qset *qset = &pf->qset; - struct otx2_rss_info *rss; int qidx, vec, wrk; /* If the DOWN flag is set resources are already freed */ @@ -2176,10 +2175,7 @@ int otx2_stop(struct net_device *netdev) otx2_rxtx_enable(pf, false); /* Clear RSS enable flag */ - rss = &pf->hw.rss_info; - rss->enable = false; - if (!netif_is_rxfh_configured(netdev)) - kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]); + pf->hw.rss_info.enable = false; /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c index b328aae23d73..7d67b4cbaf71 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_xsk.c @@ -132,7 +132,7 @@ int otx2_xsk_pool_enable(struct otx2_nic *pf, struct xsk_buff_pool *pool, u16 qi set_bit(qidx, pf->af_xdp_zc_qidx); otx2_clean_up_rq(pf, qidx); /* Reconfigure RSS table as 'qidx' cannot be part of RSS now */ - otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP); + otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP, NULL); /* Kick start the NAPI context so that receiving will start */ return otx2_xsk_wakeup(pf->netdev, qidx, XDP_WAKEUP_RX); } @@ -153,7 +153,7 @@ int 
otx2_xsk_pool_disable(struct otx2_nic *pf, u16 qidx) clear_bit(qidx, pf->af_xdp_zc_qidx); xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); /* Reconfigure RSS table as 'qidx' now needs to be part of RSS */ - otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP); + otx2_set_rss_table(pf, DEFAULT_RSS_CONTEXT_GROUP, NULL); return 0; } diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 7bfd3f230ff5..2ba361f8ce7d 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -17,6 +17,7 @@ config NET_MEDIATEK_SOC select PINCTRL select PHYLINK select DIMLIB + select GENERIC_ALLOCATOR select PAGE_POOL select PAGE_POOL_STATS select PCS_MTK_LYNXI diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f8a907747db4..11ee7e1829bf 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -27,6 +27,7 @@ #include <net/dsa.h> #include <net/dst_metadata.h> #include <net/page_pool/helpers.h> +#include <linux/genalloc.h> #include "mtk_eth_soc.h" #include "mtk_wed.h" @@ -1267,6 +1268,34 @@ static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) return (void *)data; } +static void *mtk_dma_ring_alloc(struct mtk_eth *eth, size_t size, + dma_addr_t *dma_handle, bool use_sram) +{ + void *dma_ring; + + if (use_sram && eth->sram_pool) { + dma_ring = (void *)gen_pool_alloc(eth->sram_pool, size); + if (!dma_ring) + return dma_ring; + *dma_handle = gen_pool_virt_to_phys(eth->sram_pool, + (unsigned long)dma_ring); + } else { + dma_ring = dma_alloc_coherent(eth->dma_dev, size, dma_handle, + GFP_KERNEL); + } + + return dma_ring; +} + +static void mtk_dma_ring_free(struct mtk_eth *eth, size_t size, void *dma_ring, + dma_addr_t dma_handle, bool in_sram) +{ + if (in_sram && eth->sram_pool) + gen_pool_free(eth->sram_pool, (unsigned long)dma_ring, size); + else + dma_free_coherent(eth->dma_dev, size, dma_ring, dma_handle); +} + /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { @@ -1276,13 +1305,8 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) dma_addr_t dma_addr; int i, j, len; - if (MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM)) - eth->scratch_ring = eth->sram_base; - else - eth->scratch_ring = dma_alloc_coherent(eth->dma_dev, - cnt * soc->tx.desc_size, - &eth->phy_scratch_ring, - GFP_KERNEL); + eth->scratch_ring = mtk_dma_ring_alloc(eth, cnt * soc->tx.desc_size, + &eth->phy_scratch_ring, true); if (unlikely(!eth->scratch_ring)) return -ENOMEM; @@ -2620,14 +2644,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) if (!ring->buf) goto no_tx_mem; - if (MTK_HAS_CAPS(soc->caps, MTK_SRAM)) { - ring->dma = eth->sram_base + soc->tx.fq_dma_size * sz; - ring->phys = eth->phy_scratch_ring + soc->tx.fq_dma_size * (dma_addr_t)sz; - } else { - ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz, - &ring->phys, GFP_KERNEL); - } - + ring->dma = mtk_dma_ring_alloc(eth, ring_size * sz, &ring->phys, true); if (!ring->dma) goto no_tx_mem; @@ -2726,10 +2743,10 @@ static void mtk_tx_clean(struct mtk_eth *eth) kfree(ring->buf); ring->buf = NULL; } - if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && ring->dma) { - dma_free_coherent(eth->dma_dev, - ring->dma_size * soc->tx.desc_size, - ring->dma, ring->phys); + + if (ring->dma) { + mtk_dma_ring_free(eth, ring->dma_size * soc->tx.desc_size, + ring->dma, ring->phys, true); ring->dma = NULL; } @@ -2746,14 +2763,9 @@ static int mtk_rx_alloc(struct mtk_eth
*eth, int ring_no, int rx_flag) const struct mtk_reg_map *reg_map = eth->soc->reg_map; const struct mtk_soc_data *soc = eth->soc; struct mtk_rx_ring *ring; - int rx_data_len, rx_dma_size, tx_ring_size; + int rx_data_len, rx_dma_size; int i; - if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) - tx_ring_size = MTK_QDMA_RING_SIZE; - else - tx_ring_size = soc->tx.dma_size; - if (rx_flag == MTK_RX_FLAGS_QDMA) { if (ring_no) return -EINVAL; @@ -2788,20 +2800,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) ring->page_pool = pp; } - if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM) || - rx_flag != MTK_RX_FLAGS_NORMAL) { - ring->dma = dma_alloc_coherent(eth->dma_dev, - rx_dma_size * eth->soc->rx.desc_size, - &ring->phys, GFP_KERNEL); - } else { - struct mtk_tx_ring *tx_ring = &eth->tx_ring; - - ring->dma = tx_ring->dma + tx_ring_size * - eth->soc->tx.desc_size * (ring_no + 1); - ring->phys = tx_ring->phys + tx_ring_size * - eth->soc->tx.desc_size * (ring_no + 1); - } - + ring->dma = mtk_dma_ring_alloc(eth, + rx_dma_size * eth->soc->rx.desc_size, + &ring->phys, + rx_flag == MTK_RX_FLAGS_NORMAL); if (!ring->dma) return -ENOMEM; @@ -2916,10 +2918,9 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring, bool in_ ring->data = NULL; } - if (!in_sram && ring->dma) { - dma_free_coherent(eth->dma_dev, - ring->dma_size * eth->soc->rx.desc_size, - ring->dma, ring->phys); + if (ring->dma) { + mtk_dma_ring_free(eth, ring->dma_size * eth->soc->rx.desc_size, + ring->dma, ring->phys, in_sram); ring->dma = NULL; } @@ -3287,15 +3288,16 @@ static void mtk_dma_free(struct mtk_eth *eth) netdev_tx_reset_subqueue(eth->netdev[i], j); } - if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { - dma_free_coherent(eth->dma_dev, - MTK_QDMA_RING_SIZE * soc->tx.desc_size, - eth->scratch_ring, eth->phy_scratch_ring); + if (eth->scratch_ring) { + mtk_dma_ring_free(eth, soc->tx.fq_dma_size * soc->tx.desc_size, + eth->scratch_ring, eth->phy_scratch_ring, + true); eth->scratch_ring = NULL; eth->phy_scratch_ring = 0; } + mtk_tx_clean(eth); - mtk_rx_clean(eth, &eth->rx_ring[0], MTK_HAS_CAPS(soc->caps, MTK_SRAM)); + mtk_rx_clean(eth, &eth->rx_ring[0], true); mtk_rx_clean(eth, &eth->rx_ring_qdma, false); if (eth->hwlro) { @@ -3341,17 +3343,22 @@ static int mtk_get_irqs(struct platform_device *pdev, struct mtk_eth *eth) int i; /* future SoCs beginning with MT7988 should use named IRQs in dts */ - eth->irq[MTK_FE_IRQ_TX] = platform_get_irq_byname(pdev, "fe1"); - eth->irq[MTK_FE_IRQ_RX] = platform_get_irq_byname(pdev, "fe2"); + eth->irq[MTK_FE_IRQ_TX] = platform_get_irq_byname_optional(pdev, "fe1"); + eth->irq[MTK_FE_IRQ_RX] = platform_get_irq_byname_optional(pdev, "fe2"); if (eth->irq[MTK_FE_IRQ_TX] >= 0 && eth->irq[MTK_FE_IRQ_RX] >= 0) return 0; - /* only use legacy mode if platform_get_irq_byname returned -ENXIO */ + /* only use legacy mode if platform_get_irq_byname_optional returned -ENXIO */ if (eth->irq[MTK_FE_IRQ_TX] != -ENXIO) - return eth->irq[MTK_FE_IRQ_TX]; + return dev_err_probe(&pdev->dev, eth->irq[MTK_FE_IRQ_TX], + "Error requesting FE TX IRQ\n"); if (eth->irq[MTK_FE_IRQ_RX] != -ENXIO) - return eth->irq[MTK_FE_IRQ_RX]; + return dev_err_probe(&pdev->dev, eth->irq[MTK_FE_IRQ_RX], + "Error requesting FE RX IRQ\n"); + + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT)) + dev_warn(&pdev->dev, "legacy DT: missing interrupt-names."); /* legacy way: * On MTK_SHARED_INT SoCs (MT7621 + MT7628) the first IRQ is taken @@ -5002,9 +5009,30 @@ static int mtk_sgmii_init(struct mtk_eth *eth) return 0; }
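mtk_dma_ring_alloc() and mtk_dma_ring_free() above centralize what was previously open-coded per ring: carve descriptor memory out of the SoC SRAM gen_pool when the caller wants it and a pool exists, otherwise fall back to coherent DMA memory, and free through the matching path (on legacy device trees the pool itself is built over a hard-coded window by mtk_setup_legacy_sram(), just below). A userspace model of the fallback shape, assuming a toy LIFO bump allocator in place of the gen_pool:

#include <stdio.h>
#include <stdlib.h>

/* fast_pool stands in for the SRAM gen_pool, malloc() for
 * dma_alloc_coherent(). The bump allocator only supports LIFO frees,
 * which is enough for this sketch.
 */
struct ring_mem {
        void *va;
        int   from_pool;
};

static char fast_pool[4096];
static size_t fast_used;

static struct ring_mem ring_alloc(size_t size, int want_pool)
{
        struct ring_mem m = { .from_pool = 0 };

        if (want_pool && fast_used + size <= sizeof(fast_pool)) {
                m.va = fast_pool + fast_used;   /* gen_pool_alloc() */
                fast_used += size;
                m.from_pool = 1;
        } else {
                m.va = malloc(size);            /* dma_alloc_coherent() */
        }
        return m;
}

static void ring_free(struct ring_mem *m, size_t size)
{
        if (m->from_pool)
                fast_used -= size;              /* gen_pool_free() */
        else
                free(m->va);
        m->va = NULL;
}

int main(void)
{
        struct ring_mem tx = ring_alloc(2048, 1); /* lands in the pool */
        struct ring_mem rx = ring_alloc(8192, 1); /* too big: falls back */

        printf("tx from pool: %d, rx from pool: %d\n", tx.from_pool, rx.from_pool);
        ring_free(&rx, 8192);
        ring_free(&tx, 2048);
        return 0;
}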
+static int mtk_setup_legacy_sram(struct mtk_eth *eth, struct resource *res) +{ + dev_warn(eth->dev, "legacy DT: using hard-coded SRAM offset.\n"); + + if (res->start + MTK_ETH_SRAM_OFFSET + MTK_ETH_NETSYS_V2_SRAM_SIZE - 1 > + res->end) + return -EINVAL; + + eth->sram_pool = devm_gen_pool_create(eth->dev, + const_ilog2(MTK_ETH_SRAM_GRANULARITY), + NUMA_NO_NODE, dev_name(eth->dev)); + + if (IS_ERR(eth->sram_pool)) + return PTR_ERR(eth->sram_pool); + + return gen_pool_add_virt(eth->sram_pool, + (unsigned long)eth->base + MTK_ETH_SRAM_OFFSET, + res->start + MTK_ETH_SRAM_OFFSET, + MTK_ETH_NETSYS_V2_SRAM_SIZE, NUMA_NO_NODE); +} + static int mtk_probe(struct platform_device *pdev) { - struct resource *res = NULL, *res_sram; + struct resource *res = NULL; struct device_node *mac_np; struct mtk_eth *eth; int err, i; @@ -5024,20 +5052,6 @@ static int mtk_probe(struct platform_device *pdev) if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) eth->ip_align = NET_IP_ALIGN; - if (MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM)) { - /* SRAM is actual memory and supports transparent access just like DRAM. - * Hence we don't require __iomem being set and don't need to use accessor - * functions to read from or write to SRAM. - */ - if (mtk_is_netsys_v3_or_greater(eth)) { - eth->sram_base = (void __force *)devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(eth->sram_base)) - return PTR_ERR(eth->sram_base); - } else { - eth->sram_base = (void __force *)eth->base + MTK_ETH_SRAM_OFFSET; - } - } - if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); if (!err) @@ -5112,16 +5126,21 @@ static int mtk_probe(struct platform_device *pdev) err = -EINVAL; goto err_destroy_sgmii; } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM)) { - if (mtk_is_netsys_v3_or_greater(eth)) { - res_sram = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res_sram) { + eth->sram_pool = of_gen_pool_get(pdev->dev.of_node, + "sram", 0); + if (!eth->sram_pool) { + if (!mtk_is_netsys_v3_or_greater(eth)) { + err = mtk_setup_legacy_sram(eth, res); + if (err) + goto err_destroy_sgmii; + } else { + dev_err(&pdev->dev, + "Could not get SRAM pool\n"); err = -EINVAL; goto err_destroy_sgmii; } - eth->phy_scratch_ring = res_sram->start; - } else { - eth->phy_scratch_ring = res->start + MTK_ETH_SRAM_OFFSET; } } } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 9261c0e13b59..0168e2fbc619 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -141,8 +141,10 @@ #define MTK_GDMA_MAC_ADRH(x) ({ typeof(x) _x = (x); (_x == MTK_GMAC3_ID) ? 
\ 0x54C : 0x50C + (_x * 0x1000); }) -/* Internal SRAM offset */ -#define MTK_ETH_SRAM_OFFSET 0x40000 +/* legacy DT support for internal SRAM */ +#define MTK_ETH_SRAM_OFFSET 0x40000 +#define MTK_ETH_SRAM_GRANULARITY 32 +#define MTK_ETH_NETSYS_V2_SRAM_SIZE 0x40000 /* FE global misc reg*/ #define MTK_FE_GLO_MISC 0x124 @@ -1243,8 +1245,9 @@ struct mtk_soc_data { /* struct mtk_eth - This is the main datasructure for holding the state * of the driver * @dev: The device pointer - * @dev: The device pointer used for dma mapping/alloc + * @dma_dev: The device pointer used for dma mapping/alloc * @base: The mapped register i/o base + * @sram_pool: Pointer to SRAM pool used for DMA descriptor rings * @page_lock: Make sure that register operations are atomic * @tx_irq__lock: Make sure that IRQ register operations are atomic * @rx_irq__lock: Make sure that IRQ register operations are atomic @@ -1290,7 +1293,7 @@ struct mtk_eth { struct device *dev; struct device *dma_dev; void __iomem *base; - void *sram_base; + struct gen_pool *sram_pool; spinlock_t page_lock; spinlock_t tx_irq_lock; spinlock_t rx_irq_lock; diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 351dd152f4f3..73c26fcfd85e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -1318,26 +1318,14 @@ mtk_wed_rro_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, static int mtk_wed_rro_alloc(struct mtk_wed_device *dev) { - struct reserved_mem *rmem; - struct device_node *np; - int index; + struct resource res; + int ret; - index = of_property_match_string(dev->hw->node, "memory-region-names", - "wo-dlm"); - if (index < 0) - return index; - - np = of_parse_phandle(dev->hw->node, "memory-region", index); - if (!np) - return -ENODEV; - - rmem = of_reserved_mem_lookup(np); - of_node_put(np); - - if (!rmem) - return -ENODEV; + ret = of_reserved_mem_region_to_resource_byname(dev->hw->node, "wo-dlm", &res); + if (ret) + return ret; - dev->rro.miod_phys = rmem->base; + dev->rro.miod_phys = res.start; dev->rro.fdbk_phys = MTK_WED_MIOD_COUNT + dev->rro.miod_phys; return mtk_wed_rro_ring_alloc(dev, &dev->rro.ring, diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index c06e5ad18b01..8498b35ec7a6 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -234,25 +234,19 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data, } static int -mtk_wed_get_memory_region(struct mtk_wed_hw *hw, int index, +mtk_wed_get_memory_region(struct mtk_wed_hw *hw, const char *name, struct mtk_wed_wo_memory_region *region) { - struct reserved_mem *rmem; - struct device_node *np; - - np = of_parse_phandle(hw->node, "memory-region", index); - if (!np) - return -ENODEV; - - rmem = of_reserved_mem_lookup(np); - of_node_put(np); + struct resource res; + int ret; - if (!rmem) - return -ENODEV; + ret = of_reserved_mem_region_to_resource_byname(hw->node, name, &res); + if (ret) + return 0; - region->phy_addr = rmem->base; - region->size = rmem->size; - region->addr = devm_ioremap(hw->dev, region->phy_addr, region->size); + region->phy_addr = res.start; + region->size = resource_size(&res); + region->addr = devm_ioremap_resource(hw->dev, &res); return !region->addr ? 
-EINVAL : 0; } @@ -319,13 +313,7 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) /* load firmware region metadata */ for (i = 0; i < ARRAY_SIZE(mem_region); i++) { - int index = of_property_match_string(wo->hw->node, - "memory-region-names", - mem_region[i].name); - if (index < 0) - continue; - - ret = mtk_wed_get_memory_region(wo->hw, index, &mem_region[i]); + ret = mtk_wed_get_memory_region(wo->hw, mem_region[i].name, &mem_region[i]); if (ret) return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 65a73913b9a2..64e69e616b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -383,7 +383,6 @@ enum { MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_DIM, - MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, MLX5E_NUM_SQ_STATES, /* Must be kept last */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 5d0014129a7e..391b4e9c9dc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -340,8 +340,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, sq->stats = &c->priv->ptp_stats.sq[tc]; sq->ptpsq = ptpsq; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); sq->stop_room = param->stop_room; sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index c3bda4612fa9..bd96988e102c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -13,7 +13,6 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_RECOVERING] = "recovering", [MLX5E_SQ_STATE_IPSEC] = "ipsec", [MLX5E_SQ_STATE_DIM] = "dim", - [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 74cd111ee320..c68ba0e58fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -567,7 +567,8 @@ inner_tir: return final_err; } -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric) { if (indir) memcpy(indir, rss->indir.table, @@ -582,8 +583,6 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bo if (symmetric) *symmetric = rss->hash.symmetric; - - return 0; } int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index 8ac902190010..c6c1b2847cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -47,7 +47,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss); int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 
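Both mtk_wed lookups above collapse the old three-step reserved-memory dance (of_property_match_string(), of_parse_phandle(), of_reserved_mem_lookup(), plus the of_node_put() cleanup) into one of_reserved_mem_region_to_resource_byname() call that fills a struct resource and cannot leak a node reference. A kernel-style sketch of the resulting shape; it is not buildable outside a kernel tree, and only the "wo-dlm" region name is taken from the driver:

/* Sketch only: kernel APIs, illustrative wrapper. */
#include <linux/of_reserved_mem.h>
#include <linux/ioport.h>

static int lookup_region(struct device_node *node, phys_addr_t *base,
                         size_t *size)
{
        struct resource res;
        int ret;

        /* One call replaces match_string + parse_phandle + rmem lookup. */
        ret = of_reserved_mem_region_to_resource_byname(node, "wo-dlm", &res);
        if (ret)
                return ret;

        *base = res.start;
        *size = resource_size(&res);
        return 0;
}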
*key, u8 *hfunc, bool *symmetric); +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, const u8 *key, const u8 *hfunc, const bool *symmetric, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 5fcbe47337b0..a2acbfee2b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -71,17 +71,12 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return 0; } -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_rss *rss; - int i; - - for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) - if (!res->rss[i]) - break; - if (i == MLX5E_MAX_NUM_RSS) + if (WARN_ON_ONCE(res->rss[rss_idx])) return -ENOSPC; rss = mlx5e_rss_init(res->mdev, inner_ft_support, res->drop_rqn, @@ -97,8 +92,7 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int i mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch); } - res->rss[i] = rss; - *rss_idx = i; + res->rss[rss_idx] = rss; return 0; } @@ -193,19 +187,17 @@ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int n mlx5e_rss_set_indir_uniform(res->rss[0], nch); } -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) { - struct mlx5e_rss *rss; - - if (rss_idx >= MLX5E_MAX_NUM_RSS) - return -EINVAL; + struct mlx5e_rss *rss = NULL; - rss = res->rss[rss_idx]; - if (!rss) - return -ENOENT; + if (rss_idx < MLX5E_MAX_NUM_RSS) + rss = res->rss[rss_idx]; + if (WARN_ON_ONCE(!rss)) + return; - return mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); + mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); } int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, @@ -579,8 +571,6 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann for (ix = 0; ix < nch; ix++) mlx5e_rx_res_channel_activate_direct(res, chs, ix); - for (ix = nch; ix < res->max_nch; ix++) - mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; @@ -603,7 +593,7 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) mlx5e_rx_res_rss_disable(res); - for (ix = 0; ix < res->max_nch; ix++) + for (ix = 0; ix < res->rss_nch; ix++) mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 3e09d91281af..1d049e2aa264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -48,8 +48,9 @@ void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *ch /* Configuration API */ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc, bool *symmetric); +void mlx5e_rx_res_rss_get_rxfh(struct 
mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, const u32 *indir, const u8 *key, const u8 *hfunc, const bool *symmetric); @@ -61,7 +62,7 @@ int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch); int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index a13c5e707b83..9bdb5820c553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -94,29 +94,30 @@ mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, struct net_device **out_dev, struct netlink_ext_ack *extack) { - struct net_device *vlan_dev = *out_dev; - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_PUSH, - .vlan.vid = vlan_dev_vlan_id(vlan_dev), - .vlan.proto = vlan_dev_vlan_proto(vlan_dev), - .vlan.prio = 0, - }; - int err; - - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); - if (err) - return err; - - rcu_read_lock(); - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); - rcu_read_unlock(); - if (!*out_dev) - return -ENODEV; + do { + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, + &attr->action, extack, NULL); + if (err) + return err; - if (is_vlan_dev(*out_dev)) - err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), + dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + } while (is_vlan_dev(*out_dev)); - return err; + return 0; } int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 81332cd4a582..870d12364f99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1195,6 +1195,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, struct flow_action_entry *meta_action; unsigned long cookie = flow->cookie; struct mlx5_ct_entry *entry; + bool has_nat; int err; meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -1236,6 +1237,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); if (err) goto err_set; + has_nat = memcmp(&entry->tuple, &entry->tuple_nat, + sizeof(entry->tuple)); spin_lock_bh(&ct_priv->ht_lock); @@ -1244,7 +1247,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_entries; - if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { + if (has_nat) { err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, 
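mlx5e_tc_act_vlan_add_push_action() above trades self-recursion for a do/while loop: emit a VLAN push action for the current device, hop to its lower link via dev_get_iflink(), and repeat while the result is still a VLAN device, so deeply stacked tags no longer grow the kernel stack. A self-contained model of the same walk (the device chain is fabricated for illustration):

#include <stdio.h>

struct net_dev {
        const char     *name;
        int             is_vlan;
        unsigned short  vid;
        struct net_dev *lower;  /* what dev_get_iflink() would resolve to */
};

/* Iterative walk: one "push vlan" per stacked device, in visit order. */
static int flatten_vlan_stack(struct net_dev **dev)
{
        do {
                printf("push vlan vid %u (from %s)\n", (*dev)->vid, (*dev)->name);
                *dev = (*dev)->lower;
                if (!*dev)
                        return -1;      /* -ENODEV in the driver */
        } while ((*dev)->is_vlan);
        return 0;
}

int main(void)
{
        struct net_dev eth  = { "eth0", 0, 0, NULL };
        struct net_dev svl  = { "eth0.100", 1, 100, &eth };
        struct net_dev cvl  = { "eth0.100.200", 1, 200, &svl };
        struct net_dev *out = &cvl;

        if (!flatten_vlan_stack(&out))
                printf("resolved to %s\n", out->name);
        return 0;
}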
&entry->tuple_nat_node, tuples_nat_ht_params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 995eedf7a51a..d507366d773e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1480,61 +1480,121 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct mlx5e_priv *priv = netdev_priv(netdev); - u32 rss_context = rxfh->rss_context; bool symmetric; - int err; mutex_lock(&priv->state_lock); - err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, - rxfh->indir, rxfh->key, &rxfh->hfunc, &symmetric); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, 0, rxfh->indir, rxfh->key, + &rxfh->hfunc, &symmetric); mutex_unlock(&priv->state_lock); - if (err) - return err; - if (symmetric) rxfh->input_xfrm = RXH_XFRM_SYM_OR_XOR; return 0; } -static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static int mlx5e_rxfh_hfunc_check(struct mlx5e_priv *priv, + const struct ethtool_rxfh_param *rxfh) { - bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; - struct mlx5e_priv *priv = netdev_priv(dev); - u32 *rss_context = &rxfh->rss_context; - u8 hfunc = rxfh->hfunc; unsigned int count; - int err; - - mutex_lock(&priv->state_lock); count = priv->channels.params.num_channels; - if (hfunc == ETH_RSS_HASH_XOR) { + if (rxfh->hfunc == ETH_RSS_HASH_XOR) { unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); if (count > xor8_max_channels) { - err = -EINVAL; netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", __func__, count, xor8_max_channels); - goto unlock; + return -EINVAL; } } - if (*rss_context && rxfh->rss_delete) { - err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + return 0; +} + +static int mlx5e_set_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) goto unlock; - } - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); - if (err) - goto unlock; - } + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_create_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rxfh->rss_context, + priv->channels.params.num_channels); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? 
NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + if (err) + goto unlock; + + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rxfh->rss_context, + ethtool_rxfh_context_indir(ctx), + ethtool_rxfh_context_key(ctx), + &ctx->hfunc, &symmetric); + if (symmetric) + ctx->input_xfrm = RXH_XFRM_SYM_OR_XOR; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} - err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, +static int mlx5e_modify_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, rxfh->indir, rxfh->key, hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); @@ -1544,6 +1604,20 @@ unlock: return err; } +static int mlx5e_remove_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_destroy(priv->rx_res, rss_context); + mutex_unlock(&priv->state_lock); + return err; +} + #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 #define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 #define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 @@ -2654,9 +2728,9 @@ static void mlx5e_get_ts_stats(struct net_device *netdev, const struct ethtool_ops mlx5e_ethtool_ops = { .cap_link_lanes_supported = true, - .cap_rss_ctx_supported = true, .rxfh_per_ctx_fields = true, .rxfh_per_ctx_key = true, + .rxfh_max_num_contexts = MLX5E_MAX_NUM_RSS, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE | @@ -2685,6 +2759,9 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_rxfh = mlx5e_set_rxfh, .get_rxfh_fields = mlx5e_get_rxfh_fields, .set_rxfh_fields = mlx5e_set_rxfh_fields, + .create_rxfh_context = mlx5e_create_rxfh_context, + .modify_rxfh_context = mlx5e_modify_rxfh_context, + .remove_rxfh_context = mlx5e_remove_rxfh_context, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e8e5b347f9b2..fee323ade522 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1677,8 +1677,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 55a8629f0792..e6a301ba3254 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -256,8 +256,7 @@ mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, mode = sq->min_inline_mode; - 
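The factored-out mlx5e_rxfh_hfunc_check() is shared by set_rxfh and the new context callbacks: XOR8 hashing can only index a bounded number of channels, so a request selecting ETH_RSS_HASH_XOR while more channels are active must fail before any state changes. The guard in isolation, as a sketch (the limit below is illustrative; the driver derives it from mlx5e_rqt_max_num_channels_allowed_for_xor8()):

#include <stdio.h>

enum { HASH_TOP, HASH_XOR, HASH_NO_CHANGE };

static int check_hfunc(int hfunc, unsigned int channels,
                       unsigned int xor8_max_channels)
{
        if (hfunc == HASH_XOR && channels > xor8_max_channels)
                return -1;  /* -EINVAL: XOR8 cannot index that many channels */
        return 0;
}

int main(void)
{
        unsigned int xor8_max = 256;  /* illustrative, not the real cap */

        printf("xor, 128 ch: %d\n", check_hfunc(HASH_XOR, 128, xor8_max)); /* ok */
        printf("xor, 300 ch: %d\n", check_hfunc(HASH_XOR, 300, xor8_max)); /* rejected */
        printf("toeplitz, 300 ch: %d\n", check_hfunc(HASH_TOP, 300, xor8_max));
        return 0;
}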
if (skb_vlan_tag_present(skb) && - test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + if (skb_vlan_tag_present(skb)) mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); return mode; @@ -483,12 +482,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, } eseg->inline_hdr.sz |= cpu_to_be16(ihs); dseg += wqe_attr->ds_cnt_inl; - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) - eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); - stats->added_vlan_packets++; } dseg += wqe_attr->ds_cnt_ids; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 154bbb17ec0e..7ca6bba24001 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1353,7 +1353,7 @@ static int esw_qos_switch_tc_arbiter_node_to_vports( &node->ix); if (err) { NL_SET_ERR_MSG_MOD(extack, - "Failed to create scheduling element for vports node when disabliing vports TC QoS"); + "Failed to create scheduling element for vports node when disabling vports TC QoS"); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a8046200d376..a4da182abe63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3245,34 +3245,62 @@ static int init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering, int vport_idx) { + struct mlx5_flow_root_namespace *root_ns; struct fs_prio *prio; + int ret; + int i; steering->rdma_transport_rx_root_ns[vport_idx] = create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX); if (!steering->rdma_transport_rx_root_ns[vport_idx]) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns, - MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); - return PTR_ERR_OR_ZERO(prio); + root_ns = steering->rdma_transport_rx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; } static int init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering, int vport_idx) { + struct mlx5_flow_root_namespace *root_ns; struct fs_prio *prio; + int ret; + int i; steering->rdma_transport_tx_root_ns[vport_idx] = create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX); if (!steering->rdma_transport_tx_root_ns[vport_idx]) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns, - MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1); - return PTR_ERR_OR_ZERO(prio); + root_ns = steering->rdma_transport_tx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; } static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering) @@ -3919,6 +3947,8 @@ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) if (mlx5_fs_dr_is_supported(dev)) steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else if (mlx5_fs_hws_is_supported(dev)) + steering->mode = 
MLX5_FLOW_STEERING_MODE_HMFS; else steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 7c5516b0a844..8115071c34a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -30,7 +30,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) - return ERR_PTR(-ENOMEM); + return NULL; spin_lock_init(&dm->lock); @@ -96,7 +96,7 @@ err_modify_hdr: err_steering: kfree(dm); - return ERR_PTR(-ENOMEM); + return NULL; } void mlx5_dm_cleanup(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 41e8660c819c..b0043cfee29b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1102,9 +1102,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) } dev->dm = mlx5_dm_create(dev); - if (IS_ERR(dev->dm)) - mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); - dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); dev->rsc_dump = mlx5_rsc_dump_create(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index 447ea3f8722c..396804369b00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1358,12 +1358,9 @@ free_action: } struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags) + bool ignore_flow_level, u32 flags) { struct mlx5hws_cmd_set_fte_dest *dest_list = NULL; struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 9e057f808ea5..92de4b761a83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -48,7 +48,7 @@ static void hws_bwc_unlock_all_queues(struct mlx5hws_context *ctx) static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 priority, - u8 size_log, + u8 size_log_rx, u8 size_log_tx, struct mlx5hws_matcher_attr *attr) { struct mlx5hws_bwc_matcher *first_matcher = @@ -62,7 +62,8 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, attr->optimize_flow_src = MLX5HWS_MATCHER_FLOW_SRC_ANY; attr->insert_mode = MLX5HWS_MATCHER_INSERT_BY_HASH; attr->distribute_mode = MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH; - attr->rule.num_log = size_log; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].rule.num_log = size_log_rx; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].rule.num_log = size_log_tx; attr->resizable = true; attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; @@ -70,6 +71,130 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, first_matcher ? 
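init_rdma_transport_rx/tx_root_ns_one() above now create MLX5_RDMA_TRANSPORT_BYPASS_PRIO priorities in a loop and tear the whole root namespace down if any iteration fails. The create-N-or-unwind shape in a self-contained sketch (the count and failure point are arbitrary):

#include <stdio.h>
#include <stdlib.h>

#define NUM_PRIOS 4     /* stand-in for MLX5_RDMA_TRANSPORT_BYPASS_PRIO */

/* Fails at i == 2 so the unwind path below is exercised. */
static void *create_prio(int i)
{
        return (i == 2) ? NULL : malloc(16);
}

int main(void)
{
        void *prios[NUM_PRIOS] = { 0 };
        int i;

        for (i = 0; i < NUM_PRIOS; i++) {
                prios[i] = create_prio(i);
                if (!prios[i])
                        goto err;       /* cleanup_root_ns() in the driver */
        }
        puts("all prios created");
        return 0;

err:
        while (--i >= 0)                /* unwind everything created so far */
                free(prios[i]);
        puts("prio creation failed, earlier prios unwound");
        return 1;
}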
first_matcher->matcher->end_ft_id : 0; } +static int +hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + bool move_error = false, poll_error = false, drain_error = false; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_bwc_rule *bwc_rule; + struct mlx5hws_send_engine *queue; + struct list_head *rules_list; + u32 pending_rules; + int i, ret = 0; + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + if (list_empty(&bwc_matcher->rules[i])) + continue; + + pending_rules = 0; + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + rules_list = &bwc_matcher->rules[i]; + + list_for_each_entry(bwc_rule, rules_list, list_node) { + ret = mlx5hws_matcher_resize_rule_move(matcher, + bwc_rule->rule, + &rule_attr); + if (unlikely(ret && !move_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = true; + } + + pending_rules++; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + false); + if (unlikely(ret && !poll_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = true; + } + } + + if (pending_rules) { + queue = &ctx->send_queue[rule_attr.queue_id]; + mlx5hws_send_engine_flush_queue(queue); + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + true); + if (unlikely(ret && !drain_error)) { + mlx5hws_err(ctx, + "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = true; + } + } + } + + if (move_error || poll_error || drain_error) + ret = -EINVAL; + + return ret; +} + +static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + if (!bwc_matcher->complex) + return hws_bwc_matcher_move_all_simple(bwc_matcher); + + return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); +} + +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher_attr matcher_attr = {0}; + struct mlx5hws_matcher *old_matcher; + struct mlx5hws_matcher *new_matcher; + int ret; + + hws_bwc_matcher_init_attr(bwc_matcher, + bwc_matcher->priority, + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, + &matcher_attr); + + old_matcher = bwc_matcher->matcher; + new_matcher = mlx5hws_matcher_create(old_matcher->tbl, + &bwc_matcher->mt, 1, + bwc_matcher->at, + bwc_matcher->num_of_at, + &matcher_attr); + if (!new_matcher) { + mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); + return -ENOMEM; + } + + ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); + if (ret) { + mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); + return ret; + } + + ret = hws_bwc_matcher_move_all(bwc_matcher); + if (ret) + mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n"); + + /* Error during rehash can't be rolled back. + * The best option here is to allow the rehash to complete and remove + * the old matcher - can't leave the matcher in the 'in_resize' state. 
+ */ + + bwc_matcher->matcher = new_matcher; + mlx5hws_matcher_destroy(old_matcher); + + return ret; +} + int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_table *table, u32 priority, @@ -92,11 +217,11 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, hws_bwc_matcher_init_attr(bwc_matcher, priority, - MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG, + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, &attr); bwc_matcher->priority = priority; - bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, @@ -148,6 +273,20 @@ err: return -EINVAL; } +static void +hws_bwc_matcher_init_size_rxtx(struct mlx5hws_bwc_matcher_size *size) +{ + size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + atomic_set(&size->num_of_rules, 0); + atomic_set(&size->rehash_required, false); +} + +static void hws_bwc_matcher_init_size(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->rx_size); + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->tx_size); +} + struct mlx5hws_bwc_matcher * mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, u32 priority, @@ -168,8 +307,7 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, if (!bwc_matcher) return NULL; - atomic_set(&bwc_matcher->num_of_rules, 0); - atomic_set(&bwc_matcher->rehash_required, false); + hws_bwc_matcher_init_size(bwc_matcher); /* Check if the required match params can be all matched * in single STE, otherwise complex matcher is needed. @@ -219,12 +357,13 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) { - u32 num_of_rules = atomic_read(&bwc_matcher->num_of_rules); + u32 rx_rules = atomic_read(&bwc_matcher->rx_size.num_of_rules); + u32 tx_rules = atomic_read(&bwc_matcher->tx_size.num_of_rules); - if (num_of_rules) + if (rx_rules || tx_rules) mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "BWC matcher destroy: matcher still has %d rules\n", - num_of_rules); + "BWC matcher destroy: matcher still has %u RX and %u TX rules\n", + rx_rules, tx_rules); if (bwc_matcher->complex) mlx5hws_bwc_matcher_destroy_complex(bwc_matcher); @@ -384,6 +523,80 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, return 0; } +static void hws_bwc_rule_cnt_dec(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + atomic_dec(&bwc_matcher->rx_size.num_of_rules); + if (!bwc_rule->skip_tx) + atomic_dec(&bwc_matcher->tx_size.num_of_rules); +} + +static int +hws_bwc_matcher_rehash_shrink(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_bwc_matcher_size *rx_size = &bwc_matcher->rx_size; + struct mlx5hws_bwc_matcher_size *tx_size = &bwc_matcher->tx_size; + + /* It is possible that another thread has added a rule. + * Need to check again if we really need rehash/shrink. + */ + if (atomic_read(&rx_size->num_of_rules) || + atomic_read(&tx_size->num_of_rules)) + return 0; + + /* If the current matcher RX/TX size is already at its initial size. 
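With the split into rx_size and tx_size, rehash is evaluated per direction: a matcher asks to grow once its rule count reaches MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH percent of its 2^size_log capacity, and an emptied matcher is shrunk back to the initial size_log to return ICM memory. The growth trigger is plain integer arithmetic; a worked sketch (the 70% threshold is an assumed value for illustration):

#include <stdio.h>

/* Rule-count trigger from hws_bwc_matcher_rehash_size_needed():
 * grow when num_rules * 100 / TH >= 2^size_log.
 */
static int rehash_needed(unsigned int num_rules, unsigned int size_log,
                         unsigned int percent_th)
{
        return (num_rules * 100 / percent_th) >= (1u << size_log);
}

int main(void)
{
        unsigned int th = 70;  /* assumed threshold, illustration only */

        printf("%d\n", rehash_needed(700, 10, th)); /* 1000 >= 1024? no  */
        printf("%d\n", rehash_needed(720, 10, th)); /* 1028 >= 1024? yes */
        return 0;
}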
*/ + if (rx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG && + tx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG) + return 0; + + /* Now we've done all the checking - do the shrinking: + * - reset match RTC size to the initial size + * - create new matcher + * - move the rules, which will not do anything as the matcher is empty + * - destroy the old matcher + */ + + rx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + tx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + return hws_bwc_matcher_move(bwc_matcher); +} + +static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ + int ret; + + hws_bwc_rule_cnt_dec(bwc_rule); + + if (atomic_read(&bwc_matcher->rx_size.num_of_rules) || + atomic_read(&bwc_matcher->tx_size.num_of_rules)) + return 0; + + /* Matcher has no more rules - shrink it to save ICM. */ + + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_shrink(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); + + mutex_lock(queue_lock); + + if (unlikely(ret)) + mlx5hws_err(ctx, + "BWC rule deletion: shrinking empty matcher failed (%d)\n", + ret); + + return ret; +} + int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; @@ -400,8 +613,8 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) mutex_lock(queue_lock); ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr); - atomic_dec(&bwc_matcher->num_of_rules); hws_bwc_rule_list_remove(bwc_rule); + hws_bwc_rule_cnt_dec_with_shrink(bwc_rule, idx); mutex_unlock(queue_lock); @@ -487,25 +700,27 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, } static bool -hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; /* check the match RTC size */ - return (bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + + return (size->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > (caps->ste_alloc_log_max - 1); } static bool hws_bwc_matcher_rehash_size_needed(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size, u32 num_of_rules) { - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) return false; if (unlikely((num_of_rules * 100 / MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH) >= - (1UL << bwc_matcher->size_log))) + (1UL << size->size_log))) return true; return false; @@ -562,20 +777,21 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, } static int -hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_cmd_query_caps *caps = ctx->caps; - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) { + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) { mlx5hws_err(ctx, "Can't resize matcher: depth exceeds limit %d\n", 
caps->rtc_log_depth_max); return -ENOMEM; } - bwc_matcher->size_log = - min(bwc_matcher->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - caps->ste_alloc_log_max - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); + size->size_log = min(size->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, + caps->ste_alloc_log_max - + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); return 0; } @@ -608,146 +824,42 @@ hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher, return -1; } -static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - bool move_error = false, poll_error = false, drain_error = false; - struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_matcher *matcher = bwc_matcher->matcher; - u16 bwc_queues = mlx5hws_bwc_queues(ctx); - struct mlx5hws_rule_attr rule_attr; - struct mlx5hws_bwc_rule *bwc_rule; - struct mlx5hws_send_engine *queue; - struct list_head *rules_list; - u32 pending_rules; - int i, ret = 0; - - mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); - - for (i = 0; i < bwc_queues; i++) { - if (list_empty(&bwc_matcher->rules[i])) - continue; - - pending_rules = 0; - rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - rules_list = &bwc_matcher->rules[i]; - - list_for_each_entry(bwc_rule, rules_list, list_node) { - ret = mlx5hws_matcher_resize_rule_move(matcher, - bwc_rule->rule, - &rule_attr); - if (unlikely(ret && !move_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", - ret); - move_error = true; - } - - pending_rules++; - ret = mlx5hws_bwc_queue_poll(ctx, - rule_attr.queue_id, - &pending_rules, - false); - if (unlikely(ret && !poll_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n", - ret); - poll_error = true; - } - } - - if (pending_rules) { - queue = &ctx->send_queue[rule_attr.queue_id]; - mlx5hws_send_engine_flush_queue(queue); - ret = mlx5hws_bwc_queue_poll(ctx, - rule_attr.queue_id, - &pending_rules, - true); - if (unlikely(ret && !drain_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", - ret); - drain_error = true; - } - } - } - - if (move_error || poll_error || drain_error) - ret = -EINVAL; - - return ret; -} - -static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - if (!bwc_matcher->complex) - return hws_bwc_matcher_move_all_simple(bwc_matcher); - - return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); -} - -static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_matcher_attr matcher_attr = {0}; - struct mlx5hws_matcher *old_matcher; - struct mlx5hws_matcher *new_matcher; - int ret; - - hws_bwc_matcher_init_attr(bwc_matcher, - bwc_matcher->priority, - bwc_matcher->size_log, - &matcher_attr); - - old_matcher = bwc_matcher->matcher; - new_matcher = mlx5hws_matcher_create(old_matcher->tbl, - &bwc_matcher->mt, 1, - bwc_matcher->at, - bwc_matcher->num_of_at, - &matcher_attr); - if (!new_matcher) { - mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); - return -ENOMEM; - } - - ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); - return ret; - } - - ret = hws_bwc_matcher_move_all(bwc_matcher); - if (ret) - mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n"); - - /* Error 
during rehash can't be rolled back. - * The best option here is to allow the rehash to complete and remove - * the old matcher - can't leave the matcher in the 'in_resize' state. - */ - - bwc_matcher->matcher = new_matcher; - mlx5hws_matcher_destroy(old_matcher); - - return ret; -} - static int hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) { + bool need_rx_rehash, need_tx_rehash; int ret; - /* If the current matcher size is already at its max size, we can't - * do the rehash. Skip it and try adding the rule again - perhaps - * there was some change. + need_rx_rehash = atomic_read(&bwc_matcher->rx_size.rehash_required); + need_tx_rehash = atomic_read(&bwc_matcher->tx_size.rehash_required); + + /* It is possible that another rule has already performed rehash. + * Need to check again if we really need rehash. */ - if (hws_bwc_matcher_size_maxed_out(bwc_matcher)) + if (!need_rx_rehash && !need_tx_rehash) return 0; - /* It is possible that other rule has already performed rehash. - * Need to check again if we really need rehash. + /* If the current matcher RX/TX size is already at its max size, + * it can't be rehashed. */ - if (!atomic_read(&bwc_matcher->rehash_required) && - !hws_bwc_matcher_rehash_size_needed(bwc_matcher, - atomic_read(&bwc_matcher->num_of_rules))) + if (need_rx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->rx_size)) { + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + need_rx_rehash = false; + } + if (need_tx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->tx_size)) { + atomic_set(&bwc_matcher->tx_size.rehash_required, false); + need_tx_rehash = false; + } + + /* If both RX and TX rehash flags are now off, it means that whatever + * we wanted to rehash is now at its max size - no rehash can be done. + * Return and try adding the rule again - perhaps there was some change. 
+ */ + if (!need_rx_rehash && !need_tx_rehash) return 0; /* Now we're done all the checking - do the rehash: @@ -756,12 +868,22 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) * - move all the rules to the new matcher * - destroy the old matcher */ + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + atomic_set(&bwc_matcher->tx_size.rehash_required, false); - atomic_set(&bwc_matcher->rehash_required, false); + if (need_rx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->rx_size); + if (ret) + return ret; + } - ret = hws_bwc_matcher_extend_size(bwc_matcher); - if (ret) - return ret; + if (need_tx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->tx_size); + if (ret) + return ret; + } return hws_bwc_matcher_move(bwc_matcher); } @@ -813,6 +935,62 @@ out: return at_idx; } +static void hws_bwc_rule_cnt_inc_rxtx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_matcher_size *size) +{ + u32 num_of_rules = atomic_inc_return(&size->num_of_rules); + + if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_rule->bwc_matcher, + size, num_of_rules))) + atomic_set(&size->rehash_required, true); +} + +static void hws_bwc_rule_cnt_inc(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->rx_size); + if (!bwc_rule->skip_tx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->tx_size); +} + +static int hws_bwc_rule_cnt_inc_with_rehash(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ + int ret; + + hws_bwc_rule_cnt_inc(bwc_rule); + + if (!atomic_read(&bwc_matcher->rx_size.rehash_required) && + !atomic_read(&bwc_matcher->tx_size.rehash_required)) + return 0; + + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_size(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); + + mutex_lock(queue_lock); + + if (likely(!ret)) + return 0; + + /* Failed to rehash. Print a diagnostic and rollback the counters. 
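A worked example of when the rehash_required flag trips, since the check in hws_bwc_matcher_rehash_size_needed() reads as a load-factor test: a direction is flagged once num_of_rules * 100 / MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH reaches the table size. Assuming the threshold constant is 70 (it is defined in bwc.h, outside this diff):

	/* size_log = 12  ->  1 << 12 = 4096 entries in this direction.
	 * 2867 * 100 / 70 = 4095  ->  below 4096, no rehash yet
	 * 2868 * 100 / 70 = 4097  ->  rehash_required set at ~70% load
	 */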
*/ + mlx5hws_err(ctx, + "BWC rule insertion: rehash to sizes [%d, %d] failed (%d)\n", + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, ret); + hws_bwc_rule_cnt_dec(bwc_rule); + + return ret; +} + int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, u32 *match_param, struct mlx5hws_rule_action rule_actions[], @@ -823,7 +1001,6 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ - u32 num_of_rules; int ret = 0; int at_idx; @@ -841,26 +1018,10 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, return -EINVAL; } - /* check if number of rules require rehash */ - num_of_rules = atomic_inc_return(&bwc_matcher->num_of_rules); - - if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) { + ret = hws_bwc_rule_cnt_inc_with_rehash(bwc_rule, bwc_queue_idx); + if (unlikely(ret)) { mutex_unlock(queue_lock); - - hws_bwc_lock_all_queues(ctx); - ret = hws_bwc_matcher_rehash_size(bwc_matcher); - hws_bwc_unlock_all_queues(ctx); - - if (ret) { - mlx5hws_err(ctx, "BWC rule insertion: rehash size [%d -> %d] failed (%d)\n", - bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - bwc_matcher->size_log, - ret); - atomic_dec(&bwc_matcher->num_of_rules); - return ret; - } - - mutex_lock(queue_lock); + return ret; } ret = hws_bwc_rule_create_sync(bwc_rule, @@ -876,12 +1037,13 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, /* At this point the rule wasn't added. * It could be because there was collision, or some other problem. - * If we don't dive deeper than API, the only thing we know is that - * the status of completion is RTE_FLOW_OP_ERROR. * Try rehash by size and insert rule again - last chance. 
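The unlock/relock dance in hws_bwc_rule_cnt_inc_with_rehash() (and in the shrink path above) is worth calling out: rehash must quiesce every send queue, so the caller's single queue lock is dropped before taking all of them, presumably to keep the lock ordering consistent. The pattern, restated from this hunk as a minimal sketch:

	mutex_unlock(queue_lock);        /* drop this rule's queue lock */
	hws_bwc_lock_all_queues(ctx);    /* exclusive access to all queues */
	ret = hws_bwc_matcher_rehash_size(bwc_matcher);
	hws_bwc_unlock_all_queues(ctx);
	mutex_lock(queue_lock);          /* reacquire before continuing */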
*/ + if (!bwc_rule->skip_rx) + atomic_set(&bwc_matcher->rx_size.rehash_required, true); + if (!bwc_rule->skip_tx) + atomic_set(&bwc_matcher->tx_size.rehash_required, true); - atomic_set(&bwc_matcher->rehash_required, true); mutex_unlock(queue_lock); hws_bwc_lock_all_queues(ctx); @@ -890,7 +1052,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (ret) { mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret); - atomic_dec(&bwc_matcher->num_of_rules); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -906,7 +1068,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) { mutex_unlock(queue_lock); mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret); - atomic_dec(&bwc_matcher->num_of_rules); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -936,6 +1098,10 @@ mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher, if (unlikely(!bwc_rule)) return NULL; + bwc_rule->flow_source = flow_source; + mlx5hws_rule_skip(bwc_matcher->matcher, flow_source, + &bwc_rule->skip_rx, &bwc_rule->skip_tx); + bwc_queue_idx = hws_bwc_gen_queue_idx(ctx); if (bwc_matcher->complex) @@ -971,7 +1137,8 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, idx = bwc_rule->bwc_queue_idx; - mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, 0, &rule_attr); + mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, bwc_rule->flow_source, + &rule_attr); queue_lock = hws_bwc_get_queue_lock(ctx, idx); mutex_lock(queue_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index d21fc247a510..af391d70c14f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -19,6 +19,13 @@ #define MLX5HWS_BWC_POLLING_TIMEOUT 60 struct mlx5hws_bwc_matcher_complex_data; + +struct mlx5hws_bwc_matcher_size { + u8 size_log; + atomic_t num_of_rules; + atomic_t rehash_required; +}; + struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; @@ -27,10 +34,9 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_bwc_matcher *complex_first_bwc_matcher; u8 num_of_at; u8 size_of_at_array; - u8 size_log; u32 priority; - atomic_t num_of_rules; - atomic_t rehash_required; + struct mlx5hws_bwc_matcher_size rx_size; + struct mlx5hws_bwc_matcher_size tx_size; struct list_head *rules; }; @@ -39,7 +45,10 @@ struct mlx5hws_bwc_rule { struct mlx5hws_rule *rule; struct mlx5hws_bwc_rule *isolated_bwc_rule; struct mlx5hws_bwc_complex_rule_hash_node *complex_hash_node; + u32 flow_source; u16 bwc_queue_idx; + bool skip_rx; + bool skip_tx; struct list_head list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c index 91568d6c1dac..2ec8cb10139a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c @@ -99,17 +99,19 @@ hws_debug_dump_matcher_attr(struct seq_file *f, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; - seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d\n", + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d,-1,-1,%d,%d\n", MLX5HWS_DEBUG_RES_TYPE_MATCHER_ATTR, HWS_PTR_TO_ID(matcher), attr->priority, attr->mode, - attr->table.sz_row_log, - attr->table.sz_col_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_row_log, + 
attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_col_log, attr->optimize_using_rule_idx, attr->optimize_flow_src, attr->insert_mode, - attr->distribute_mode); + attr->distribute_mode, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_row_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_col_log); return 0; } @@ -118,7 +120,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma { enum mlx5hws_table_type tbl_type = matcher->tbl->type; struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; - struct mlx5hws_pool *ste_pool; u64 icm_addr_0 = 0; u64 icm_addr_1 = 0; u32 ste_0_id = -1; @@ -133,12 +134,9 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->end_ft_id, matcher->col_matcher ? HWS_PTR_TO_ID(matcher->col_matcher) : 0); - ste_pool = matcher->match_ste.pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_get_base_id(ste_pool); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool); - } + ste_0_id = matcher->match_ste.ste_0_base; + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + ste_1_id = matcher->match_ste.ste_1_base; seq_printf(f, ",%d,%d,%d,%d", matcher->match_ste.rtc_0_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index bf4643d0ce17..57592b92e24b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -571,14 +571,12 @@ static void mlx5_fs_put_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, static struct mlx5hws_action * mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_action_dest_attr *dests, - u32 num_of_dests, bool ignore_flow_level, - u32 flow_source) + u32 num_of_dests, bool ignore_flow_level) { u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; return mlx5hws_action_create_dest_array(ctx, num_of_dests, dests, - ignore_flow_level, - flow_source, flags); + ignore_flow_level, flags); } static struct mlx5hws_action * @@ -1015,7 +1013,6 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } (*ractions)[num_actions++].action = dest_actions->dest; } else if (num_dest_actions > 1) { - u32 flow_source = fte->act_dests.flow_context.flow_source; bool ignore_flow_level; if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || @@ -1025,10 +1022,10 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } ignore_flow_level = !!(fte_action->flags & FLOW_ACT_IGNORE_FLOW_LEVEL); - tmp_action = mlx5_fs_create_action_dest_array(ctx, dest_actions, - num_dest_actions, - ignore_flow_level, - flow_source); + tmp_action = + mlx5_fs_create_action_dest_array(ctx, dest_actions, + num_dest_actions, + ignore_flow_level); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index ce28ee1c0e41..f3ea09caba2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -468,12 +468,16 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; struct mlx5hws_match_template *mt = matcher->mt; struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_table *tbl = matcher->tbl; u32 obj_id; 
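A subtlety of union mlx5hws_matcher_size (defined in mlx5hws.h further down): rule.num_log shares its byte with table.sz_row_log, so converting a rule count into table dimensions only has to write sz_col_log and the row log falls out of the aliasing. A sketch under that assumption, not driver code:

	union mlx5hws_matcher_size s = { .rule = { .num_log = 20 } };

	s.table.sz_col_log = 2;	/* writes the second byte only */
	/* s.table.sz_row_log still reads 20 - it aliases rule.num_log */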
int ret; - rtc_attr.log_size = attr->table.sz_row_log; - rtc_attr.log_depth = attr->table.sz_col_log; + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + + rtc_attr.log_size = size_rx->table.sz_row_log; + rtc_attr.log_depth = size_rx->table.sz_col_log; rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); rtc_attr.is_scnd_range = 0; rtc_attr.miss_ft_id = matcher->end_ft_id; @@ -507,10 +511,8 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) } } - obj_id = mlx5hws_pool_get_base_id(matcher->match_ste.pool); - rtc_attr.pd = ctx->pd_num; - rtc_attr.ste_base = obj_id; + rtc_attr.ste_base = matcher->match_ste.ste_0_base; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false); hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false); @@ -527,9 +529,9 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) } if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_get_base_mirror_id( - matcher->match_ste.pool); - rtc_attr.ste_base = obj_id; + rtc_attr.log_size = size_tx->table.sz_row_log; + rtc_attr.log_depth = size_tx->table.sz_col_log; + rtc_attr.ste_base = matcher->match_ste.ste_1_base; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true); obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); @@ -566,43 +568,38 @@ hws_matcher_check_attr_sz(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size; + int i; - if (attr->table.sz_col_log > caps->rtc_log_depth_max) { - mlx5hws_err(matcher->tbl->ctx, "Matcher depth exceeds limit %d\n", - caps->rtc_log_depth_max); - return -EOPNOTSUPP; - } + for (i = 0; i < 2; i++) { + size = &attr->size[i]; - if (attr->table.sz_col_log + attr->table.sz_row_log > caps->ste_alloc_log_max) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size exceeds limit %d\n", - caps->ste_alloc_log_max); - return -EOPNOTSUPP; - } + if (size->table.sz_col_log > caps->rtc_log_depth_max) { + mlx5hws_err(ctx, "Matcher depth exceeds limit %d\n", + caps->rtc_log_depth_max); + return -EOPNOTSUPP; + } - if (attr->table.sz_col_log + attr->table.sz_row_log < caps->ste_alloc_log_gran) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size below limit %d\n", - caps->ste_alloc_log_gran); - return -EOPNOTSUPP; + if (size->table.sz_col_log + size->table.sz_row_log > + caps->ste_alloc_log_max) { + mlx5hws_err(ctx, + "Total matcher size exceeds limit %d\n", + caps->ste_alloc_log_max); + return -EOPNOTSUPP; + } + + if (size->table.sz_col_log + size->table.sz_row_log < + caps->ste_alloc_log_gran) { + mlx5hws_err(ctx, "Total matcher size below limit %d\n", + caps->ste_alloc_log_gran); + return -EOPNOTSUPP; + } } return 0; } -static void hws_matcher_set_pool_attr(struct mlx5hws_pool_attr *attr, - struct mlx5hws_matcher *matcher) -{ - switch (matcher->attr.optimize_flow_src) { - case MLX5HWS_MATCHER_FLOW_SRC_VPORT: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_ORIG; - break; - case MLX5HWS_MATCHER_FLOW_SRC_WIRE: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_MIRROR; - break; - default: - break; - } -} - static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { @@ -683,8 +680,9 @@ static void hws_matcher_set_ip_version_match(struct mlx5hws_matcher *matcher) static int hws_matcher_bind_mt(struct 
mlx5hws_matcher *matcher) { + struct mlx5hws_cmd_ste_create_attr ste_attr = {}; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_pool_attr pool_attr = {0}; + union mlx5hws_matcher_size *size; int ret; /* Calculate match, range and hash definers */ @@ -699,22 +697,39 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) hws_matcher_set_ip_version_match(matcher); - /* Create an STE pool per matcher*/ - pool_attr.table_type = matcher->tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log + - matcher->attr.table.sz_row_log; - hws_matcher_set_pool_attr(&pool_attr, matcher); - - matcher->match_ste.pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!matcher->match_ste.pool) { - mlx5hws_err(ctx, "Failed to allocate matcher STE pool\n"); - ret = -EOPNOTSUPP; + /* Create an STE range each for RX and TX. */ + ste_attr.table_type = FS_FT_FDB_RX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_0_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate RX STE range (%d)\n", ret); goto uninit_match_definer; } + ste_attr.table_type = FS_FT_FDB_TX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_1_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate TX STE range (%d)\n", ret); + goto destroy_rx_ste_range; + } + return 0; +destroy_rx_ste_range: + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); uninit_match_definer: if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) mlx5hws_definer_mt_uninit(ctx, matcher->mt); @@ -723,9 +738,12 @@ uninit_match_definer: static void hws_matcher_unbind_mt(struct mlx5hws_matcher *matcher) { - mlx5hws_pool_destroy(matcher->match_ste.pool); + struct mlx5hws_context *ctx = matcher->tbl->ctx; + + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_1_base); + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) - mlx5hws_definer_mt_uninit(matcher->tbl->ctx, matcher->mt); + mlx5hws_definer_mt_uninit(ctx, matcher->mt); } static int @@ -734,6 +752,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; switch (attr->insert_mode) { case MLX5HWS_MATCHER_INSERT_BY_HASH: @@ -744,7 +766,7 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, break; case MLX5HWS_MATCHER_INSERT_BY_INDEX: - if (attr->table.sz_col_log) { + if (size_rx->table.sz_col_log || size_tx->table.sz_col_log) { mlx5hws_err(ctx, "Matcher with INSERT_BY_INDEX supports only Nx1 table size\n"); return -EOPNOTSUPP; } @@ -764,7 +786,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, return -EOPNOTSUPP; } - if (attr->table.sz_row_log > MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { + if (size_rx->table.sz_row_log > 
+ MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX || + size_tx->table.sz_row_log > + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { mlx5hws_err(ctx, "Matcher with linear distribute: rows exceed limit %d", MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX); return -EOPNOTSUPP; @@ -788,6 +813,10 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (hws_matcher_validate_insert_mode(caps, matcher)) return -EOPNOTSUPP; @@ -799,8 +828,12 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, /* Convert number of rules to the required depth */ if (attr->mode == MLX5HWS_MATCHER_RESOURCE_MODE_RULE && - attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) - attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log); + attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + size_rx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_rx->rule.num_log); + size_tx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_tx->rule.num_log); + } matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0; matcher->flags |= attr->isolated_matcher_end_ft_id ? @@ -861,14 +894,19 @@ static int hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) { struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_matcher *col_matcher; - int ret; + int i, ret; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (matcher->attr.mode != MLX5HWS_MATCHER_RESOURCE_MODE_RULE || matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) return 0; - if (!hws_matcher_requires_col_tbl(matcher->attr.rule.num_log)) + if (!hws_matcher_requires_col_tbl(size_rx->rule.num_log) && + !hws_matcher_requires_col_tbl(size_tx->rule.num_log)) return 0; col_matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); @@ -885,10 +923,16 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) col_matcher->flags |= MLX5HWS_MATCHER_FLAGS_COLLISION; col_matcher->attr.mode = MLX5HWS_MATCHER_RESOURCE_MODE_HTABLE; col_matcher->attr.optimize_flow_src = matcher->attr.optimize_flow_src; - col_matcher->attr.table.sz_row_log = matcher->attr.rule.num_log; - col_matcher->attr.table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; - if (col_matcher->attr.table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) - col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + for (i = 0; i < 2; i++) { + union mlx5hws_matcher_size *dst = &col_matcher->attr.size[i]; + union mlx5hws_matcher_size *src = &matcher->attr.size[i]; + + dst->table.sz_row_log = src->rule.num_log; + dst->table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; + if (dst->table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) + dst->table.sz_row_log -= + MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + } col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach; col_matcher->attr.isolated_matcher_end_ft_id = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 32e83cddcd60..ae20bcebfdde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -48,7 +48,8 @@ struct 
mlx5hws_match_template { struct mlx5hws_matcher_match_ste { u32 rtc_0_id; u32 rtc_1_id; - struct mlx5hws_pool *pool; + u32 ste_0_base; + u32 ste_1_base; }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index d8ac6c196211..59c14745ed0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -93,6 +93,23 @@ enum mlx5hws_matcher_distribute_mode { MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR = 0x1, }; +enum mlx5hws_matcher_size_type { + MLX5HWS_MATCHER_SIZE_TYPE_RX, + MLX5HWS_MATCHER_SIZE_TYPE_TX, + MLX5HWS_MATCHER_SIZE_TYPE_MAX, +}; + +union mlx5hws_matcher_size { + struct { + u8 sz_row_log; + u8 sz_col_log; + } table; + + struct { + u8 num_log; + } rule; +}; + struct mlx5hws_matcher_attr { /* Processing priority inside table */ u32 priority; @@ -107,16 +124,7 @@ struct mlx5hws_matcher_attr { enum mlx5hws_matcher_distribute_mode distribute_mode; /* Define whether the created matcher supports resizing into a bigger matcher */ bool resizable; - union { - struct { - u8 sz_row_log; - u8 sz_col_log; - } table; - - struct { - u8 num_log; - } rule; - }; + union mlx5hws_matcher_size size[MLX5HWS_MATCHER_SIZE_TYPE_MAX]; /* Optional AT attach configuration - Max number of additional AT */ u8 max_num_of_at_attach; /* Optional end FT (miss FT ID) for match RTC (for isolated matcher) */ @@ -727,18 +735,14 @@ mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); * @dests: The destination array. Each contains a destination action and can * have additional actions. * @ignore_flow_level: Whether to turn on 'ignore_flow_level' for this dest. - * @flow_source: Source port of the traffic for this actions. * @flags: Action creation flags (enum mlx5hws_action_flags). * * Return: pointer to mlx5hws_action on success NULL otherwise. */ struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags); + bool ignore_flow_level, u32 flags); /** * mlx5hws_action_create_insert_header - Create insert header action. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c index 5342a4cc7194..a94f094e72ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c @@ -3,10 +3,8 @@ #include "internal.h" -static void hws_rule_skip(struct mlx5hws_matcher *matcher, - struct mlx5hws_match_template *mt, - u32 flow_source, - bool *skip_rx, bool *skip_tx) +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx) { /* By default FDB rules are added to both RX and TX */ *skip_rx = false; @@ -14,20 +12,21 @@ static void hws_rule_skip(struct mlx5hws_matcher *matcher, if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) { *skip_rx = true; - } else if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { + return; + } + + if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { *skip_tx = true; - } else { - /* If no flow source was set for current rule, - * check for flow source in matcher attributes. 
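With mlx5hws_rule_skip() now exported and applied both per rule (via flow_source) and per matcher (via optimize_flow_src, which also zero-sizes the unused direction's STE range in hws_matcher_bind_mt()), the behavior collapses to a small table. Restated from the logic in this hunk:

	/* flow_source        optimize_flow_src    skip_rx    skip_tx
	 * LOCAL_VPORT        (ignored)            true       false
	 * UPLINK             (ignored)            false      true
	 * unset              FLOW_SRC_VPORT       true       false
	 * unset              FLOW_SRC_WIRE        false      true
	 * unset              none                 false      false
	 */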
- */ - if (matcher->attr.optimize_flow_src) { - *skip_tx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE; - *skip_rx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT; - return; - } + return; } + + /* If no flow source was set for current rule, + * check for flow source in matcher attributes. + */ + *skip_tx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE; + *skip_rx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT; } static void @@ -66,7 +65,8 @@ static void hws_rule_init_dep_wqe(struct mlx5hws_send_ring_dep_wqe *dep_wqe, attr->rule_idx : 0; if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - hws_rule_skip(matcher, mt, attr->flow_source, &skip_rx, &skip_tx); + mlx5hws_rule_skip(matcher, attr->flow_source, + &skip_rx, &skip_tx); if (!skip_rx) { dep_wqe->rtc_0 = matcher->match_ste.rtc_0_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h index 1c47a9c11572..d0f082b8dbf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h @@ -69,6 +69,9 @@ struct mlx5hws_rule { */ }; +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx); + void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste); int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 740b719e7072..2f0316616fa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -378,6 +378,9 @@ err_create_cq: mlx5_free_bfreg(mdev, &sq->bfreg); err_alloc_bfreg: kfree(sq); + + if (mdev->wc_state == MLX5_WC_STATE_UNSUPPORTED) + mlx5_core_warn(mdev, "Write combining is not supported\n"); } bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index d76d7a945899..d1f8a72cae53 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -142,8 +142,10 @@ static int mlxbf_gige_open(struct net_device *netdev) mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: clean_port failed: %pe\n", ERR_PTR(err)); return err; + } /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -154,19 +156,25 @@ static int mlxbf_gige_open(struct net_device *netdev) phy_start(phydev); err = mlxbf_gige_tx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: tx_init failed: %pe\n", ERR_PTR(err)); goto phy_deinit; + } err = mlxbf_gige_rx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: rx_init failed: %pe\n", ERR_PTR(err)); goto tx_deinit; + } netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll); napi_enable(&priv->napi); netif_start_queue(netdev); err = mlxbf_gige_request_irqs(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: request_irqs failed: %pe\n", ERR_PTR(err)); goto napi_deinit; + } mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); @@ -418,8 +426,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) /* Attach MDIO device */ err = 
mlxbf_gige_mdio_probe(pdev, priv); - if (err) + if (err) { + dev_err(priv->dev, "probe: mdio_probe failed: %pe\n", ERR_PTR(err)); return err; + } priv->base = base; priv->llu_base = llu_base; @@ -438,7 +448,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + dev_err(&pdev->dev, "DMA configuration failed: %pe\n", ERR_PTR(err)); goto out; } @@ -468,7 +478,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) mlxbf_gige_link_cfgs[priv->hw_version].adjust_link, mlxbf_gige_link_cfgs[priv->hw_version].phy_mode); if (err) { - dev_err(&pdev->dev, "Could not attach to PHY\n"); + dev_err(&pdev->dev, "Could not attach to PHY: %pe\n", ERR_PTR(err)); goto out; } @@ -479,7 +489,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = register_netdev(netdev); if (err) { - dev_err(&pdev->dev, "Failed to register netdev\n"); + dev_err(&pdev->dev, "Failed to register netdev: %pe\n", ERR_PTR(err)); phy_disconnect(phydev); goto out; } diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile index 0dbc634adb4b..15e8ff649615 100644 --- a/drivers/net/ethernet/meta/fbnic/Makefile +++ b/drivers/net/ethernet/meta/fbnic/Makefile @@ -12,6 +12,7 @@ fbnic-y := fbnic_csr.o \ fbnic_devlink.o \ fbnic_ethtool.o \ fbnic_fw.o \ + fbnic_fw_log.o \ fbnic_hw_stats.o \ fbnic_hwmon.o \ fbnic_irq.o \ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 65815d4f379e..c376e06880c9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -12,6 +12,7 @@ #include "fbnic_csr.h" #include "fbnic_fw.h" +#include "fbnic_fw_log.h" #include "fbnic_hw_stats.h" #include "fbnic_mac.h" #include "fbnic_rpc.h" @@ -85,6 +86,8 @@ struct fbnic_dev { /* Lock protecting access to hw_stats */ spinlock_t hw_stats_lock; + + struct fbnic_fw_log fw_log; }; /* Reserve entry 0 in the MSI-X "others" array until we have filled all diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 9c89d5378668..a81db842aa53 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -12,13 +12,28 @@ #define DESC_BIT(nr) BIT_ULL(nr) #define DESC_GENMASK(h, l) GENMASK_ULL(h, l) +#define FW_VER_CODE(_major, _minor, _patch, _build) ( \ + FIELD_PREP(FBNIC_FW_CAP_RESP_VERSION_MAJOR, _major) | \ + FIELD_PREP(FBNIC_FW_CAP_RESP_VERSION_MINOR, _minor) | \ + FIELD_PREP(FBNIC_FW_CAP_RESP_VERSION_PATCH, _patch) | \ + FIELD_PREP(FBNIC_FW_CAP_RESP_VERSION_BUILD, _build)) + /* Defines the minimum firmware version required by the driver */ -#define MIN_FW_MAJOR_VERSION 0 -#define MIN_FW_MINOR_VERSION 10 -#define MIN_FW_BUILD_VERSION 6 -#define MIN_FW_VERSION_CODE (MIN_FW_MAJOR_VERSION * (1u << 24) + \ - MIN_FW_MINOR_VERSION * (1u << 16) + \ - MIN_FW_BUILD_VERSION) +#define MIN_FW_VER_CODE FW_VER_CODE(0, 10, 6, 0) + +/* Defines the minimum firmware version required for firmware logs */ +#define MIN_FW_VER_CODE_LOG FW_VER_CODE(0, 12, 9, 0) + +/* Driver can request that firmware sends all cached logs in bulk. This + * feature was enabled on older firmware however firmware has a bug + * which attempted to send 30 messages per mbx message which caused an + * overflow flooding the mailbox. This results in a kernel warning + * related to corrupt mailbox messages. 
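The FW_VER_CODE() macro turns the minimum-version checks into single integer comparisons. Assuming the four FBNIC_FW_CAP_RESP_VERSION_* masks occupy descending bytes (only the MAJOR mask, bits 31:24, appears later in this diff), the history-support threshold works out as:

	/* FW_VER_CODE(25, 5, 7, 0):
	 *   major 25 -> 0x19 << 24
	 *   minor  5 -> 0x05 << 16
	 *   patch  7 -> 0x07 <<  8
	 *   build  0 -> 0x00
	 * = 0x19050700, so "version >= MIN_FW_VER_CODE_HIST" is one compare.
	 */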
+ * + * If firmware is new enough only request sending historical logs when + * the log buffer is empty to prevent duplicate logs. + */ +#define MIN_FW_VER_CODE_HIST FW_VER_CODE(25, 5, 7, 0) #define PCI_DEVICE_ID_META_FBNIC_ASIC 0x0013 diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index e8f2d7f2d962..b7238dd967fe 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -170,6 +170,33 @@ static int fbnic_dbg_ipo_dst_show(struct seq_file *s, void *v) } DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_dst); +static int fbnic_dbg_fw_log_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + struct fbnic_fw_log_entry *entry; + unsigned long flags; + + if (!fbnic_fw_log_ready(fbd)) + return -ENXIO; + + spin_lock_irqsave(&fbd->fw_log.lock, flags); + + list_for_each_entry_reverse(entry, &fbd->fw_log.entries, list) { + seq_printf(s, FBNIC_FW_LOG_FMT, entry->index, + (entry->timestamp / (MSEC_PER_SEC * 60 * 60 * 24)), + (entry->timestamp / (MSEC_PER_SEC * 60 * 60)) % 24, + ((entry->timestamp / (MSEC_PER_SEC * 60) % 60)), + ((entry->timestamp / MSEC_PER_SEC) % 60), + (entry->timestamp % MSEC_PER_SEC), + entry->msg); + } + + spin_unlock_irqrestore(&fbd->fw_log.lock, flags); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_fw_log); + static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v) { struct fbnic_dev *fbd = s->private; @@ -222,6 +249,8 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd) &fbnic_dbg_ipo_src_fops); debugfs_create_file("ipo_dst", 0400, fbd->dbg_fbd, fbd, &fbnic_dbg_ipo_dst_fops); + debugfs_create_file("fw_log", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_fw_log_fops); } void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 1d220d8369e7..0c55be7d2547 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -573,16 +573,15 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results) if (!fbd->fw_cap.running.mgmt.version) return -EINVAL; - if (fbd->fw_cap.running.mgmt.version < MIN_FW_VERSION_CODE) { + if (fbd->fw_cap.running.mgmt.version < MIN_FW_VER_CODE) { + char required_ver[FBNIC_FW_VER_MAX_SIZE]; char running_ver[FBNIC_FW_VER_MAX_SIZE]; fbnic_mk_fw_ver_str(fbd->fw_cap.running.mgmt.version, running_ver); - dev_err(fbd->dev, "Device firmware version(%s) is older than minimum required version(%02d.%02d.%02d)\n", - running_ver, - MIN_FW_MAJOR_VERSION, - MIN_FW_MINOR_VERSION, - MIN_FW_BUILD_VERSION); + fbnic_mk_fw_ver_str(MIN_FW_VER_CODE, required_ver); + dev_err(fbd->dev, "Device firmware version(%s) is older than minimum required version(%s)\n", + running_ver, required_ver); /* Disable TX mailbox to prevent card use until firmware is * updated. 
*/ @@ -1035,6 +1034,169 @@ msg_err: return err; } +static const struct fbnic_tlv_index fbnic_fw_log_req_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_LOG_MSEC), + FBNIC_TLV_ATTR_U64(FBNIC_FW_LOG_INDEX), + FBNIC_TLV_ATTR_STRING(FBNIC_FW_LOG_MSG, FBNIC_FW_LOG_MAX_SIZE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_LOG_LENGTH), + FBNIC_TLV_ATTR_ARRAY(FBNIC_FW_LOG_MSEC_ARRAY), + FBNIC_TLV_ATTR_ARRAY(FBNIC_FW_LOG_INDEX_ARRAY), + FBNIC_TLV_ATTR_ARRAY(FBNIC_FW_LOG_MSG_ARRAY), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_process_log_array(struct fbnic_tlv_msg **results, + u16 length, u16 arr_type_idx, + u16 attr_type_idx, + struct fbnic_tlv_msg **tlv_array_out) +{ + struct fbnic_tlv_msg *attr; + int attr_len; + int err; + + if (!results[attr_type_idx]) + return -EINVAL; + + tlv_array_out[0] = results[attr_type_idx]; + + if (!length) + return 0; + + if (!results[arr_type_idx]) + return -EINVAL; + + attr = results[arr_type_idx]; + attr_len = le16_to_cpu(attr->hdr.len) / sizeof(u32) - 1; + err = fbnic_tlv_attr_parse_array(&attr[1], attr_len, &tlv_array_out[1], + fbnic_fw_log_req_index, + attr_type_idx, + length); + if (err) + return err; + + return 0; +} + +static int fbnic_fw_parse_logs(struct fbnic_dev *fbd, + struct fbnic_tlv_msg **msec_tlv, + struct fbnic_tlv_msg **index_tlv, + struct fbnic_tlv_msg **log_tlv, + int count) +{ + int i; + + for (i = 0; i < count; i++) { + char log[FBNIC_FW_LOG_MAX_SIZE]; + ssize_t len; + u64 index; + u32 msec; + int err; + + if (!msec_tlv[i] || !index_tlv[i] || !log_tlv[i]) { + dev_warn(fbd->dev, "Received log message with missing attributes!\n"); + return -EINVAL; + } + + index = fbnic_tlv_attr_get_signed(index_tlv[i], 0); + msec = fbnic_tlv_attr_get_signed(msec_tlv[i], 0); + len = fbnic_tlv_attr_get_string(log_tlv[i], log, + FBNIC_FW_LOG_MAX_SIZE); + if (len < 0) + return len; + + err = fbnic_fw_log_write(fbd, index, msec, log); + if (err) + return err; + } + + return 0; +} + +static int fbnic_fw_parse_log_req(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_tlv_msg *index_tlv[FBNIC_FW_MAX_LOG_HISTORY]; + struct fbnic_tlv_msg *msec_tlv[FBNIC_FW_MAX_LOG_HISTORY]; + struct fbnic_tlv_msg *log_tlv[FBNIC_FW_MAX_LOG_HISTORY]; + struct fbnic_dev *fbd = opaque; + u16 length; + int err; + + length = fta_get_uint(results, FBNIC_FW_LOG_LENGTH); + if (length >= FBNIC_FW_MAX_LOG_HISTORY) + return -E2BIG; + + err = fbnic_fw_process_log_array(results, length, + FBNIC_FW_LOG_MSEC_ARRAY, + FBNIC_FW_LOG_MSEC, msec_tlv); + if (err) + return err; + + err = fbnic_fw_process_log_array(results, length, + FBNIC_FW_LOG_INDEX_ARRAY, + FBNIC_FW_LOG_INDEX, index_tlv); + if (err) + return err; + + err = fbnic_fw_process_log_array(results, length, + FBNIC_FW_LOG_MSG_ARRAY, + FBNIC_FW_LOG_MSG, log_tlv); + if (err) + return err; + + err = fbnic_fw_parse_logs(fbd, msec_tlv, index_tlv, log_tlv, + length + 1); + if (err) + return err; + + return 0; +} + +int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable, + bool send_log_history) +{ + struct fbnic_tlv_msg *msg; + int err; + + if (fbd->fw_cap.running.mgmt.version < MIN_FW_VER_CODE_LOG) { + dev_warn(fbd->dev, "Firmware version is too old to support firmware logs!\n"); + return -EOPNOTSUPP; + } + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_LOG_SEND_LOGS_REQ); + if (!msg) + return -ENOMEM; + + if (enable) { + err = fbnic_tlv_attr_put_flag(msg, FBNIC_SEND_LOGS); + if (err) + goto free_message; + + /* Report request for version 1 of logs */ + err = fbnic_tlv_attr_put_int(msg, FBNIC_SEND_LOGS_VERSION, + FBNIC_FW_LOG_VERSION); + if (err) + 
goto free_message; + + if (send_log_history) { + err = fbnic_tlv_attr_put_flag(msg, + FBNIC_SEND_LOGS_HISTORY); + if (err) + goto free_message; + } + } + + err = fbnic_mbx_map_tlv_msg(fbd, msg); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { FBNIC_TLV_PARSER(FW_CAP_RESP, fbnic_fw_cap_resp_index, fbnic_fw_parse_cap_resp), @@ -1054,6 +1216,9 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { FBNIC_TLV_PARSER(TSENE_READ_RESP, fbnic_tsene_read_resp_index, fbnic_fw_parse_tsene_read_resp), + FBNIC_TLV_PARSER(LOG_MSG_REQ, + fbnic_fw_log_req_index, + fbnic_fw_parse_log_req), FBNIC_TLV_MSG_ERROR }; @@ -1167,7 +1332,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) * to indicate we entered the polling state waiting for a response */ for (fbd->fw_cap.running.mgmt.version = 1; - fbd->fw_cap.running.mgmt.version < MIN_FW_VERSION_CODE;) { + fbd->fw_cap.running.mgmt.version < MIN_FW_VER_CODE;) { if (!tx_mbx->ready) err = -ENODEV; if (err) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 555b231b38c1..fde331696fdd 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -22,7 +22,20 @@ struct fbnic_fw_mbx { #define FBNIC_FW_VER_MAX_SIZE 32 // Formatted version is in the format XX.YY.ZZ_RRR_COMMIT #define FBNIC_FW_CAP_RESP_COMMIT_MAX_SIZE (FBNIC_FW_VER_MAX_SIZE - 13) +#define FBNIC_FW_LOG_VERSION 1 #define FBNIC_FW_LOG_MAX_SIZE 256 +/* + * The max amount of logs which can fit in a single mailbox message. Firmware + * assumes each mailbox message is 4096B. The amount of messages supported is + * calculated as 4096 minus headers for message, arrays, and length minus the + * size of length divided by headers for each array plus the maximum LOG size, + * and the size of MSEC and INDEX. 
Put another way: + * + * MAX_LOG_HISTORY = ((4096 - TLV_HDR_SZ * 5 - LENGTH_SZ) + * / (FBNIC_FW_LOG_MAX_SIZE + TLV_HDR_SZ * 3 + MSEC_SZ + * + INDEX_SZ)) + */ +#define FBNIC_FW_MAX_LOG_HISTORY 14 struct fbnic_fw_ver { u32 version; @@ -82,6 +95,8 @@ int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, int cancel_error); int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); +int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable, + bool send_log_history); struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); @@ -125,6 +140,9 @@ enum { FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_RESP = 0x29, FBNIC_TLV_MSG_ID_TSENE_READ_REQ = 0x3C, FBNIC_TLV_MSG_ID_TSENE_READ_RESP = 0x3D, + FBNIC_TLV_MSG_ID_LOG_SEND_LOGS_REQ = 0x43, + FBNIC_TLV_MSG_ID_LOG_MSG_REQ = 0x44, + FBNIC_TLV_MSG_ID_LOG_MSG_RESP = 0x45, }; #define FBNIC_FW_CAP_RESP_VERSION_MAJOR CSR_GENMASK(31, 24) @@ -199,4 +217,22 @@ enum { FBNIC_FW_FINISH_UPGRADE_MSG_MAX }; +enum { + FBNIC_SEND_LOGS = 0x0, + FBNIC_SEND_LOGS_VERSION = 0x1, + FBNIC_SEND_LOGS_HISTORY = 0x2, + FBNIC_SEND_LOGS_MSG_MAX +}; + +enum { + FBNIC_FW_LOG_MSEC = 0x0, + FBNIC_FW_LOG_INDEX = 0x1, + FBNIC_FW_LOG_MSG = 0x2, + FBNIC_FW_LOG_LENGTH = 0x3, + FBNIC_FW_LOG_MSEC_ARRAY = 0x4, + FBNIC_FW_LOG_INDEX_ARRAY = 0x5, + FBNIC_FW_LOG_MSG_ARRAY = 0x6, + FBNIC_FW_LOG_MSG_MAX +}; + #endif /* _FBNIC_FW_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c new file mode 100644 index 000000000000..38749d47cee6 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) Meta Platforms, Inc. and affiliates. 
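Plugging plausible sizes into the MAX_LOG_HISTORY formula above, assuming 4-byte TLV headers, a 4-byte LENGTH value, a 4-byte MSEC payload and an 8-byte INDEX payload (none of these constants are visible in this diff):

	/* (4096 - 4 * 5 - 4) / (256 + 4 * 3 + 4 + 8)
	 *   = 4072 / 280
	 *   = 14 (truncated), matching FBNIC_FW_MAX_LOG_HISTORY
	 */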
*/ + +#include <linux/spinlock.h> +#include <linux/vmalloc.h> + +#include "fbnic.h" +#include "fbnic_fw.h" +#include "fbnic_fw_log.h" + +void fbnic_fw_log_enable(struct fbnic_dev *fbd, bool send_hist) +{ + int err; + + if (!fbnic_fw_log_ready(fbd)) + return; + + if (fbd->fw_cap.running.mgmt.version < MIN_FW_VER_CODE_HIST) + send_hist = false; + + err = fbnic_fw_xmit_send_logs(fbd, true, send_hist); + if (err && err != -EOPNOTSUPP) + dev_warn(fbd->dev, "Unable to enable firmware logs: %d\n", err); +} + +void fbnic_fw_log_disable(struct fbnic_dev *fbd) +{ + int err; + + err = fbnic_fw_xmit_send_logs(fbd, false, false); + if (err && err != -EOPNOTSUPP) + dev_warn(fbd->dev, "Unable to disable firmware logs: %d\n", + err); +} + +int fbnic_fw_log_init(struct fbnic_dev *fbd) +{ + struct fbnic_fw_log *log = &fbd->fw_log; + void *data; + + if (WARN_ON_ONCE(fbnic_fw_log_ready(fbd))) + return -EEXIST; + + data = vmalloc(FBNIC_FW_LOG_SIZE); + if (!data) + return -ENOMEM; + + spin_lock_init(&fbd->fw_log.lock); + INIT_LIST_HEAD(&log->entries); + log->size = FBNIC_FW_LOG_SIZE; + log->data_start = data; + log->data_end = data + FBNIC_FW_LOG_SIZE; + + fbnic_fw_log_enable(fbd, true); + + return 0; +} + +void fbnic_fw_log_free(struct fbnic_dev *fbd) +{ + struct fbnic_fw_log *log = &fbd->fw_log; + + if (!fbnic_fw_log_ready(fbd)) + return; + + fbnic_fw_log_disable(fbd); + INIT_LIST_HEAD(&log->entries); + log->size = 0; + vfree(log->data_start); + log->data_start = NULL; + log->data_end = NULL; +} + +int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, + char *msg) +{ + struct fbnic_fw_log_entry *entry, *head, *tail, *next; + struct fbnic_fw_log *log = &fbd->fw_log; + size_t msg_len = strlen(msg) + 1; + unsigned long flags; + void *entry_end; + + if (!fbnic_fw_log_ready(fbd)) { + dev_err(fbd->dev, "Firmware sent log entry without being requested!\n"); + return -ENOSPC; + } + + spin_lock_irqsave(&log->lock, flags); + + if (list_empty(&log->entries)) { + entry = log->data_start; + } else { + head = list_first_entry(&log->entries, typeof(*head), list); + entry = (struct fbnic_fw_log_entry *)&head->msg[head->len + 1]; + entry = PTR_ALIGN(entry, 8); + } + + entry_end = &entry->msg[msg_len + 1]; + + /* We've reached the end of the buffer, wrap around */ + if (entry_end > log->data_end) { + entry = log->data_start; + entry_end = &entry->msg[msg_len + 1]; + } + + /* Make room for entry by removing from tail. */ + list_for_each_entry_safe_reverse(tail, next, &log->entries, list) { + if (entry <= tail && entry_end > (void *)tail) + list_del(&tail->list); + else + break; + } + + entry->index = index; + entry->timestamp = timestamp; + entry->len = msg_len; + strscpy(entry->msg, msg, entry->len); + list_add(&entry->list, &log->entries); + + spin_unlock_irqrestore(&log->lock, flags); + + return 0; +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h new file mode 100644 index 000000000000..cb6555f40a24 --- /dev/null +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _FBNIC_FW_LOG_H_ +#define _FBNIC_FW_LOG_H_ + +#include <linux/spinlock.h> +#include <linux/types.h> + +/* A 512K log buffer was chosen fairly arbitrarily */ +#define FBNIC_FW_LOG_SIZE (512 * 1024) /* bytes */ + +/* Firmware log output is prepended with log index followed by a timestamp. 
+ * The timestamp is similar to Zephyr's format DD:HH:MM:SS.MMM + */ +#define FBNIC_FW_LOG_FMT "[%5lld] [%02ld:%02ld:%02ld:%02ld.%03ld] %s\n" + +struct fbnic_dev; + +struct fbnic_fw_log_entry { + struct list_head list; + u64 index; + u32 timestamp; + u16 len; + char msg[] __counted_by(len); +}; + +struct fbnic_fw_log { + void *data_start; + void *data_end; + size_t size; + struct list_head entries; + /* Spin lock for accessing or modifying entries */ + spinlock_t lock; +}; + +#define fbnic_fw_log_ready(_fbd) (!!(_fbd)->fw_log.data_start) + +void fbnic_fw_log_enable(struct fbnic_dev *fbd, bool send_hist); +void fbnic_fw_log_disable(struct fbnic_dev *fbd); +int fbnic_fw_log_init(struct fbnic_dev *fbd); +void fbnic_fw_log_free(struct fbnic_dev *fbd); +int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, + char *msg); +#endif /* _FBNIC_FW_LOG_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 249d3ef862d5..b70e4cadb37b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -291,6 +291,17 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto free_irqs; } + /* Send the request to enable the FW logging to host. Note if this + * fails we ignore the error and just display a message as it is + * possible the FW is just too old to support the logging and needs + * to be updated. + */ + err = fbnic_fw_log_init(fbd); + if (err) + dev_warn(fbd->dev, + "Unable to initialize firmware log buffer: %d\n", + err); + fbnic_devlink_register(fbd); fbnic_dbg_fbd_init(fbd); spin_lock_init(&fbd->hw_stats_lock); @@ -365,6 +376,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_hwmon_unregister(fbd); fbnic_dbg_fbd_exit(fbd); fbnic_devlink_unregister(fbd); + fbnic_fw_log_free(fbd); fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); @@ -389,6 +401,8 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: + fbnic_fw_log_disable(fbd); + devl_lock(priv_to_devlink(fbd)); fbnic_fw_free_mbx(fbd); @@ -434,6 +448,11 @@ static int __fbnic_pm_resume(struct device *dev) devl_unlock(priv_to_devlink(fbd)); + /* Only send log history if log buffer is empty to prevent duplicate + * log entries. 
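As a worked example of the FBNIC_FW_LOG_FMT div/mod chain that fbnic_dbg_fw_log_show() applies to entry->timestamp, take a timestamp of 93784123 msec:

	/* 93784123 / 86400000        =   1   (days)
	 * 93784123 / 3600000  % 24   =   2   (hours)
	 * 93784123 / 60000    % 60   =   3   (minutes)
	 * 93784123 / 1000     % 60   =   4   (seconds)
	 * 93784123            % 1000 = 123   (milliseconds)
	 *
	 * -> rendered as "[<index>] [01:02:03:04.123] <msg>"
	 */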
+ */ + fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries)); + /* No netdev means there isn't a network interface to bring up */ if (fbnic_init_failure(fbd)) return 0; @@ -455,6 +474,8 @@ static int __fbnic_pm_resume(struct device *dev) return 0; err_free_mbx: + fbnic_fw_log_disable(fbd); + rtnl_unlock(); fbnic_fw_free_mbx(fbd); err_free_irqs: diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index a468cd8e5f36..d6c0699bc8cf 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -657,7 +657,7 @@ static int mana_gd_register_irq(struct gdma_queue *queue, return 0; } -static void mana_gd_deregiser_irq(struct gdma_queue *queue) +static void mana_gd_deregister_irq(struct gdma_queue *queue) { struct gdma_dev *gd = queue->gdma_dev; struct gdma_irq_context *gic; @@ -750,7 +750,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); } - mana_gd_deregiser_irq(queue); + mana_gd_deregister_irq(queue); if (queue->eq.disable_needed) mana_gd_disable_queue(queue); diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 6b3f7fca8d15..05c4b6c8c9c3 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1259,7 +1259,12 @@ static int rtsn_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->pdev = pdev; priv->ndev = ndev; + priv->ptp_priv = rcar_gen4_ptp_alloc(pdev); + if (!priv->ptp_priv) { + ret = -ENOMEM; + goto error_free; + } spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c index 3303784cbbf8..3b7947a7a7ba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sophgo.c @@ -54,6 +54,7 @@ static int sophgo_dwmac_probe(struct platform_device *pdev) } static const struct of_device_id sophgo_dwmac_match[] = { + { .compatible = "sophgo,sg2042-dwmac" }, { .compatible = "sophgo,sg2044-dwmac" }, { /* sentinel */ } }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7840bc403788..5dcc95bc0ad2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -364,19 +364,17 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & XGMAC_NIS)) { - if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->rx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_rx; - } - if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->tx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_tx; - } + if (likely(intr_status & XGMAC_RI)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_tx; } /* Clear interrupts */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 4164b3a580d8..38b1c04c92a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -410,6 +410,7 @@ static const char * const stmmac_gmac4_compats[] = { "snps,dwmac-4.00", "snps,dwmac-4.10a", "snps,dwmac-4.20a", + "snps,dwmac-5.00a", "snps,dwmac-5.10a", "snps,dwmac-5.20", "snps,dwmac-5.30a", diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 75d7e10944d4..67625fb12101 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp, addr = np->ops->map_page(np->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (!addr) { + if (np->ops->mapping_error(np->device, addr)) { __free_page(page); return -ENOMEM; } @@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, len = skb_headlen(skb); mapping = np->ops->map_single(np->device, skb->data, len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_drop; prod = rp->prod; @@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, mapping = np->ops->map_page(np->device, skb_frag_page(frag), skb_frag_off(frag), len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_unmap; rp->tx_buffs[prod].skb = NULL; rp->tx_buffs[prod].mapping = mapping; @@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, out: return NETDEV_TX_OK; +out_unmap: + while (i--) { + const skb_frag_t *frag; + + prod = PREVIOUS_TX(rp, prod); + frag = &skb_shinfo(skb)->frags[i]; + np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping, + skb_headlen(skb), DMA_TO_DEVICE); + out_drop: rp->tx_errors++; kfree_skb(skb); @@ -9645,6 +9662,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address, dma_unmap_single(dev, dma_address, size, direction); } +static int niu_pci_mapping_error(struct device *dev, u64 addr) +{ + return dma_mapping_error(dev, addr); +} + static const struct niu_ops niu_pci_ops = { .alloc_coherent = niu_pci_alloc_coherent, .free_coherent = niu_pci_free_coherent, @@ -9652,6 +9674,7 @@ static const struct niu_ops niu_pci_ops = { .unmap_page = niu_pci_unmap_page, .map_single = niu_pci_map_single, .unmap_single = niu_pci_unmap_single, + .mapping_error = niu_pci_mapping_error, }; static void niu_driver_version(void) @@ -10020,6 +10043,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address, /* Nothing to do. */ } +static int niu_phys_mapping_error(struct device *dev, u64 dma_address) +{ + return false; +} + static const struct niu_ops niu_phys_ops = { .alloc_coherent = niu_phys_alloc_coherent, .free_coherent = niu_phys_free_coherent, @@ -10027,6 +10055,7 @@ static const struct niu_ops niu_phys_ops = { .unmap_page = niu_phys_unmap_page, .map_single = niu_phys_map_single, .unmap_single = niu_phys_unmap_single, + .mapping_error = niu_phys_mapping_error, }; static int niu_of_probe(struct platform_device *op) diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h index 04c215f91fc0..0b169c08b0f2 100644 --- a/drivers/net/ethernet/sun/niu.h +++ b/drivers/net/ethernet/sun/niu.h @@ -2879,6 +2879,9 @@ struct tx_ring_info { #define NEXT_TX(tp, index) \ (((index) + 1) < (tp)->pending ? 
((index) + 1) : 0) +#define PREVIOUS_TX(tp, index) \ + (((index) - 1) >= 0 ? ((index) - 1) : (((tp)->pending) - 1)) + static inline u32 niu_tx_avail(struct tx_ring_info *tp) { return (tp->pending - @@ -3140,6 +3143,7 @@ struct niu_ops { enum dma_data_direction direction); void (*unmap_single)(struct device *dev, u64 dma_address, size_t size, enum dma_data_direction direction); + int (*mapping_error)(struct device *dev, u64 dma_address); }; struct niu_link_config { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 519757e618ad..ecd6ecac87bb 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -856,8 +856,6 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, { struct sk_buff *skb; - len += AM65_CPSW_HEADROOM; - skb = build_skb(page_addr, len); if (unlikely(!skb)) return NULL; @@ -1344,7 +1342,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, } skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE, headroom); + PAGE_SIZE, headroom); if (unlikely(!skb)) { new_page = page; goto requeue; diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index e5fc942c28cc..c548f4e80565 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -64,4 +64,37 @@ config TXGBE To compile this driver as a module, choose M here. The module will be called txgbe. +config TXGBEVF + tristate "Wangxun(R) 10/25/40G Virtual Function Ethernet support" + depends on PCI + depends on PCI_MSI + select LIBWX + select PHYLINK + help + This driver supports virtual functions for SP1000A, WX1820AL, + WX5XXX, WX5XXXAL. + + This driver was formerly named txgbevf. + + More specific information on configuring the driver is in + <file:Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst>. + + To compile this driver as a module, choose M here. MSI-X interrupt + support is required for this driver to work correctly. + +config NGBEVF + tristate "Wangxun(R) GbE Virtual Function Ethernet support" + depends on PCI_MSI + select LIBWX + help + This driver supports virtual functions for WX1860, WX1860AL. + + This driver was formerly named ngbevf. + + More specific information on configuring the driver is in + <file:Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst>. + + To compile this driver as a module, choose M here. MSI-X interrupt + support is required for this driver to work correctly. 
+ endif # NET_VENDOR_WANGXUN diff --git a/drivers/net/ethernet/wangxun/Makefile b/drivers/net/ethernet/wangxun/Makefile index ca19311dbe38..0a71a710b717 100644 --- a/drivers/net/ethernet/wangxun/Makefile +++ b/drivers/net/ethernet/wangxun/Makefile @@ -5,4 +5,6 @@ obj-$(CONFIG_LIBWX) += libwx/ obj-$(CONFIG_TXGBE) += txgbe/ +obj-$(CONFIG_TXGBEVF) += txgbevf/ obj-$(CONFIG_NGBE) += ngbe/ +obj-$(CONFIG_NGBEVF) += ngbevf/ diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile index 9b78b604a94e..a71b0ad77de3 100644 --- a/drivers/net/ethernet/wangxun/libwx/Makefile +++ b/drivers/net/ethernet/wangxun/libwx/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_LIBWX) += libwx.o libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o +libwx-objs += wx_vf.o wx_vf_lib.o wx_vf_common.o diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 0f4be72116b8..58e9d6a38802 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -11,6 +11,7 @@ #include "wx_type.h" #include "wx_lib.h" #include "wx_sriov.h" +#include "wx_vf.h" #include "wx_hw.h" static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum) @@ -124,6 +125,11 @@ void wx_intr_enable(struct wx *wx, u64 qmask) { u32 mask; + if (wx->pdev->is_virtfn) { + wr32(wx, WX_VXIMC, qmask); + return; + } + mask = (qmask & U32_MAX); if (mask) wr32(wx, WX_PX_IMC(0), mask); @@ -1107,7 +1113,7 @@ static int wx_write_uc_addr_list(struct net_device *netdev, int pool) * by the MO field of the MCSTCTRL. The MO field is set during initialization * to mc_filter_type. **/ -static u32 wx_mta_vector(struct wx *wx, u8 *mc_addr) +u32 wx_mta_vector(struct wx *wx, u8 *mc_addr) { u32 vector = 0; @@ -1827,7 +1833,7 @@ void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring) } EXPORT_SYMBOL(wx_disable_rx_queue); -static void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring) +void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring) { u8 reg_idx = ring->reg_idx; u32 rxdctl; @@ -1843,6 +1849,7 @@ static void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring) reg_idx); } } +EXPORT_SYMBOL(wx_enable_rx_queue); static void wx_configure_srrctl(struct wx *wx, struct wx_ring *rx_ring) @@ -2368,7 +2375,8 @@ int wx_sw_init(struct wx *wx) wx->bus.device = PCI_SLOT(pdev->devfn); wx->bus.func = PCI_FUNC(pdev->devfn); - if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN) { + if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || + pdev->is_virtfn) { wx->subsystem_vendor_id = pdev->subsystem_vendor; wx->subsystem_device_id = pdev->subsystem_device; } else { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 26a56cba60b9..2393a743b564 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -29,6 +29,7 @@ void wx_mac_set_default_filter(struct wx *wx, u8 *addr); int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool); int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool); void wx_flush_sw_mac_table(struct wx *wx); +u32 wx_mta_vector(struct wx *wx, u8 *mc_addr); int wx_set_mac(struct net_device *netdev, void *p); void wx_disable_rx(struct wx *wx); int wx_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); @@ -37,6 +38,7 @@ void wx_enable_sec_rx_path(struct wx *wx); void wx_set_rx_mode(struct net_device *netdev); int wx_change_mtu(struct net_device *netdev, int new_mtu); void 
wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring); +void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring); void wx_configure_rx(struct wx *wx); void wx_configure(struct wx *wx); void wx_start_hw(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index c57cc4f27249..0e76be1c8154 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1705,6 +1705,7 @@ static void wx_set_rss_queues(struct wx *wx) clear_bit(WX_FLAG_FDIR_HASH, wx->flags); + wx->ring_feature[RING_F_FDIR].indices = 1; /* Use Flow Director in addition to RSS to ensure the best * distribution of flows across cores, even when an FDIR flow * isn't matched. @@ -1746,7 +1747,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = { .pre_vectors = 1 }; + struct irq_affinity affd = { .post_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ @@ -1783,16 +1784,24 @@ static int wx_acquire_msix_vectors(struct wx *wx) return nvecs; } - wx->msix_entry->entry = 0; - wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); nvecs -= 1; for (i = 0; i < nvecs; i++) { wx->msix_q_entries[i].entry = i; - wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1); + wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i); } wx->num_q_vectors = nvecs; + wx->msix_entry->entry = nvecs; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs); + + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) { + wx->msix_entry->entry = 0; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); + wx->msix_q_entries[0].entry = 0; + wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1); + } + return 0; } @@ -1810,7 +1819,7 @@ static int wx_set_interrupt_capability(struct wx *wx) /* We will try to get MSI-X interrupts first */ ret = wx_acquire_msix_vectors(wx); - if (ret == 0 || (ret == -ENOMEM)) + if (ret == 0 || (ret == -ENOMEM) || pdev->is_virtfn) return ret; /* Disable VMDq support */ @@ -2161,7 +2170,12 @@ int wx_init_interrupt_scheme(struct wx *wx) int ret; /* Number of supported queues */ - wx_set_num_queues(wx); + if (wx->pdev->is_virtfn) { + if (wx->set_num_queues) + wx->set_num_queues(wx); + } else { + wx_set_num_queues(wx); + } /* Set interrupt mode */ ret = wx_set_interrupt_capability(wx); @@ -2291,6 +2305,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction, if (direction == -1) { /* other causes */ + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) + msix_vector = 0; msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = 0; ivar = rd32(wx, WX_PX_MISC_IVAR); @@ -2299,8 +2315,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction, wr32(wx, WX_PX_MISC_IVAR, ivar); } else { /* tx or rx causes */ - if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7)) - msix_vector += 1; /* offset for queue vectors */ msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = ((16 * (queue & 1)) + (8 * direction)); ivar = rd32(wx, WX_PX_IVAR(queue >> 1)); @@ -2339,7 +2353,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector) itr_reg |= WX_PX_ITR_CNT_WDIS; - wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg); + wr32(wx, WX_PX_ITR(v_idx), itr_reg); } /** @@ -2392,9 +2406,9 @@ void wx_configure_vectors(struct wx *wx) wx_write_eitr(q_vector); } - wx_set_ivar(wx, -1, 0, 0); + wx_set_ivar(wx, -1, 0, v_idx); if (pdev->msix_enabled) - wr32(wx, WX_PX_ITR(0), 1950); + wr32(wx, WX_PX_ITR(v_idx), 1950); } EXPORT_SYMBOL(wx_configure_vectors); diff --git 
a/drivers/net/ethernet/wangxun/libwx/wx_mbx.c b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c index 73af5f11c3bd..2aa03eadf064 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_mbx.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c @@ -174,3 +174,246 @@ int wx_check_for_rst_pf(struct wx *wx, u16 vf) return 0; } + +static u32 wx_read_v2p_mailbox(struct wx *wx) +{ + u32 mailbox = rd32(wx, WX_VXMAILBOX); + + mailbox |= wx->mbx.mailbox; + wx->mbx.mailbox |= mailbox & WX_VXMAILBOX_R2C_BITS; + + return mailbox; +} + +static u32 wx_mailbox_get_lock_vf(struct wx *wx) +{ + wr32(wx, WX_VXMAILBOX, WX_VXMAILBOX_VFU); + return wx_read_v2p_mailbox(wx); +} + +/** + * wx_obtain_mbx_lock_vf - obtain mailbox lock + * @wx: pointer to the HW structure + * + * Return: returns 0 on success, -EBUSY on failure + **/ +static int wx_obtain_mbx_lock_vf(struct wx *wx) +{ + int count = 5, ret; + u32 mailbox; + + ret = readx_poll_timeout_atomic(wx_mailbox_get_lock_vf, wx, mailbox, + (mailbox & WX_VXMAILBOX_VFU), + 1, count); + if (ret) + wx_err(wx, "Failed to obtain mailbox lock for VF.\n"); + + return ret; +} + +static int wx_check_for_bit_vf(struct wx *wx, u32 mask) +{ + u32 mailbox = wx_read_v2p_mailbox(wx); + + wx->mbx.mailbox &= ~mask; + + return (mailbox & mask ? 0 : -EBUSY); } + +/** + * wx_check_for_ack_vf - checks to see if the PF has ACK'd + * @wx: pointer to the HW structure + * + * Return: returns 0 if the PF has set the ACK bit, otherwise -EBUSY + **/ +static int wx_check_for_ack_vf(struct wx *wx) +{ + /* read clear the pf ack bit */ + return wx_check_for_bit_vf(wx, WX_VXMAILBOX_PFACK); +} + +/** + * wx_check_for_msg_vf - checks to see if the PF has sent mail + * @wx: pointer to the HW structure + * + * Return: returns 0 if the PF has posted a message (STS bit set), otherwise -EBUSY + **/ +int wx_check_for_msg_vf(struct wx *wx) +{ + /* read clear the pf sts bit */ + return wx_check_for_bit_vf(wx, WX_VXMAILBOX_PFSTS); +} + +/** + * wx_check_for_rst_vf - checks to see if the PF has reset + * @wx: pointer to the HW structure + * + * Return: returns 0 if the PF has signalled a reset (RSTI or RSTD set), otherwise -EBUSY + **/ +int wx_check_for_rst_vf(struct wx *wx) +{ + /* read clear the pf reset done bit */ + return wx_check_for_bit_vf(wx, + WX_VXMAILBOX_RSTD | + WX_VXMAILBOX_RSTI); +} + +/** + * wx_poll_for_msg - Wait for message notification + * @wx: pointer to the HW structure + * + * Return: return 0 if the VF has successfully received a message notification + **/ +static int wx_poll_for_msg(struct wx *wx) +{ + struct wx_mbx_info *mbx = &wx->mbx; + u32 val; + + return readx_poll_timeout_atomic(wx_check_for_msg_vf, wx, val, + (val == 0), mbx->udelay, mbx->timeout); +} + +/** + * wx_poll_for_ack - Wait for message acknowledgment + * @wx: pointer to the HW structure + * + * Return: return 0 if the VF has successfully received a message ack + **/ +static int wx_poll_for_ack(struct wx *wx) +{ + struct wx_mbx_info *mbx = &wx->mbx; + u32 val; + + return readx_poll_timeout_atomic(wx_check_for_ack_vf, wx, val, + (val == 0), mbx->udelay, mbx->timeout); +} + +/** + * wx_read_posted_mbx - Wait for message notification and receive message + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * + * Return: returns 0 if it successfully received a message notification and + * copied it into the receive buffer.
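+ *
+ * A typical VF request/response round trip (illustrative; this is what
+ * wx_mbx_write_and_read_reply() in wx_vf.c does) pairs wx_write_posted_mbx()
+ * with this function:
+ *
+ *   u32 msg[2] = { WX_VF_GET_LINK_STATE };
+ *
+ *   err = wx_write_posted_mbx(wx, msg, ARRAY_SIZE(msg));
+ *   if (!err)
+ *           err = wx_read_posted_mbx(wx, msg, ARRAY_SIZE(msg));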
+ **/ +int wx_read_posted_mbx(struct wx *wx, u32 *msg, u16 size) +{ + int ret; + + ret = wx_poll_for_msg(wx); + /* if a message was received, read it; otherwise we timed out */ + if (ret) + return ret; + + return wx_read_mbx_vf(wx, msg, size); +} + +/** + * wx_write_posted_mbx - Write a message to the mailbox, wait for ack + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * + * Return: returns 0 if it successfully copied message into the buffer and + * received an ack to that message within delay * timeout period + **/ +int wx_write_posted_mbx(struct wx *wx, u32 *msg, u16 size) +{ + int ret; + + /* send msg */ + ret = wx_write_mbx_vf(wx, msg, size); + /* if msg sent wait until we receive an ack */ + if (ret) + return ret; + + return wx_poll_for_ack(wx); +} + +/** + * wx_write_mbx_vf - Write a message to the mailbox + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * + * Return: returns 0 if it successfully copied message into the buffer + **/ +int wx_write_mbx_vf(struct wx *wx, u32 *msg, u16 size) +{ + struct wx_mbx_info *mbx = &wx->mbx; + int ret, i; + + /* mbx->size is up to 15 */ + if (size > mbx->size) { + wx_err(wx, "Invalid mailbox message size %d", size); + return -EINVAL; + } + + /* lock the mailbox to prevent pf/vf race condition */ + ret = wx_obtain_mbx_lock_vf(wx); + if (ret) + return ret; + + /* flush msg and acks as we are overwriting the message buffer */ + wx_check_for_msg_vf(wx); + wx_check_for_ack_vf(wx); + + /* copy the caller specified message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + wr32a(wx, WX_VXMBMEM, i, msg[i]); + + /* Drop VFU and interrupt the PF to tell it a message has been sent */ + wr32(wx, WX_VXMAILBOX, WX_VXMAILBOX_REQ); + + return 0; +} + +/** + * wx_read_mbx_vf - Reads a message from the inbox intended for vf + * @wx: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * + * Return: returns 0 if it successfully copied message into the buffer + **/ +int wx_read_mbx_vf(struct wx *wx, u32 *msg, u16 size) +{ + struct wx_mbx_info *mbx = &wx->mbx; + int ret, i; + + /* limit read to size of mailbox and mbx->size is up to 15 */ + if (size > mbx->size) + size = mbx->size; + + /* lock the mailbox to prevent pf/vf race condition */ + ret = wx_obtain_mbx_lock_vf(wx); + if (ret) + return ret; + + /* copy the message from the mailbox memory buffer */ + for (i = 0; i < size; i++) + msg[i] = rd32a(wx, WX_VXMBMEM, i); + + /* Acknowledge receipt and release mailbox, then we're done */ + wr32(wx, WX_VXMAILBOX, WX_VXMAILBOX_ACK); + + return 0; +} + +int wx_init_mbx_params_vf(struct wx *wx) +{ + wx->vfinfo = kzalloc(sizeof(struct vf_data_storage), + GFP_KERNEL); + if (!wx->vfinfo) + return -ENOMEM; + + /* Initialize mailbox parameters */ + wx->mbx.size = WX_VXMAILBOX_SIZE; + wx->mbx.mailbox = WX_VXMAILBOX; + wx->mbx.udelay = 10; + wx->mbx.timeout = 1000; + + return 0; +} +EXPORT_SYMBOL(wx_init_mbx_params_vf); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_mbx.h b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h index 05aae138dbc3..82df9218490a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_mbx.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h @@ -11,6 +11,20 @@ #define WX_PXMAILBOX_ACK BIT(1) /* Ack message recv'd from VF */ #define WX_PXMAILBOX_PFU BIT(3) /* PF owns the mailbox buffer */ +/* VF Registers */ +#define WX_VXMAILBOX 0x600 +#define WX_VXMAILBOX_REQ BIT(0) /* Request for PF Ready bit */ +#define WX_VXMAILBOX_ACK BIT(1)
/* Ack PF message received */ +#define WX_VXMAILBOX_VFU BIT(2) /* VF owns the mailbox buffer */ +#define WX_VXMAILBOX_PFU BIT(3) /* PF owns the mailbox buffer */ +#define WX_VXMAILBOX_PFSTS BIT(4) /* PF wrote a message in the MB */ +#define WX_VXMAILBOX_PFACK BIT(5) /* PF ack the previous VF msg */ +#define WX_VXMAILBOX_RSTI BIT(6) /* PF has reset indication */ +#define WX_VXMAILBOX_RSTD BIT(7) /* PF has indicated reset done */ +#define WX_VXMAILBOX_R2C_BITS (WX_VXMAILBOX_RSTD | \ + WX_VXMAILBOX_PFSTS | WX_VXMAILBOX_PFACK) + +#define WX_VXMBMEM 0x00C00 /* 16*4B */ #define WX_PXMBMEM(i) (0x5000 + (64 * (i))) /* i=[0,63] */ #define WX_VFLRE(i) (0x4A0 + (4 * (i))) /* i=[0,1] */ @@ -74,4 +88,12 @@ int wx_check_for_rst_pf(struct wx *wx, u16 mbx_id); int wx_check_for_msg_pf(struct wx *wx, u16 mbx_id); int wx_check_for_ack_pf(struct wx *wx, u16 mbx_id); +int wx_read_posted_mbx(struct wx *wx, u32 *msg, u16 size); +int wx_write_posted_mbx(struct wx *wx, u32 *msg, u16 size); +int wx_check_for_rst_vf(struct wx *wx); +int wx_check_for_msg_vf(struct wx *wx); +int wx_read_mbx_vf(struct wx *wx, u32 *msg, u16 size); +int wx_write_mbx_vf(struct wx *wx, u32 *msg, u16 size); +int wx_init_mbx_params_vf(struct wx *wx); + #endif /* _WX_MBX_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index e8656d9d733b..c82ae137756c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx) wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0); wx->ring_feature[RING_F_VMDQ].offset = 0; + clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); /* Disable VMDq flag so device will be set in NM mode */ if (wx->ring_feature[RING_F_VMDQ].limit == 1) @@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs); + if (num_vfs == 7 && wx->mac.type == wx_mac_em) + set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); + /* Enable VMDq flag so device will be set in VM mode */ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); if (!wx->ring_feature[RING_F_VMDQ].limit) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 7730c9fc3e02..42b0e65fe983 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -825,6 +825,11 @@ struct wx_bus_info { struct wx_mbx_info { u16 size; + u32 mailbox; + u32 udelay; + u32 timeout; + /* lock mbx access */ + spinlock_t mbx_lock; }; struct wx_thermal_sensor_data { @@ -1191,6 +1196,7 @@ enum wx_pf_flags { WX_FLAG_VMDQ_ENABLED, WX_FLAG_VLAN_PROMISC, WX_FLAG_SRIOV_ENABLED, + WX_FLAG_IRQ_VECTOR_SHARED, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1200,6 +1206,8 @@ enum wx_pf_flags { WX_FLAG_PTP_PPS_ENABLED, WX_FLAG_NEED_LINK_CONFIG, WX_FLAG_NEED_SFP_RESET, + WX_FLAG_NEED_UPDATE_LINK, + WX_FLAG_NEED_DO_RESET, WX_PF_FLAGS_NBITS /* must be last */ }; @@ -1210,6 +1218,7 @@ struct wx { void *priv; u8 __iomem *hw_addr; + u8 __iomem *b4_addr; /* vf only */ struct pci_dev *pdev; struct net_device *netdev; struct wx_bus_info bus; @@ -1284,6 +1293,8 @@ struct wx { u32 *isb_mem; u32 isb_tag[WX_ISB_MAX]; bool misc_irq_domain; + u32 eims_other; + u32 eims_enable_mask; #define WX_MAX_RETA_ENTRIES 128 #define WX_RSS_INDIR_TBL_MAX 64 @@ -1315,6 +1326,7 @@ struct wx { int 
(*setup_tc)(struct net_device *netdev, u8 tc); void (*do_reset)(struct net_device *netdev); int (*ptp_setup_sdp)(struct wx *wx); + void (*set_num_queues)(struct wx *wx); bool pps_enabled; u64 pps_width; @@ -1343,7 +1355,7 @@ struct wx { }; #define WX_INTR_ALL (~0ULL) -#define WX_INTR_Q(i) BIT((i) + 1) +#define WX_INTR_Q(i) BIT((i)) /* register operations */ #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.c b/drivers/net/ethernet/wangxun/libwx/wx_vf.c new file mode 100644 index 000000000000..7567216a005f --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "wx_type.h" +#include "wx_hw.h" +#include "wx_mbx.h" +#include "wx_vf.h" + +static void wx_virt_clr_reg(struct wx *wx) +{ + u32 vfsrrctl, i; + + /* VRSRRCTL default values (BSIZEPACKET = 2048, BSIZEHEADER = 256) */ + vfsrrctl = WX_VXRXDCTL_HDRSZ(wx_hdr_sz(WX_RX_HDR_SIZE)); + vfsrrctl |= WX_VXRXDCTL_BUFSZ(wx_buf_sz(WX_RX_BUF_SIZE)); + + /* clear all rxd ctl */ + for (i = 0; i < WX_VF_MAX_RING_NUMS; i++) + wr32m(wx, WX_VXRXDCTL(i), + WX_VXRXDCTL_HDRSZ_MASK | WX_VXRXDCTL_BUFSZ_MASK, + vfsrrctl); + + rd32(wx, WX_VXSTATUS); +} + +/** + * wx_init_hw_vf - virtual function hardware initialization + * @wx: pointer to hardware structure + * + * Initialize the mac address + **/ +void wx_init_hw_vf(struct wx *wx) +{ + wx_get_mac_addr_vf(wx, wx->mac.addr); +} +EXPORT_SYMBOL(wx_init_hw_vf); + +static int wx_mbx_write_and_read_reply(struct wx *wx, u32 *req_buf, + u32 *resp_buf, u16 size) +{ + int ret; + + ret = wx_write_posted_mbx(wx, req_buf, size); + if (ret) + return ret; + + return wx_read_posted_mbx(wx, resp_buf, size); +} + +/** + * wx_reset_hw_vf - Performs hardware reset + * @wx: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks and + * clears all interrupts. 
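+ *
+ * The reset handshake implemented below runs in order: quiesce the VF via
+ * wx_stop_adapter_vf(), save the MSI-X table through b4_addr (when mapped),
+ * assert VXCTRL.RST and wait for the PF to signal RSTI/RSTD, restore the
+ * vectors, then post WX_VF_RESET over the mailbox and, on ACK, adopt the
+ * permanent MAC address and multicast filter type from the reply.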
+ * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_reset_hw_vf(struct wx *wx) +{ + struct wx_mbx_info *mbx = &wx->mbx; + u32 msgbuf[4] = {WX_VF_RESET}; + u8 *addr = (u8 *)(&msgbuf[1]); + u32 b4_buf[16] = {0}; + u32 timeout = 200; + int ret; + u32 i; + + /* Call wx stop to disable tx/rx and clear interrupts */ + wx_stop_adapter_vf(wx); + + /* reset the api version */ + wx->vfinfo->vf_api = wx_mbox_api_null; + + /* backup msix vectors */ + if (wx->b4_addr) { + for (i = 0; i < 16; i++) + b4_buf[i] = readl(wx->b4_addr + i * 4); + } + + wr32m(wx, WX_VXCTRL, WX_VXCTRL_RST, WX_VXCTRL_RST); + rd32(wx, WX_VXSTATUS); + + /* we cannot reset while the RSTI / RSTD bits are asserted */ + while (!wx_check_for_rst_vf(wx) && timeout) { + timeout--; + udelay(5); + } + + /* restore msix vectors */ + if (wx->b4_addr) { + for (i = 0; i < 16; i++) + writel(b4_buf[i], wx->b4_addr + i * 4); + } + + /* amlite: bme */ + if (wx->mac.type == wx_mac_aml || wx->mac.type == wx_mac_aml40) + wr32(wx, WX_VX_PF_BME, WX_VF_BME_ENABLE); + + if (!timeout) + return -EBUSY; + + /* Reset VF registers to initial values */ + wx_virt_clr_reg(wx); + + /* mailbox timeout can now become active */ + mbx->timeout = 2000; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + if (msgbuf[0] != (WX_VF_RESET | WX_VT_MSGTYPE_ACK) && + msgbuf[0] != (WX_VF_RESET | WX_VT_MSGTYPE_NACK)) + return -EINVAL; + + if (msgbuf[0] == (WX_VF_RESET | WX_VT_MSGTYPE_ACK)) + ether_addr_copy(wx->mac.perm_addr, addr); + + wx->mac.mc_filter_type = msgbuf[3]; + + return 0; +} +EXPORT_SYMBOL(wx_reset_hw_vf); + +/** + * wx_stop_adapter_vf - Generic stop Tx/Rx units + * @wx: pointer to hardware structure + * + * Clears interrupts, disables transmit and receive units. + **/ +void wx_stop_adapter_vf(struct wx *wx) +{ + u32 reg_val; + u16 i; + + /* Clear interrupt mask to stop from interrupts being generated */ + wr32(wx, WX_VXIMS, WX_VF_IRQ_CLEAR_MASK); + + /* Clear any pending interrupts, flush previous writes */ + wr32(wx, WX_VXICR, U32_MAX); + + /* Disable the transmit unit. Each queue must be disabled. 
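+ * (WX_VXTXDCTL_FLUSH is written instead of merely clearing an enable bit;
+ * the name suggests it also drains descriptors still pending in the queue.)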
*/ + for (i = 0; i < wx->mac.max_tx_queues; i++) + wr32(wx, WX_VXTXDCTL(i), WX_VXTXDCTL_FLUSH); + + /* Disable the receive unit by stopping each queue */ + for (i = 0; i < wx->mac.max_rx_queues; i++) { + reg_val = rd32(wx, WX_VXRXDCTL(i)); + reg_val &= ~WX_VXRXDCTL_ENABLE; + wr32(wx, WX_VXRXDCTL(i), reg_val); + } + /* Clear packet split and pool config */ + wr32(wx, WX_VXMRQC, 0); + + /* flush all queue disables */ + rd32(wx, WX_VXSTATUS); +} +EXPORT_SYMBOL(wx_stop_adapter_vf); + +/** + * wx_set_rar_vf - set device MAC address + * @wx: pointer to hardware structure + * @index: Receive address register to write + * @addr: Address to put into receive address register + * @enable_addr: set flag that address is active + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_set_rar_vf(struct wx *wx, u32 index, u8 *addr, u32 enable_addr) +{ + u32 msgbuf[3] = {WX_VF_SET_MAC_ADDR}; + u8 *msg_addr = (u8 *)(&msgbuf[1]); + int ret; + + memcpy(msg_addr, addr, ETH_ALEN); + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + msgbuf[0] &= ~WX_VT_MSGTYPE_CTS; + + /* if nacked the address was rejected, use "perm_addr" */ + if (msgbuf[0] == (WX_VF_SET_MAC_ADDR | WX_VT_MSGTYPE_NACK)) { + wx_get_mac_addr_vf(wx, wx->mac.addr); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(wx_set_rar_vf); + +/** + * wx_update_mc_addr_list_vf - Update Multicast addresses + * @wx: pointer to the HW structure + * @netdev: pointer to the net device structure + * + * Updates the Multicast Table Array. + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_update_mc_addr_list_vf(struct wx *wx, struct net_device *netdev) +{ + u32 msgbuf[WX_VXMAILBOX_SIZE] = {WX_VF_SET_MULTICAST}; + u16 *vector_l = (u16 *)&msgbuf[1]; + struct netdev_hw_addr *ha; + u32 cnt, i; + + /* the mailbox message carries at most 28 16-bit hash vectors after the command word */ + cnt = netdev_mc_count(netdev); + if (cnt > 28) + cnt = 28; + msgbuf[0] |= cnt << WX_VT_MSGINFO_SHIFT; + + i = 0; + netdev_for_each_mc_addr(ha, netdev) { + if (i == cnt) + break; + if (is_link_local_ether_addr(ha->addr)) + continue; + + vector_l[i++] = wx_mta_vector(wx, ha->addr); + } + + return wx_write_posted_mbx(wx, msgbuf, ARRAY_SIZE(msgbuf)); +} +EXPORT_SYMBOL(wx_update_mc_addr_list_vf); + +/** + * wx_update_xcast_mode_vf - Update Multicast mode + * @wx: pointer to the HW structure + * @xcast_mode: new multicast mode + * + * Updates the Multicast Mode of VF. + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_update_xcast_mode_vf(struct wx *wx, int xcast_mode) +{ + u32 msgbuf[2] = {WX_VF_UPDATE_XCAST_MODE, xcast_mode}; + int ret = 0; + + if (wx->vfinfo->vf_api < wx_mbox_api_13) + return -EINVAL; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + msgbuf[0] &= ~WX_VT_MSGTYPE_CTS; + if (msgbuf[0] == (WX_VF_UPDATE_XCAST_MODE | WX_VT_MSGTYPE_NACK)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(wx_update_xcast_mode_vf); + +/** + * wx_get_link_state_vf - Get VF link state from PF + * @wx: pointer to the HW structure + * @link_state: link state storage + * + * Return: returns 0 on success, negative error code on failure.
+ **/ +int wx_get_link_state_vf(struct wx *wx, u16 *link_state) +{ + u32 msgbuf[2] = {WX_VF_GET_LINK_STATE}; + int ret; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + if (msgbuf[0] & WX_VT_MSGTYPE_NACK) + return -EINVAL; + + *link_state = msgbuf[1]; + + return 0; +} +EXPORT_SYMBOL(wx_get_link_state_vf); + +/** + * wx_set_vfta_vf - Set/Unset vlan filter table address + * @wx: pointer to the HW structure + * @vlan: 12 bit VLAN ID + * @vind: unused by VF drivers + * @vlan_on: if true then set bit, else clear bit + * @vlvf_bypass: boolean flag indicating updating default pool is okay + * + * Turn on/off specified VLAN in the VLAN filter table. + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_set_vfta_vf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on, + bool vlvf_bypass) +{ + u32 msgbuf[2] = {WX_VF_SET_VLAN, vlan}; + bool vlan_offload = false; + int ret; + + /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ + msgbuf[0] |= vlan_on << WX_VT_MSGINFO_SHIFT; + /* if vf vlan offload is disabled, allow to create vlan under pf port vlan */ + msgbuf[0] |= BIT(vlan_offload); + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + if (msgbuf[0] & WX_VT_MSGTYPE_ACK) + return 0; + + return msgbuf[0] & WX_VT_MSGTYPE_NACK; +} +EXPORT_SYMBOL(wx_set_vfta_vf); + +void wx_get_mac_addr_vf(struct wx *wx, u8 *mac_addr) +{ + ether_addr_copy(mac_addr, wx->mac.perm_addr); +} +EXPORT_SYMBOL(wx_get_mac_addr_vf); + +int wx_get_fw_version_vf(struct wx *wx) +{ + u32 msgbuf[2] = {WX_VF_GET_FW_VERSION}; + int ret; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + if (msgbuf[0] & WX_VT_MSGTYPE_NACK) + return -EINVAL; + snprintf(wx->eeprom_id, 32, "0x%08x", msgbuf[1]); + + return 0; +} +EXPORT_SYMBOL(wx_get_fw_version_vf); + +int wx_set_uc_addr_vf(struct wx *wx, u32 index, u8 *addr) +{ + u32 msgbuf[3] = {WX_VF_SET_MACVLAN}; + u8 *msg_addr = (u8 *)(&msgbuf[1]); + int ret; + + /* If index is one then this is the start of a new list and needs + * indication to the PF so it can do its own list management. + * If it is zero then that tells the PF to just clear all of + * this VF's macvlans and there is no new list.
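+ * For example, wx_set_uc_addr_vf(wx, 0, NULL) asks the PF to flush this
+ * VF's macvlan list, while wx_write_uc_addr_list_vf() in wx_vf_lib.c
+ * passes ++count so the first real filter arrives with index 1.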
+ */ + msgbuf[0] |= index << WX_VT_MSGINFO_SHIFT; + if (addr) + memcpy(msg_addr, addr, 6); + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + msgbuf[0] &= ~WX_VT_MSGTYPE_CTS; + + if (msgbuf[0] == (WX_VF_SET_MACVLAN | WX_VT_MSGTYPE_NACK)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(wx_set_uc_addr_vf); + +/** + * wx_rlpml_set_vf - Set the maximum receive packet length + * @wx: pointer to the HW structure + * @max_size: value to assign to max frame size + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_rlpml_set_vf(struct wx *wx, u16 max_size) +{ + u32 msgbuf[2] = {WX_VF_SET_LPE, max_size}; + int ret; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + if ((msgbuf[0] & WX_VF_SET_LPE) && + (msgbuf[0] & WX_VT_MSGTYPE_NACK)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(wx_rlpml_set_vf); + +/** + * wx_negotiate_api_version - Negotiate supported API version + * @wx: pointer to the HW structure + * @api: integer containing requested API version + * + * Return: returns 0 on success, negative error code on failure + **/ +int wx_negotiate_api_version(struct wx *wx, int api) +{ + u32 msgbuf[2] = {WX_VF_API_NEGOTIATE, api}; + int ret; + + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + + msgbuf[0] &= ~WX_VT_MSGTYPE_CTS; + + /* Store value and return 0 on success */ + if (msgbuf[0] == (WX_VF_API_NEGOTIATE | WX_VT_MSGTYPE_NACK)) + return -EINVAL; + wx->vfinfo->vf_api = api; + + return 0; +} +EXPORT_SYMBOL(wx_negotiate_api_version); + +int wx_get_queues_vf(struct wx *wx, u32 *num_tcs, u32 *default_tc) +{ + u32 msgbuf[5] = {WX_VF_GET_QUEUES}; + int ret; + + /* do nothing if API doesn't support wx_get_queues */ + if (wx->vfinfo->vf_api < wx_mbox_api_13) + return -EINVAL; + + /* Fetch queue configuration from the PF */ + ret = wx_mbx_write_and_read_reply(wx, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (ret) + return ret; + msgbuf[0] &= ~WX_VT_MSGTYPE_CTS; + + /* if we didn't get an ACK there must have been + * some sort of mailbox error so we should treat it + * as such + */ + if (msgbuf[0] != (WX_VF_GET_QUEUES | WX_VT_MSGTYPE_ACK)) + return -EINVAL; + /* record and validate values from message */ + wx->mac.max_tx_queues = msgbuf[WX_VF_TX_QUEUES]; + if (wx->mac.max_tx_queues == 0 || + wx->mac.max_tx_queues > WX_VF_MAX_TX_QUEUES) + wx->mac.max_tx_queues = WX_VF_MAX_TX_QUEUES; + + wx->mac.max_rx_queues = msgbuf[WX_VF_RX_QUEUES]; + if (wx->mac.max_rx_queues == 0 || + wx->mac.max_rx_queues > WX_VF_MAX_RX_QUEUES) + wx->mac.max_rx_queues = WX_VF_MAX_RX_QUEUES; + + *num_tcs = msgbuf[WX_VF_TRANS_VLAN]; + /* in case of unknown state assume we cannot tag frames */ + if (*num_tcs > wx->mac.max_rx_queues) + *num_tcs = 1; + *default_tc = msgbuf[WX_VF_DEF_QUEUE]; + /* default to queue 0 on out-of-bounds queue number */ + if (*default_tc >= wx->mac.max_tx_queues) + *default_tc = 0; + + return 0; +} +EXPORT_SYMBOL(wx_get_queues_vf); + +static int wx_get_link_status_from_pf(struct wx *wx, u32 *msgbuf) +{ + u32 links_reg = msgbuf[1]; + + if (msgbuf[1] & WX_PF_NOFITY_VF_NET_NOT_RUNNING) + wx->notify_down = true; + else + wx->notify_down = false; + + if (wx->notify_down) { + wx->link = false; + wx->speed = SPEED_UNKNOWN; + return 0; + } + + wx->link = WX_PFLINK_STATUS(links_reg); + wx->speed = WX_PFLINK_SPEED(links_reg); + + return 0; +} + +static int wx_pf_ping_vf(struct wx *wx, u32 *msgbuf) +{ + if (!(msgbuf[0] & 
WX_VT_MSGTYPE_CTS)) + /* msg is not CTS, we need to do reset */ + return -EINVAL; + + return 0; +} + +static struct wx_link_reg_fields wx_speed_lookup_vf[] = { + {wx_mac_unknown}, + {wx_mac_sp, SPEED_10000, SPEED_1000, SPEED_100, SPEED_UNKNOWN, SPEED_UNKNOWN}, + {wx_mac_em, SPEED_1000, SPEED_100, SPEED_10, SPEED_UNKNOWN, SPEED_UNKNOWN}, + {wx_mac_aml, SPEED_40000, SPEED_25000, SPEED_10000, SPEED_1000, SPEED_UNKNOWN}, + {wx_mac_aml40, SPEED_40000, SPEED_25000, SPEED_10000, SPEED_1000, SPEED_UNKNOWN}, +}; + +static void wx_check_physical_link(struct wx *wx) +{ + u32 val, link_val; + int ret; + + /* get link status from hw status reg + * for SFP+ modules and DA cables, it can take up to 500usecs + * before the link status is correct + */ + if (wx->mac.type == wx_mac_em) + ret = read_poll_timeout_atomic(rd32, val, val & GENMASK(4, 1), + 100, 500, false, wx, WX_VXSTATUS); + else + ret = read_poll_timeout_atomic(rd32, val, val & BIT(0), 100, + 500, false, wx, WX_VXSTATUS); + if (ret) { + wx->speed = SPEED_UNKNOWN; + wx->link = false; + return; + } + + wx->link = true; + link_val = WX_VXSTATUS_SPEED(val); + + if (link_val & BIT(0)) + wx->speed = wx_speed_lookup_vf[wx->mac.type].bit0_f; + else if (link_val & BIT(1)) + wx->speed = wx_speed_lookup_vf[wx->mac.type].bit1_f; + else if (link_val & BIT(2)) + wx->speed = wx_speed_lookup_vf[wx->mac.type].bit2_f; + else if (link_val & BIT(3)) + wx->speed = wx_speed_lookup_vf[wx->mac.type].bit3_f; + else + wx->speed = SPEED_UNKNOWN; +} + +int wx_check_mac_link_vf(struct wx *wx) +{ + struct wx_mbx_info *mbx = &wx->mbx; + u32 msgbuf[2] = {0}; + int ret = 0; + + if (!mbx->timeout) + goto out; + + wx_check_for_rst_vf(wx); + if (!wx_check_for_msg_vf(wx)) + ret = wx_read_mbx_vf(wx, msgbuf, 2); + if (ret) + goto out; + + switch (msgbuf[0] & GENMASK(8, 0)) { + case WX_PF_NOFITY_VF_LINK_STATUS | WX_PF_CONTROL_MSG: + ret = wx_get_link_status_from_pf(wx, msgbuf); + goto out; + case WX_PF_CONTROL_MSG: + ret = wx_pf_ping_vf(wx, msgbuf); + goto out; + case 0: + if (msgbuf[0] & WX_VT_MSGTYPE_NACK) { + /* msg is NACK, we must have lost CTS status */ + ret = -EBUSY; + goto out; + } + /* no message, check link status */ + wx_check_physical_link(wx); + goto out; + default: + break; + } + + if (!(msgbuf[0] & WX_VT_MSGTYPE_CTS)) { + /* msg is not CTS and is NACK we must have lost CTS status */ + if (msgbuf[0] & WX_VT_MSGTYPE_NACK) + ret = -EBUSY; + goto out; + } + + /* the pf is talking, if we timed out in the past we reinit */ + if (!mbx->timeout) { + ret = -EBUSY; + goto out; + } + +out: + return ret; +} diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h new file mode 100644 index 000000000000..fec1126703e3 --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. 
*/ + +#ifndef _WX_VF_H_ +#define _WX_VF_H_ + +#define WX_VF_MAX_RING_NUMS 8 +#define WX_VX_PF_BME 0x4B8 +#define WX_VF_BME_ENABLE BIT(0) +#define WX_VXSTATUS 0x4 +#define WX_VXCTRL 0x8 +#define WX_VXCTRL_RST BIT(0) + +#define WX_VXMRQC 0x78 +#define WX_VXICR 0x100 +#define WX_VXIMS 0x108 +#define WX_VXIMC 0x10C +#define WX_VF_IRQ_CLEAR_MASK 7 +#define WX_VF_MAX_TX_QUEUES 4 +#define WX_VF_MAX_RX_QUEUES 4 +#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r))) +#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r))) +#define WX_VXRXDCTL_ENABLE BIT(0) +#define WX_VXTXDCTL_FLUSH BIT(26) + +#define WX_VXITR(i) (0x200 + (4 * (i))) /* i=[0,1] */ +#define WX_VXITR_MASK GENMASK(8, 0) +#define WX_VXITR_CNT_WDIS BIT(31) +#define WX_VXIVAR_MISC 0x260 +#define WX_VXIVAR(i) (0x240 + (4 * (i))) /* i=[0,3] */ + +#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f) +#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) +#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) +#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) + +#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) +#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1) +#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8) +#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) + +#define wx_conf_size(v, mwidth, uwidth) ({ \ + typeof(v) _v = (v); \ + (_v == 2 << (mwidth) ? 0 : _v >> (uwidth)); \ +}) +#define wx_buf_len(v) wx_conf_size(v, 13, 7) +#define wx_hdr_sz(v) wx_conf_size(v, 10, 6) +#define wx_buf_sz(v) wx_conf_size(v, 14, 10) +#define wx_pkt_thresh(v) wx_conf_size(v, 4, 0) + +#define WX_RX_HDR_SIZE 256 +#define WX_RX_BUF_SIZE 2048 + +#define WX_RXBUFFER_2048 (2048) +#define WX_RXBUFFER_3072 3072 + +/* Receive Path */ +#define WX_VXRDBAL(r) (0x1000 + (0x40 * (r))) +#define WX_VXRDBAH(r) (0x1004 + (0x40 * (r))) +#define WX_VXRDT(r) (0x1008 + (0x40 * (r))) +#define WX_VXRDH(r) (0x100C + (0x40 * (r))) + +#define WX_VXRXDCTL_RSCEN BIT(29) +#define WX_VXRXDCTL_DROP BIT(30) +#define WX_VXRXDCTL_VLAN BIT(31) + +#define WX_VXTDBAL(r) (0x3000 + (0x40 * (r))) +#define WX_VXTDBAH(r) (0x3004 + (0x40 * (r))) +#define WX_VXTDT(r) (0x3008 + (0x40 * (r))) +#define WX_VXTDH(r) (0x300C + (0x40 * (r))) + +#define WX_VXTXDCTL_ENABLE BIT(0) +#define WX_VXTXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) +#define WX_VXTXDCTL_PTHRESH(f) FIELD_PREP(GENMASK(11, 8), f) +#define WX_VXTXDCTL_WTHRESH(f) FIELD_PREP(GENMASK(22, 16), f) + +#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f) +#define WX_VXMRQC_PSR_MASK GENMASK(5, 1) +#define WX_VXMRQC_PSR_L4HDR BIT(0) +#define WX_VXMRQC_PSR_L3HDR BIT(1) +#define WX_VXMRQC_PSR_L2HDR BIT(2) +#define WX_VXMRQC_PSR_TUNHDR BIT(3) +#define WX_VXMRQC_PSR_TUNMAC BIT(4) + +#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */ +#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */ + +#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f) +#define WX_VXMRQC_RSS_MASK GENMASK(31, 16) +#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0) +#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1) +#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4) +#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5) +#define WX_VXMRQC_RSS_EN BIT(8) +#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f) + +#define WX_PFLINK_STATUS(g) FIELD_GET(BIT(0), g) +#define WX_PFLINK_SPEED(g) FIELD_GET(GENMASK(31, 1), g) +#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g) + +struct wx_link_reg_fields { + u32 mac_type; + u32 bit0_f; + u32 bit1_f; + u32 bit2_f; + u32 bit3_f; + u32 bit4_f; +}; + +void wx_init_hw_vf(struct wx *wx); +int wx_reset_hw_vf(struct wx *wx); +void wx_get_mac_addr_vf(struct wx *wx, u8 *mac_addr); 
+void wx_stop_adapter_vf(struct wx *wx); +int wx_get_fw_version_vf(struct wx *wx); +int wx_set_rar_vf(struct wx *wx, u32 index, u8 *addr, u32 enable_addr); +int wx_update_mc_addr_list_vf(struct wx *wx, struct net_device *netdev); +int wx_set_uc_addr_vf(struct wx *wx, u32 index, u8 *addr); +int wx_rlpml_set_vf(struct wx *wx, u16 max_size); +int wx_negotiate_api_version(struct wx *wx, int api); +int wx_get_queues_vf(struct wx *wx, u32 *num_tcs, u32 *default_tc); +int wx_update_xcast_mode_vf(struct wx *wx, int xcast_mode); +int wx_get_link_state_vf(struct wx *wx, u16 *link_state); +int wx_set_vfta_vf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on, + bool vlvf_bypass); +int wx_check_mac_link_vf(struct wx *wx); + +#endif /* _WX_VF_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c new file mode 100644 index 000000000000..ade2bfe563aa --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "wx_type.h" +#include "wx_mbx.h" +#include "wx_lib.h" +#include "wx_vf.h" +#include "wx_vf_lib.h" +#include "wx_vf_common.h" + +int wxvf_suspend(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct wx *wx = pci_get_drvdata(pdev); + + netif_device_detach(wx->netdev); + wx_clear_interrupt_scheme(wx); + pci_disable_device(pdev); + + return 0; +} +EXPORT_SYMBOL(wxvf_suspend); + +void wxvf_shutdown(struct pci_dev *pdev) +{ + wxvf_suspend(&pdev->dev); +} +EXPORT_SYMBOL(wxvf_shutdown); + +int wxvf_resume(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct wx *wx = pci_get_drvdata(pdev); + + pci_set_master(pdev); + wx_init_interrupt_scheme(wx); + netif_device_attach(wx->netdev); + + return 0; +} +EXPORT_SYMBOL(wxvf_resume); + +void wxvf_remove(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + + cancel_work_sync(&wx->service_task); + netdev = wx->netdev; + unregister_netdev(netdev); + kfree(wx->vfinfo); + kfree(wx->rss_key); + kfree(wx->mac_table); + wx_clear_interrupt_scheme(wx); + pci_release_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM)); + pci_disable_device(pdev); +} +EXPORT_SYMBOL(wxvf_remove); + +static irqreturn_t wx_msix_misc_vf(int __always_unused irq, void *data) +{ + struct wx *wx = data; + + set_bit(WX_FLAG_NEED_UPDATE_LINK, wx->flags); + /* Clear the interrupt */ + if (netif_running(wx->netdev)) + wr32(wx, WX_VXIMC, wx->eims_other); + + return IRQ_HANDLED; +} + +int wx_request_msix_irqs_vf(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + int vector, err; + + for (vector = 0; vector < wx->num_q_vectors; vector++) { + struct wx_q_vector *q_vector = wx->q_vector[vector]; + struct msix_entry *entry = &wx->msix_q_entries[vector]; + + if (q_vector->tx.ring && q_vector->rx.ring) + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-TxRx-%d", netdev->name, entry->entry); + else + /* skip this unused q_vector */ + continue; + + err = request_irq(entry->vector, wx_msix_clean_rings, 0, + q_vector->name, q_vector); + if (err) { + wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n", + q_vector->name, err); + goto free_queue_irqs; + } + } + + err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf, + NULL, IRQF_ONESHOT, netdev->name, wx); + if (err) { + wx_err(wx, "request_irq for 
msix_other failed: %d\n", err); + goto free_queue_irqs; + } + + return 0; + +free_queue_irqs: + while (vector) { + vector--; + free_irq(wx->msix_q_entries[vector].vector, + wx->q_vector[vector]); + } + wx_reset_interrupt_capability(wx); + return err; +} +EXPORT_SYMBOL(wx_request_msix_irqs_vf); + +void wx_negotiate_api_vf(struct wx *wx) +{ + int api[] = { + wx_mbox_api_13, + wx_mbox_api_null}; + int err = 0, idx = 0; + + spin_lock_bh(&wx->mbx.mbx_lock); + while (api[idx] != wx_mbox_api_null) { + err = wx_negotiate_api_version(wx, api[idx]); + if (!err) + break; + idx++; + } + spin_unlock_bh(&wx->mbx.mbx_lock); +} +EXPORT_SYMBOL(wx_negotiate_api_vf); + +void wx_reset_vf(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + int ret = 0; + + ret = wx_reset_hw_vf(wx); + if (!ret) + wx_init_hw_vf(wx); + wx_negotiate_api_vf(wx); + if (is_valid_ether_addr(wx->mac.addr)) { + eth_hw_addr_set(netdev, wx->mac.addr); + ether_addr_copy(netdev->perm_addr, wx->mac.addr); + } +} +EXPORT_SYMBOL(wx_reset_vf); + +void wx_set_rx_mode_vf(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + unsigned int flags = netdev->flags; + int xcast_mode; + + xcast_mode = (flags & IFF_ALLMULTI) ? WXVF_XCAST_MODE_ALLMULTI : + (flags & (IFF_BROADCAST | IFF_MULTICAST)) ? + WXVF_XCAST_MODE_MULTI : WXVF_XCAST_MODE_NONE; + /* request the most inclusive mode we need */ + if (flags & IFF_PROMISC) + xcast_mode = WXVF_XCAST_MODE_PROMISC; + else if (flags & IFF_ALLMULTI) + xcast_mode = WXVF_XCAST_MODE_ALLMULTI; + else if (flags & (IFF_BROADCAST | IFF_MULTICAST)) + xcast_mode = WXVF_XCAST_MODE_MULTI; + else + xcast_mode = WXVF_XCAST_MODE_NONE; + + spin_lock_bh(&wx->mbx.mbx_lock); + wx_update_xcast_mode_vf(wx, xcast_mode); + wx_update_mc_addr_list_vf(wx, netdev); + wx_write_uc_addr_list_vf(netdev); + spin_unlock_bh(&wx->mbx.mbx_lock); +} +EXPORT_SYMBOL(wx_set_rx_mode_vf); + +/** + * wx_configure_rx_vf - Configure Receive Unit after Reset + * @wx: board private structure + * + * Configure the Rx unit of the MAC after a reset. 
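+ *
+ * The maximum frame size handed to the PF below covers the L2 overhead on
+ * top of the MTU, e.g. a default 1500-byte MTU requests 1500 + ETH_HLEN (14)
+ * + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes.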
+ **/ +static void wx_configure_rx_vf(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + int i, ret; + + wx_setup_psrtype_vf(wx); + wx_setup_vfmrqc_vf(wx); + + spin_lock_bh(&wx->mbx.mbx_lock); + ret = wx_rlpml_set_vf(wx, + netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); + spin_unlock_bh(&wx->mbx.mbx_lock); + if (ret) + wx_dbg(wx, "Failed to set MTU at %d\n", netdev->mtu); + + /* Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring + */ + for (i = 0; i < wx->num_rx_queues; i++) { + struct wx_ring *rx_ring = wx->rx_ring[i]; +#ifdef HAVE_SWIOTLB_SKIP_CPU_SYNC + wx_set_rx_buffer_len_vf(wx, rx_ring); +#endif + wx_configure_rx_ring_vf(wx, rx_ring); + } +} + +void wx_configure_vf(struct wx *wx) +{ + wx_set_rx_mode_vf(wx->netdev); + wx_configure_tx_vf(wx); + wx_configure_rx_vf(wx); +} +EXPORT_SYMBOL(wx_configure_vf); + +int wx_set_mac_vf(struct net_device *netdev, void *p) +{ + struct wx *wx = netdev_priv(netdev); + struct sockaddr *addr = p; + int ret; + + ret = eth_prepare_mac_addr_change(netdev, addr); + if (ret) + return ret; + + spin_lock_bh(&wx->mbx.mbx_lock); + ret = wx_set_rar_vf(wx, 1, (u8 *)addr->sa_data, 1); + spin_unlock_bh(&wx->mbx.mbx_lock); + + if (ret) + return -EPERM; + + memcpy(wx->mac.addr, addr->sa_data, netdev->addr_len); + memcpy(wx->mac.perm_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); + + return 0; +} +EXPORT_SYMBOL(wx_set_mac_vf); + +void wxvf_watchdog_update_link(struct wx *wx) +{ + int err; + + if (!test_bit(WX_FLAG_NEED_UPDATE_LINK, wx->flags)) + return; + + spin_lock_bh(&wx->mbx.mbx_lock); + err = wx_check_mac_link_vf(wx); + spin_unlock_bh(&wx->mbx.mbx_lock); + if (err) { + wx->link = false; + set_bit(WX_FLAG_NEED_DO_RESET, wx->flags); + } + clear_bit(WX_FLAG_NEED_UPDATE_LINK, wx->flags); +} +EXPORT_SYMBOL(wxvf_watchdog_update_link); + +static void wxvf_irq_enable(struct wx *wx) +{ + wr32(wx, WX_VXIMC, wx->eims_enable_mask); +} + +static void wxvf_up_complete(struct wx *wx) +{ + /* Always set the carrier off */ + netif_carrier_off(wx->netdev); + mod_timer(&wx->service_timer, jiffies + HZ); + set_bit(WX_FLAG_NEED_UPDATE_LINK, wx->flags); + + wx_configure_msix_vf(wx); + smp_mb__before_atomic(); + wx_napi_enable_all(wx); + + /* clear any pending interrupts, may auto mask */ + wr32(wx, WX_VXICR, U32_MAX); + wxvf_irq_enable(wx); + /* enable transmits */ + netif_tx_start_all_queues(wx->netdev); +} + +int wxvf_open(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + int err; + + err = wx_setup_resources(wx); + if (err) + goto err_reset; + wx_configure_vf(wx); + + err = wx_request_msix_irqs_vf(wx); + if (err) + goto err_free_resources; + + /* Notify the stack of the actual queue counts. 
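+ * (presumably bounded by what the PF granted through the WX_VF_GET_QUEUES
+ * exchange in wx_get_queues_vf())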
*/ + err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); + if (err) + goto err_free_irq; + + err = netif_set_real_num_rx_queues(netdev, wx->num_rx_queues); + if (err) + goto err_free_irq; + + wxvf_up_complete(wx); + + return 0; +err_free_irq: + wx_free_irq(wx); +err_free_resources: + wx_free_resources(wx); +err_reset: + wx_reset_vf(wx); + return err; +} +EXPORT_SYMBOL(wxvf_open); + +static void wxvf_down(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + + timer_delete_sync(&wx->service_timer); + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + netif_carrier_off(netdev); + wx_napi_disable_all(wx); + wx_reset_vf(wx); + + wx_clean_all_tx_rings(wx); + wx_clean_all_rx_rings(wx); +} + +static void wxvf_reinit_locked(struct wx *wx) +{ + while (test_and_set_bit(WX_STATE_RESETTING, wx->state)) + usleep_range(1000, 2000); + wxvf_down(wx); + wx_free_irq(wx); + wx_configure_vf(wx); + wx_request_msix_irqs_vf(wx); + wxvf_up_complete(wx); + clear_bit(WX_STATE_RESETTING, wx->state); +} + +static void wxvf_reset_subtask(struct wx *wx) +{ + if (!test_bit(WX_FLAG_NEED_DO_RESET, wx->flags)) + return; + clear_bit(WX_FLAG_NEED_DO_RESET, wx->flags); + + rtnl_lock(); + if (test_bit(WX_STATE_RESETTING, wx->state) || + !(netif_running(wx->netdev))) { + rtnl_unlock(); + return; + } + wxvf_reinit_locked(wx); + rtnl_unlock(); +} + +int wxvf_close(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + wxvf_down(wx); + wx_free_irq(wx); + wx_free_resources(wx); + + return 0; +} +EXPORT_SYMBOL(wxvf_close); + +static void wxvf_link_config_subtask(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + + wxvf_watchdog_update_link(wx); + if (wx->link) { + if (netif_carrier_ok(netdev)) + return; + netif_carrier_on(netdev); + netdev_info(netdev, "Link is Up - %s\n", + phy_speed_to_str(wx->speed)); + } else { + if (!netif_carrier_ok(netdev)) + return; + netif_carrier_off(netdev); + netdev_info(netdev, "Link is Down\n"); + } +} + +static void wxvf_service_task(struct work_struct *work) +{ + struct wx *wx = container_of(work, struct wx, service_task); + + wxvf_link_config_subtask(wx); + wxvf_reset_subtask(wx); + wx_service_event_complete(wx); +} + +void wxvf_init_service(struct wx *wx) +{ + timer_setup(&wx->service_timer, wx_service_timer, 0); + INIT_WORK(&wx->service_task, wxvf_service_task); + clear_bit(WX_STATE_SERVICE_SCHED, wx->state); +} +EXPORT_SYMBOL(wxvf_init_service); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.h new file mode 100644 index 000000000000..cbbb1b178cb2 --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. 
*/ + +#ifndef _WX_VF_COMMON_H_ +#define _WX_VF_COMMON_H_ + +int wxvf_suspend(struct device *dev_d); +void wxvf_shutdown(struct pci_dev *pdev); +int wxvf_resume(struct device *dev_d); +void wxvf_remove(struct pci_dev *pdev); +int wx_request_msix_irqs_vf(struct wx *wx); +void wx_negotiate_api_vf(struct wx *wx); +void wx_reset_vf(struct wx *wx); +void wx_set_rx_mode_vf(struct net_device *netdev); +void wx_configure_vf(struct wx *wx); +int wx_set_mac_vf(struct net_device *netdev, void *p); +void wxvf_watchdog_update_link(struct wx *wx); +int wxvf_open(struct net_device *netdev); +int wxvf_close(struct net_device *netdev); +void wxvf_init_service(struct wx *wx); + +#endif /* _WX_VF_COMMON_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c new file mode 100644 index 000000000000..5d48df7a849f --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "wx_type.h" +#include "wx_hw.h" +#include "wx_lib.h" +#include "wx_vf.h" +#include "wx_vf_lib.h" + +static void wx_write_eitr_vf(struct wx_q_vector *q_vector) +{ + struct wx *wx = q_vector->wx; + int v_idx = q_vector->v_idx; + u32 itr_reg; + + itr_reg = q_vector->itr & WX_VXITR_MASK; + + /* set the WDIS bit to not clear the timer bits and cause an + * immediate assertion of the interrupt + */ + itr_reg |= WX_VXITR_CNT_WDIS; + + wr32(wx, WX_VXITR(v_idx), itr_reg); +} + +static void wx_set_ivar_vf(struct wx *wx, s8 direction, u8 queue, + u8 msix_vector) +{ + u32 ivar, index; + + if (direction == -1) { + /* other causes */ + msix_vector |= WX_PX_IVAR_ALLOC_VAL; + ivar = rd32(wx, WX_VXIVAR_MISC); + ivar &= ~0xFF; + ivar |= msix_vector; + wr32(wx, WX_VXIVAR_MISC, ivar); + } else { + /* tx or rx causes */ + msix_vector |= WX_PX_IVAR_ALLOC_VAL; + index = ((16 * (queue & 1)) + (8 * direction)); + ivar = rd32(wx, WX_VXIVAR(queue >> 1)); + ivar &= ~(0xFF << index); + ivar |= (msix_vector << index); + wr32(wx, WX_VXIVAR(queue >> 1), ivar); + } +} + +void wx_configure_msix_vf(struct wx *wx) +{ + int v_idx; + + wx->eims_enable_mask = 0; + for (v_idx = 0; v_idx < wx->num_q_vectors; v_idx++) { + struct wx_q_vector *q_vector = wx->q_vector[v_idx]; + struct wx_ring *ring; + + wx_for_each_ring(ring, q_vector->rx) + wx_set_ivar_vf(wx, 0, ring->reg_idx, v_idx); + + wx_for_each_ring(ring, q_vector->tx) + wx_set_ivar_vf(wx, 1, ring->reg_idx, v_idx); + + /* add q_vector eims value to global eims_enable_mask */ + wx->eims_enable_mask |= BIT(v_idx); + wx_write_eitr_vf(q_vector); + } + + wx_set_ivar_vf(wx, -1, 1, v_idx); + + /* setup eims_other and add value to global eims_enable_mask */ + wx->eims_other = BIT(v_idx); + wx->eims_enable_mask |= wx->eims_other; +} + +int wx_write_uc_addr_list_vf(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + int count = 0; + + if (!netdev_uc_empty(netdev)) { + struct netdev_hw_addr *ha; + + netdev_for_each_uc_addr(ha, netdev) + wx_set_uc_addr_vf(wx, ++count, ha->addr); + } else { + /* + * If the list is empty then send message to PF driver to + * clear all macvlans on this VF. + */ + wx_set_uc_addr_vf(wx, 0, NULL); + } + + return count; +} + +/** + * wx_configure_tx_ring_vf - Configure Tx ring after Reset + * @wx: board private structure + * @ring: structure containing ring specific data + * + * Configure the Tx descriptor ring after a reset. 
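+ *
+ * The sequence below is the standard ring bring-up: quiesce the queue
+ * (WX_VXTXDCTL_FLUSH), program the descriptor base address, zero the
+ * head/tail pointers, then set WX_VXTXDCTL_ENABLE and poll the register
+ * back until the hardware reports the queue enabled.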
+ **/
+static void wx_configure_tx_ring_vf(struct wx *wx, struct wx_ring *ring)
+{
+ u8 reg_idx = ring->reg_idx;
+ u64 tdba = ring->dma;
+ u32 txdctl = 0;
+ int ret;
+
+ /* disable queue to avoid issues while updating state */
+ wr32(wx, WX_VXTXDCTL(reg_idx), WX_VXTXDCTL_FLUSH);
+ wr32(wx, WX_VXTDBAL(reg_idx), tdba & DMA_BIT_MASK(32));
+ wr32(wx, WX_VXTDBAH(reg_idx), tdba >> 32);
+
+ /* enable relaxed ordering */
+ pcie_capability_clear_and_set_word(wx->pdev, PCI_EXP_DEVCTL,
+ 0, PCI_EXP_DEVCTL_RELAX_EN);
+
+ /* reset head and tail pointers */
+ wr32(wx, WX_VXTDH(reg_idx), 0);
+ wr32(wx, WX_VXTDT(reg_idx), 0);
+ ring->tail = wx->hw_addr + WX_VXTDT(reg_idx);
+
+ /* reset ntu and ntc to place SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+
+ txdctl |= WX_VXTXDCTL_BUFLEN(wx_buf_len(ring->count));
+ txdctl |= WX_VXTXDCTL_ENABLE;
+
+ /* reinitialize tx_buffer_info */
+ memset(ring->tx_buffer_info, 0,
+ sizeof(struct wx_tx_buffer) * ring->count);
+
+ wr32(wx, WX_VXTXDCTL(reg_idx), txdctl);
+ /* poll to verify queue is enabled */
+ ret = read_poll_timeout(rd32, txdctl, txdctl & WX_VXTXDCTL_ENABLE,
+ 1000, 10000, true, wx, WX_VXTXDCTL(reg_idx));
+ if (ret == -ETIMEDOUT)
+ wx_err(wx, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+/**
+ * wx_configure_tx_vf - Configure Transmit Unit after Reset
+ * @wx: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+void wx_configure_tx_vf(struct wx *wx)
+{
+ u32 i;
+
+ /* Setup the HW Tx Head and Tail descriptor pointers */
+ for (i = 0; i < wx->num_tx_queues; i++)
+ wx_configure_tx_ring_vf(wx, wx->tx_ring[i]);
+}
+
+static void wx_configure_srrctl_vf(struct wx *wx, struct wx_ring *ring,
+ int index)
+{
+ u32 srrctl;
+
+ srrctl = rd32m(wx, WX_VXRXDCTL(index),
+ (u32)~(WX_VXRXDCTL_HDRSZ_MASK | WX_VXRXDCTL_BUFSZ_MASK));
+ srrctl |= WX_VXRXDCTL_DROP;
+ srrctl |= WX_VXRXDCTL_HDRSZ(wx_hdr_sz(WX_RX_HDR_SIZE));
+ srrctl |= WX_VXRXDCTL_BUFSZ(wx_buf_sz(WX_RX_BUF_SIZE));
+
+ wr32(wx, WX_VXRXDCTL(index), srrctl);
+}
+
+void wx_setup_psrtype_vf(struct wx *wx)
+{
+ /* PSRTYPE must be initialized */
+ u32 psrtype = WX_VXMRQC_PSR_L2HDR |
+ WX_VXMRQC_PSR_L3HDR |
+ WX_VXMRQC_PSR_L4HDR |
+ WX_VXMRQC_PSR_TUNHDR |
+ WX_VXMRQC_PSR_TUNMAC;
+
+ wr32m(wx, WX_VXMRQC, WX_VXMRQC_PSR_MASK, WX_VXMRQC_PSR(psrtype));
+}
+
+void wx_setup_vfmrqc_vf(struct wx *wx)
+{
+ u16 rss_i = wx->num_rx_queues;
+ u32 vfmrqc = 0, vfreta = 0;
+ u8 i, j;
+
+ /* Fill out hash function seeds */
+ netdev_rss_key_fill(wx->rss_key, sizeof(wx->rss_key));
+ for (i = 0; i < WX_RSS_KEY_SIZE / 4; i++)
+ wr32(wx, WX_VXRSSRK(i), wx->rss_key[i]);
+
+ for (i = 0, j = 0; i < WX_MAX_RETA_ENTRIES; i++, j++) {
+ if (j == rss_i)
+ j = 0;
+
+ wx->rss_indir_tbl[i] = j;
+
+ vfreta |= j << (i & 0x3) * 8;
+ if ((i & 3) == 3) {
+ wr32(wx, WX_VXRETA(i >> 2), vfreta);
+ vfreta = 0;
+ }
+ }
+
+ /* Perform hash on these packet types */
+ vfmrqc |= WX_VXMRQC_RSS_ALG_IPV4 |
+ WX_VXMRQC_RSS_ALG_IPV4_TCP |
+ WX_VXMRQC_RSS_ALG_IPV6 |
+ WX_VXMRQC_RSS_ALG_IPV6_TCP;
+
+ vfmrqc |= WX_VXMRQC_RSS_EN;
+
+ if (wx->num_rx_queues > 3)
+ vfmrqc |= WX_VXMRQC_RSS_HASH(2);
+ else if (wx->num_rx_queues > 1)
+ vfmrqc |= WX_VXMRQC_RSS_HASH(1);
+ wr32m(wx, WX_VXMRQC, WX_VXMRQC_RSS_MASK, WX_VXMRQC_RSS(vfmrqc));
+}
+
+void wx_configure_rx_ring_vf(struct wx *wx, struct wx_ring *ring)
+{
+ u8 reg_idx = ring->reg_idx;
+ union wx_rx_desc *rx_desc;
+ u64 rdba = ring->dma;
+ u32 rxdctl;
+
+ /* disable queue to avoid issues while updating state */
+ rxdctl = rd32(wx, WX_VXRXDCTL(reg_idx));
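+ /* the RXDCTL value latched above is rewritten further down with new
+ * BUFLEN/RSC bits once the ring has been reconfigured
+ */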
+ wx_disable_rx_queue(wx, ring);
+
+ wr32(wx, WX_VXRDBAL(reg_idx), rdba & DMA_BIT_MASK(32));
+ wr32(wx, WX_VXRDBAH(reg_idx), rdba >> 32);
+
+ /* enable relaxed ordering */
+ pcie_capability_clear_and_set_word(wx->pdev, PCI_EXP_DEVCTL,
+ 0, PCI_EXP_DEVCTL_RELAX_EN);
+
+ /* reset head and tail pointers */
+ wr32(wx, WX_VXRDH(reg_idx), 0);
+ wr32(wx, WX_VXRDT(reg_idx), 0);
+ ring->tail = wx->hw_addr + WX_VXRDT(reg_idx);
+
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct wx_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = WX_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
+ /* reset ntu and ntc to place SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+ ring->next_to_alloc = 0;
+
+ wx_configure_srrctl_vf(wx, ring, reg_idx);
+
+ /* allow any size packet since we can handle overflow */
+ rxdctl &= ~WX_VXRXDCTL_BUFLEN_MASK;
+ rxdctl |= WX_VXRXDCTL_BUFLEN(wx_buf_len(ring->count));
+ rxdctl |= WX_VXRXDCTL_ENABLE | WX_VXRXDCTL_VLAN;
+
+ /* enable RSC */
+ rxdctl &= ~WX_VXRXDCTL_RSCMAX_MASK;
+ rxdctl |= WX_VXRXDCTL_RSCMAX(0);
+ rxdctl |= WX_VXRXDCTL_RSCEN;
+
+ wr32(wx, WX_VXRXDCTL(reg_idx), rxdctl);
+
+ /* pf/vf reuse */
+ wx_enable_rx_queue(wx, ring);
+ wx_alloc_rx_buffers(ring, wx_desc_unused(ring));
+}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
new file mode 100644
index 000000000000..43ea126b79eb
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _WX_VF_LIB_H_
+#define _WX_VF_LIB_H_
+
+void wx_configure_msix_vf(struct wx *wx);
+int wx_write_uc_addr_list_vf(struct net_device *netdev);
+void wx_setup_psrtype_vf(struct wx *wx);
+void wx_setup_vfmrqc_vf(struct wx *wx);
+void wx_configure_tx_vf(struct wx *wx);
+void wx_configure_rx_ring_vf(struct wx *wx, struct wx_ring *ring);
+
+#endif /* _WX_VF_LIB_H_ */
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index b5022c49dc5e..e0fc897b0a58 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues)
 if (queues)
 wx_intr_enable(wx, NGBE_INTR_ALL);
 else
- wx_intr_enable(wx, NGBE_INTR_MISC);
+ wx_intr_enable(wx, NGBE_INTR_MISC(wx));
 }
 /**
@@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx)
 * for queue. But when num_vfs == 7, vector[1] is assigned to vf6.
 * Misc and queue should reuse interrupt vector[0].
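 * In that shared case the misc mask must follow the entry actually
 * used, which is what NGBE_INTR_MISC(A) = BIT((A)->msix_entry->entry)
 * expresses; e.g. reusing vector[0] yields BIT(0), the old fixed mask.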
*/ - if (wx->num_vfs == 7) + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) err = request_irq(wx->msix_entry->vector, ngbe_misc_and_queue, 0, netdev->name, wx); else diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index bb74263f0498..3b2ca7f47e33 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -87,7 +87,7 @@ #define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */ #define NGBE_INTR_ALL 0x1FF -#define NGBE_INTR_MISC BIT(0) +#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry) #define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4)) #define NGBE_CFG_LAN_SPEED 0x14440 diff --git a/drivers/net/ethernet/wangxun/ngbevf/Makefile b/drivers/net/ethernet/wangxun/ngbevf/Makefile new file mode 100644 index 000000000000..11a4f15e2ce3 --- /dev/null +++ b/drivers/net/ethernet/wangxun/ngbevf/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. +# +# Makefile for the Wangxun(R) 1GbE virtual functions driver +# + +obj-$(CONFIG_NGBE) += ngbevf.o + +ngbevf-objs := ngbevf_main.o diff --git a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c new file mode 100644 index 000000000000..c1246ab5239c --- /dev/null +++ b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/etherdevice.h> + +#include "../libwx/wx_type.h" +#include "../libwx/wx_hw.h" +#include "../libwx/wx_lib.h" +#include "../libwx/wx_mbx.h" +#include "../libwx/wx_vf.h" +#include "../libwx/wx_vf_common.h" +#include "ngbevf_type.h" + +/* ngbevf_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +static const struct pci_device_id ngbevf_pci_tbl[] = { + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL_W), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860A2), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860A2S), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860A4), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860A4S), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL2), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL2S), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL4), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL4S), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860NCSI), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860A1), 0}, + { PCI_VDEVICE(WANGXUN, NGBEVF_DEV_ID_EM_WX1860AL1), 0}, + /* required last entry */ + { .device = 0 } +}; + +static const struct net_device_ops ngbevf_netdev_ops = { + .ndo_open = wxvf_open, + .ndo_stop = wxvf_close, + .ndo_start_xmit = wx_xmit_frame, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = wx_set_mac_vf, +}; + +static void ngbevf_set_num_queues(struct wx *wx) +{ + /* Start with base case */ + wx->num_rx_queues = 1; + wx->num_tx_queues = 1; +} + +static int ngbevf_sw_init(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + struct pci_dev *pdev = wx->pdev; + int err; + + /* Initialize pcie info and common capability flags */ + err = 
wx_sw_init(wx); + if (err < 0) + goto err_wx_sw_init; + + /* Initialize the mailbox */ + err = wx_init_mbx_params_vf(wx); + if (err) + goto err_init_mbx_params; + + /* Initialize the device type */ + wx->mac.type = wx_mac_em; + wx->mac.max_msix_vectors = NGBEVF_MAX_MSIX_VECTORS; + /* lock to protect mailbox accesses */ + spin_lock_init(&wx->mbx.mbx_lock); + + err = wx_reset_hw_vf(wx); + if (err) { + wx_err(wx, "PF still in reset state. Is the PF interface up?\n"); + goto err_reset_hw; + } + wx_init_hw_vf(wx); + wx_negotiate_api_vf(wx); + if (is_zero_ether_addr(wx->mac.addr)) + dev_info(&pdev->dev, + "MAC address not assigned by administrator.\n"); + eth_hw_addr_set(netdev, wx->mac.addr); + + if (!is_valid_ether_addr(netdev->dev_addr)) { + dev_info(&pdev->dev, "Assigning random MAC address\n"); + eth_hw_addr_random(netdev); + ether_addr_copy(wx->mac.addr, netdev->dev_addr); + ether_addr_copy(wx->mac.perm_addr, netdev->dev_addr); + } + + wx->mac.max_tx_queues = NGBEVF_MAX_TX_QUEUES; + wx->mac.max_rx_queues = NGBEVF_MAX_RX_QUEUES; + /* Enable dynamic interrupt throttling rates */ + wx->rx_itr_setting = 1; + wx->tx_itr_setting = 1; + /* set default ring sizes */ + wx->tx_ring_count = NGBEVF_DEFAULT_TXD; + wx->rx_ring_count = NGBEVF_DEFAULT_RXD; + /* set default work limits */ + wx->tx_work_limit = NGBEVF_DEFAULT_TX_WORK; + wx->rx_work_limit = NGBEVF_DEFAULT_RX_WORK; + wx->set_num_queues = ngbevf_set_num_queues; + + return 0; +err_reset_hw: + kfree(wx->vfinfo); +err_init_mbx_params: + kfree(wx->rss_key); + kfree(wx->mac_table); +err_wx_sw_init: + return err; +} + +/** + * ngbevf_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ngbevf_pci_tbl + * + * Return: return 0 on success, negative on failure + * + * ngbevf_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. 
+ **/
+static int ngbevf_probe(struct pci_dev *pdev,
+ const struct pci_device_id __always_unused *ent)
+{
+ struct net_device *netdev;
+ struct wx *wx = NULL;
+ int err;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_pci_disable_dev;
+ }
+
+ err = pci_request_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM),
+ dev_driver_string(&pdev->dev));
+ if (err) {
+ dev_err(&pdev->dev,
+ "pci_request_selected_regions failed 0x%x\n", err);
+ goto err_pci_disable_dev;
+ }
+
+ pci_set_master(pdev);
+
+ netdev = devm_alloc_etherdev_mqs(&pdev->dev,
+ sizeof(struct wx),
+ NGBEVF_MAX_TX_QUEUES,
+ NGBEVF_MAX_RX_QUEUES);
+ if (!netdev) {
+ err = -ENOMEM;
+ goto err_pci_release_regions;
+ }
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ wx = netdev_priv(netdev);
+ wx->netdev = netdev;
+ wx->pdev = pdev;
+
+ wx->msg_enable = netif_msg_init(-1, NETIF_MSG_DRV |
+ NETIF_MSG_PROBE | NETIF_MSG_LINK);
+ wx->hw_addr = devm_ioremap(&pdev->dev,
+ pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!wx->hw_addr) {
+ err = -EIO;
+ goto err_pci_release_regions;
+ }
+
+ netdev->netdev_ops = &ngbevf_netdev_ops;
+
+ /* setup the private structure */
+ err = ngbevf_sw_init(wx);
+ if (err)
+ goto err_pci_release_regions;
+
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ eth_hw_addr_set(netdev, wx->mac.perm_addr);
+ ether_addr_copy(netdev->perm_addr, wx->mac.addr);
+
+ wxvf_init_service(wx);
+ err = wx_init_interrupt_scheme(wx);
+ if (err)
+ goto err_free_sw_init;
+
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+
+ pci_set_drvdata(pdev, wx);
+ netif_tx_stop_all_queues(netdev);
+
+ return 0;
+
+err_register:
+ wx_clear_interrupt_scheme(wx);
+err_free_sw_init:
+ timer_delete_sync(&wx->service_timer);
+ cancel_work_sync(&wx->service_task);
+ kfree(wx->vfinfo);
+ kfree(wx->rss_key);
+ kfree(wx->mac_table);
+err_pci_release_regions:
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_disable_dev:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * ngbevf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ngbevf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void ngbevf_remove(struct pci_dev *pdev)
+{
+ wxvf_remove(pdev);
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(ngbevf_pm_ops, wxvf_suspend, wxvf_resume);
+
+static struct pci_driver ngbevf_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = ngbevf_pci_tbl,
+ .probe = ngbevf_probe,
+ .remove = ngbevf_remove,
+ .shutdown = wxvf_shutdown,
+ /* Power Management Hooks */
+ .driver.pm = pm_sleep_ptr(&ngbevf_pm_ops)
+};
+
+module_pci_driver(ngbevf_driver);
+
+MODULE_DEVICE_TABLE(pci, ngbevf_pci_tbl);
+MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <software@trustnetic.com>");
+MODULE_DESCRIPTION("WangXun(R) Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_type.h b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_type.h
new file mode 100644
index 000000000000..67e761089e99
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_type.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd.
*/ + +#ifndef _NGBEVF_TYPE_H_ +#define _NGBEVF_TYPE_H_ + +/* Device IDs */ +#define NGBEVF_DEV_ID_EM_WX1860AL_W 0x0110 +#define NGBEVF_DEV_ID_EM_WX1860A2 0x0111 +#define NGBEVF_DEV_ID_EM_WX1860A2S 0x0112 +#define NGBEVF_DEV_ID_EM_WX1860A4 0x0113 +#define NGBEVF_DEV_ID_EM_WX1860A4S 0x0114 +#define NGBEVF_DEV_ID_EM_WX1860AL2 0x0115 +#define NGBEVF_DEV_ID_EM_WX1860AL2S 0x0116 +#define NGBEVF_DEV_ID_EM_WX1860AL4 0x0117 +#define NGBEVF_DEV_ID_EM_WX1860AL4S 0x0118 +#define NGBEVF_DEV_ID_EM_WX1860NCSI 0x0119 +#define NGBEVF_DEV_ID_EM_WX1860A1 0x011a +#define NGBEVF_DEV_ID_EM_WX1860AL1 0x011b + +#define NGBEVF_MAX_MSIX_VECTORS 1 +#define NGBEVF_MAX_RX_QUEUES 1 +#define NGBEVF_MAX_TX_QUEUES 1 +#define NGBEVF_DEFAULT_TXD 128 +#define NGBEVF_DEFAULT_RXD 128 +#define NGBEVF_DEFAULT_TX_WORK 256 +#define NGBEVF_DEFAULT_RX_WORK 256 + +#endif /* _NGBEVF_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 7dbcf41750c1..dc87ccad9652 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config, wx_fc_enable(wx, tx_pause, rx_pause); txgbe_reconfig_mac(wx); + txgbe_enable_sec_tx_path(wx); txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG); txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 20b9a28bcb55..3885283681ec 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues) wr32(wx, WX_PX_MISC_IEN, misc_ien); /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) wx_intr_enable(wx, TXGBE_INTR_QALL(wx)); } @@ -78,7 +78,6 @@ free_queue_irqs: free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } - wx_reset_interrupt_capability(wx); return err; } @@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) txgbe->eicr = eicr; if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) { wx_msg_task(txgbe->wx); - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); } return IRQ_WAKE_THREAD; } @@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) nhandled++; } - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } @@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) free_irq(txgbe->link_irq, txgbe); free_irq(txgbe->misc.irq, txgbe); txgbe_del_irq_domain(txgbe); + txgbe->wx->misc_irq_domain = false; } int txgbe_setup_misc_irq(struct txgbe *txgbe) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index f3d2778b8e35..a5867f3c93fc 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_queue_irqs(wx); + err = txgbe_setup_misc_irq(wx->priv); if (err) goto err_free_resources; + err = txgbe_request_queue_irqs(wx); + if (err) + goto err_free_misc_irq; + /* Notify the stack of the actual queue counts. 
*/ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); if (err) @@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); +err_free_misc_irq: + txgbe_free_misc_irq(wx->priv); + wx_reset_interrupt_capability(wx); err_free_resources: wx_free_resources(wx); err_reset: @@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev) wx_ptp_stop(wx); txgbe_down(wx); wx_free_irq(wx); + txgbe_free_misc_irq(wx->priv); wx_free_resources(wx); txgbe_fdir_filter_exit(wx); wx_control_hw(wx, false); @@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev) int txgbe_setup_tc(struct net_device *dev, u8 tc) { struct wx *wx = netdev_priv(dev); - struct txgbe *txgbe = wx->priv; /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the @@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) else txgbe_reset(wx); - txgbe_free_misc_irq(txgbe); wx_clear_interrupt_scheme(wx); if (tc) @@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); wx_init_interrupt_scheme(wx); - txgbe_setup_misc_irq(txgbe); if (netif_running(dev)) txgbe_open(dev); @@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev, txgbe_init_fdir(txgbe); - err = txgbe_setup_misc_irq(txgbe); - if (err) - goto err_release_hw; - err = txgbe_init_phy(txgbe); if (err) - goto err_free_misc_irq; + goto err_release_hw; err = register_netdev(netdev); if (err) @@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev, err_remove_phy: txgbe_remove_phy(txgbe); -err_free_misc_irq: - txgbe_free_misc_irq(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); txgbe_remove_phy(txgbe); - txgbe_free_misc_irq(txgbe); wx_free_isb_resources(wx); pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 42ec815159e8..41915d7dd372 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -302,8 +302,8 @@ struct txgbe_fdir_filter { #define TXGBE_DEFAULT_RX_WORK 128 #endif -#define TXGBE_INTR_MISC BIT(0) -#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1) +#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors) +#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1) #define TXGBE_MAX_EITR GENMASK(11, 3) diff --git a/drivers/net/ethernet/wangxun/txgbevf/Makefile b/drivers/net/ethernet/wangxun/txgbevf/Makefile new file mode 100644 index 000000000000..4c7e6de04424 --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbevf/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. +# +# Makefile for the Wangxun(R) 10/25/40GbE virtual functions driver +# + +obj-$(CONFIG_TXGBE) += txgbevf.o + +txgbevf-objs := txgbevf_main.o diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c new file mode 100644 index 000000000000..ebfce3cf753e --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. 
*/ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/etherdevice.h> + +#include "../libwx/wx_type.h" +#include "../libwx/wx_hw.h" +#include "../libwx/wx_lib.h" +#include "../libwx/wx_mbx.h" +#include "../libwx/wx_vf.h" +#include "../libwx/wx_vf_common.h" +#include "txgbevf_type.h" + +/* txgbevf_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +static const struct pci_device_id txgbevf_pci_tbl[] = { + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_SP1000), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_WX1820), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML500F), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML510F), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML5024), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML5124), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML503F), 0}, + { PCI_VDEVICE(WANGXUN, TXGBEVF_DEV_ID_AML513F), 0}, + /* required last entry */ + { .device = 0 } +}; + +static const struct net_device_ops txgbevf_netdev_ops = { + .ndo_open = wxvf_open, + .ndo_stop = wxvf_close, + .ndo_start_xmit = wx_xmit_frame, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = wx_set_mac_vf, +}; + +static void txgbevf_set_num_queues(struct wx *wx) +{ + u32 def_q = 0, num_tcs = 0; + u16 rss, queue; + int ret = 0; + + /* Start with base case */ + wx->num_rx_queues = 1; + wx->num_tx_queues = 1; + + spin_lock_bh(&wx->mbx.mbx_lock); + /* fetch queue configuration from the PF */ + ret = wx_get_queues_vf(wx, &num_tcs, &def_q); + spin_unlock_bh(&wx->mbx.mbx_lock); + + if (ret) + return; + + /* we need as many queues as traffic classes */ + if (num_tcs > 1) { + wx->num_rx_queues = num_tcs; + } else { + rss = min_t(u16, num_online_cpus(), TXGBEVF_MAX_RSS_NUM); + queue = min_t(u16, wx->mac.max_rx_queues, wx->mac.max_tx_queues); + rss = min_t(u16, queue, rss); + + if (wx->vfinfo->vf_api >= wx_mbox_api_13) { + wx->num_rx_queues = rss; + wx->num_tx_queues = rss; + } + } +} + +static void txgbevf_init_type_code(struct wx *wx) +{ + switch (wx->device_id) { + case TXGBEVF_DEV_ID_SP1000: + case TXGBEVF_DEV_ID_WX1820: + wx->mac.type = wx_mac_sp; + break; + case TXGBEVF_DEV_ID_AML500F: + case TXGBEVF_DEV_ID_AML510F: + case TXGBEVF_DEV_ID_AML5024: + case TXGBEVF_DEV_ID_AML5124: + case TXGBEVF_DEV_ID_AML503F: + case TXGBEVF_DEV_ID_AML513F: + wx->mac.type = wx_mac_aml; + break; + default: + wx->mac.type = wx_mac_unknown; + break; + } +} + +static int txgbevf_sw_init(struct wx *wx) +{ + struct net_device *netdev = wx->netdev; + struct pci_dev *pdev = wx->pdev; + int err; + + /* Initialize pcie info and common capability flags */ + err = wx_sw_init(wx); + if (err < 0) + goto err_wx_sw_init; + + /* Initialize the mailbox */ + err = wx_init_mbx_params_vf(wx); + if (err) + goto err_init_mbx_params; + + /* max q_vectors */ + wx->mac.max_msix_vectors = TXGBEVF_MAX_MSIX_VECTORS; + /* Initialize the device type */ + txgbevf_init_type_code(wx); + /* lock to protect mailbox accesses */ + spin_lock_init(&wx->mbx.mbx_lock); + + err = wx_reset_hw_vf(wx); + if (err) { + wx_err(wx, "PF still in reset state. 
Is the PF interface up?\n"); + goto err_reset_hw; + } + wx_init_hw_vf(wx); + wx_negotiate_api_vf(wx); + if (is_zero_ether_addr(wx->mac.addr)) + dev_info(&pdev->dev, + "MAC address not assigned by administrator.\n"); + eth_hw_addr_set(netdev, wx->mac.addr); + + if (!is_valid_ether_addr(netdev->dev_addr)) { + dev_info(&pdev->dev, "Assigning random MAC address\n"); + eth_hw_addr_random(netdev); + ether_addr_copy(wx->mac.addr, netdev->dev_addr); + ether_addr_copy(wx->mac.perm_addr, netdev->dev_addr); + } + + wx->mac.max_tx_queues = TXGBEVF_MAX_TX_QUEUES; + wx->mac.max_rx_queues = TXGBEVF_MAX_RX_QUEUES; + /* Enable dynamic interrupt throttling rates */ + wx->rx_itr_setting = 1; + wx->tx_itr_setting = 1; + /* set default ring sizes */ + wx->tx_ring_count = TXGBEVF_DEFAULT_TXD; + wx->rx_ring_count = TXGBEVF_DEFAULT_RXD; + /* set default work limits */ + wx->tx_work_limit = TXGBEVF_DEFAULT_TX_WORK; + wx->rx_work_limit = TXGBEVF_DEFAULT_RX_WORK; + + wx->set_num_queues = txgbevf_set_num_queues; + + return 0; +err_reset_hw: + kfree(wx->vfinfo); +err_init_mbx_params: + kfree(wx->rss_key); + kfree(wx->mac_table); +err_wx_sw_init: + return err; +} + +/** + * txgbevf_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in txgbevf_pci_tbl + * + * Return: return 0 on success, negative on failure + * + * txgbevf_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +static int txgbevf_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct net_device *netdev; + struct wx *wx = NULL; + int err; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_pci_disable_dev; + } + + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM), + dev_driver_string(&pdev->dev)); + if (err) { + dev_err(&pdev->dev, + "pci_request_selected_regions failed 0x%x\n", err); + goto err_pci_disable_dev; + } + + pci_set_master(pdev); + + netdev = devm_alloc_etherdev_mqs(&pdev->dev, + sizeof(struct wx), + TXGBEVF_MAX_TX_QUEUES, + TXGBEVF_MAX_RX_QUEUES); + if (!netdev) { + err = -ENOMEM; + goto err_pci_release_regions; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + wx = netdev_priv(netdev); + wx->netdev = netdev; + wx->pdev = pdev; + + wx->msg_enable = netif_msg_init(-1, NETIF_MSG_DRV | + NETIF_MSG_PROBE | NETIF_MSG_LINK); + wx->hw_addr = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!wx->hw_addr) { + err = -EIO; + goto err_pci_release_regions; + } + + wx->b4_addr = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, 4), + pci_resource_len(pdev, 4)); + if (!wx->b4_addr) { + err = -EIO; + goto err_pci_release_regions; + } + + netdev->netdev_ops = &txgbevf_netdev_ops; + + /* setup the private structure */ + err = txgbevf_sw_init(wx); + if (err) + goto err_pci_release_regions; + + netdev->features |= NETIF_F_HIGHDMA; + + eth_hw_addr_set(netdev, wx->mac.perm_addr); + ether_addr_copy(netdev->perm_addr, wx->mac.addr); + + wxvf_init_service(wx); + err = wx_init_interrupt_scheme(wx); + if (err) + goto err_free_sw_init; + + err = register_netdev(netdev); + if (err) + goto err_register; + + pci_set_drvdata(pdev, wx); + netif_tx_stop_all_queues(netdev); + + return 0; + +err_register: + wx_clear_interrupt_scheme(wx); 
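+ /* unwind in reverse order of setup: the interrupt scheme first, then
+ * the service timer/work and the sw_init allocations
+ */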
+err_free_sw_init:
+ timer_delete_sync(&wx->service_timer);
+ cancel_work_sync(&wx->service_task);
+ kfree(wx->vfinfo);
+ kfree(wx->rss_key);
+ kfree(wx->mac_table);
+err_pci_release_regions:
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_disable_dev:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * txgbevf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * txgbevf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void txgbevf_remove(struct pci_dev *pdev)
+{
+ wxvf_remove(pdev);
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(txgbevf_pm_ops, wxvf_suspend, wxvf_resume);
+
+static struct pci_driver txgbevf_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = txgbevf_pci_tbl,
+ .probe = txgbevf_probe,
+ .remove = txgbevf_remove,
+ .shutdown = wxvf_shutdown,
+ /* Power Management Hooks */
+ .driver.pm = pm_sleep_ptr(&txgbevf_pm_ops)
+};
+
+module_pci_driver(txgbevf_driver);
+
+MODULE_DEVICE_TABLE(pci, txgbevf_pci_tbl);
+MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <software@trustnetic.com>");
+MODULE_DESCRIPTION("WangXun(R) 10/25/40 Gigabit Virtual Function Network Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_type.h b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_type.h
new file mode 100644
index 000000000000..1364d2b58bb0
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_type.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _TXGBEVF_TYPE_H_
+#define _TXGBEVF_TYPE_H_
+
+/* Device IDs */
+#define TXGBEVF_DEV_ID_SP1000 0x1000
+#define TXGBEVF_DEV_ID_WX1820 0x2000
+#define TXGBEVF_DEV_ID_AML500F 0x500F
+#define TXGBEVF_DEV_ID_AML510F 0x510F
+#define TXGBEVF_DEV_ID_AML5024 0x5024
+#define TXGBEVF_DEV_ID_AML5124 0x5124
+#define TXGBEVF_DEV_ID_AML503F 0x503f
+#define TXGBEVF_DEV_ID_AML513F 0x513f
+
+#define TXGBEVF_MAX_MSIX_VECTORS 2
+#define TXGBEVF_MAX_RSS_NUM 4
+#define TXGBEVF_MAX_RX_QUEUES 4
+#define TXGBEVF_MAX_TX_QUEUES 4
+#define TXGBEVF_DEFAULT_TXD 128
+#define TXGBEVF_DEFAULT_RXD 128
+#define TXGBEVF_DEFAULT_TX_WORK 256
+#define TXGBEVF_DEFAULT_RX_WORK 256
+
+#endif /* _TXGBEVF_TYPE_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index f25f6e2cf58c..25500c5a6928 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/types.h>
@@ -586,7 +586,6 @@ static void ipa_deconfig(struct ipa *ipa)
 static int ipa_firmware_load(struct device *dev)
 {
 const struct firmware *fw;
- struct device_node *node;
 struct resource res;
 phys_addr_t phys;
 const char *path;
@@ -594,14 +593,7 @@ static int ipa_firmware_load(struct device *dev)
 void *virt;
 int ret;
- node = of_parse_phandle(dev->of_node, "memory-region", 0);
- if (!node) {
- dev_err(dev, "DT error getting \"memory-region\" property\n");
- return -EINVAL;
- }
-
- ret = of_address_to_resource(node, 0, &res);
- of_node_put(node);
+ ret = of_reserved_mem_region_to_resource(dev->of_node, 0, &res);
 if (ret) {
 dev_err(dev, "error %d getting \"memory-region\" resource\n",
 ret);
diff --git
a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 9b1de54fd483..a60e58ef90c4 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -407,7 +407,7 @@ static int bcm5481x_set_brrmode(struct phy_device *phydev, bool on)
 static int bcm54811_config_init(struct phy_device *phydev)
 {
 struct bcm54xx_phy_priv *priv = phydev->priv;
- int err, reg;
+ int err, reg, exp_sync_ethernet;
 /* Enable CLK125 MUX on LED4 if ref clock is enabled. */
 if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) {
@@ -424,6 +424,18 @@ static int bcm54811_config_init(struct phy_device *phydev)
 if (priv->brr_mode)
 phydev->autoneg = 0;
+ /* Enable MII Lite (No TXER, RXER, CRS, COL) if configured */
+ if (phydev->interface == PHY_INTERFACE_MODE_MIILITE)
+ exp_sync_ethernet = BCM_EXP_SYNC_ETHERNET_MII_LITE;
+ else
+ exp_sync_ethernet = 0;
+
+ err = bcm_phy_modify_exp(phydev, BCM_EXP_SYNC_ETHERNET,
+ BCM_EXP_SYNC_ETHERNET_MII_LITE,
+ exp_sync_ethernet);
+ if (err < 0)
+ return err;
+
 return bcm5481x_set_brrmode(phydev, priv->brr_mode);
 }
@@ -655,7 +667,7 @@ static int bcm5481x_read_abilities(struct phy_device *phydev)
 {
 struct device_node *np = phydev->mdio.dev.of_node;
 struct bcm54xx_phy_priv *priv = phydev->priv;
- int i, val, err;
+ int i, val, err, aneg;
 for (i = 0; i < ARRAY_SIZE(bcm54811_linkmodes); i++)
 linkmode_clear_bit(bcm54811_linkmodes[i], phydev->supported);
@@ -676,9 +688,19 @@ static int bcm5481x_read_abilities(struct phy_device *phydev)
 if (val < 0)
 return val;
+ /* BCM54811 is not capable of LDS but the corresponding bit
+ * in LRESR is set to 1 and marked "Ignore" in the datasheet.
+ * So we must read the bcm54811 as unable to auto-negotiate
+ * in BroadR-Reach mode.
+ */
+ if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
+ aneg = 0;
+ else
+ aneg = val & LRESR_LDSABILITY;
+
 linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
 phydev->supported,
- val & LRESR_LDSABILITY);
+ aneg);
 linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
 phydev->supported,
 val & LRESR_100_1PAIR);
@@ -735,8 +757,15 @@ static int bcm54811_config_aneg(struct phy_device *phydev)
 /* Aneg firstly. */
 if (priv->brr_mode) {
- /* BCM54811 is only capable of autonegotiation in IEEE mode */
- phydev->autoneg = 0;
+ /* BCM54811 is only capable of autonegotiation in IEEE mode.
+ * In BroadR-Reach mode, disable Long Distance Signaling,
+ * the BRR mode autoneg supported in other Broadcom PHYs.
+ * This bit is marked as "Reserved" and "Default 1, must be
+ * written to 0 after every device reset" in the datasheet.
+ */ + ret = phy_modify(phydev, MII_BCM54XX_LRECR, LRECR_LDSEN, 0); + if (ret < 0) + return ret; ret = bcm_config_lre_aneg(phydev, false); } else { ret = genphy_config_aneg(phydev); diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index c480bb40fa73..605ca20ae192 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -115,6 +115,7 @@ int phy_interface_num_ports(phy_interface_t interface) return 0; case PHY_INTERFACE_MODE_INTERNAL: case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_MIILITE: case PHY_INTERFACE_MODE_GMII: case PHY_INTERFACE_MODE_TBI: case PHY_INTERFACE_MODE_REVMII: diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index d11ce1c7e712..2cc9ee97e867 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -316,6 +316,10 @@ unsigned long phy_caps_from_interface(phy_interface_t interface) link_caps |= BIT(LINK_CAPA_100HD) | BIT(LINK_CAPA_100FD); break; + case PHY_INTERFACE_MODE_MIILITE: + link_caps |= BIT(LINK_CAPA_10FD) | BIT(LINK_CAPA_100FD); + break; + case PHY_INTERFACE_MODE_TBI: case PHY_INTERFACE_MODE_MOCA: case PHY_INTERFACE_MODE_RTBI: diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 67218d278ce6..c7f867b361dd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -237,6 +237,7 @@ static int phylink_interface_max_speed(phy_interface_t interface) case PHY_INTERFACE_MODE_SMII: case PHY_INTERFACE_MODE_REVMII: case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_MIILITE: return SPEED_100; case PHY_INTERFACE_MODE_TBI: @@ -2709,6 +2710,39 @@ static phy_interface_t phylink_sfp_select_interface(struct phylink *pl, return interface; } +static phy_interface_t phylink_sfp_select_interface_speed(struct phylink *pl, + u32 speed) +{ + phy_interface_t best_interface = PHY_INTERFACE_MODE_NA; + phy_interface_t interface; + u32 max_speed; + int i; + + for (i = 0; i < ARRAY_SIZE(phylink_sfp_interface_preference); i++) { + interface = phylink_sfp_interface_preference[i]; + if (!test_bit(interface, pl->sfp_interfaces)) + continue; + + max_speed = phylink_interface_max_speed(interface); + + /* The logic here is: if speed == max_speed, then we've found + * the best interface. Otherwise we find the interface that + * can just support the requested speed. + */ + if (max_speed >= speed) + best_interface = interface; + + if (max_speed <= speed) + break; + } + + if (best_interface == PHY_INTERFACE_MODE_NA) + phylink_err(pl, "selection of interface failed, speed %u\n", + speed); + + return best_interface; +} + static void phylink_merge_link_mode(unsigned long *dst, const unsigned long *b) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask); @@ -2911,8 +2945,14 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * link can be configured correctly. */ if (pl->sfp_bus) { - config.interface = phylink_sfp_select_interface(pl, + if (kset->base.autoneg == AUTONEG_ENABLE) + config.interface = + phylink_sfp_select_interface(pl, config.advertising); + else + config.interface = + phylink_sfp_select_interface_speed(pl, + config.speed); if (config.interface == PHY_INTERFACE_MODE_NA) return -EINVAL; @@ -3536,6 +3576,8 @@ static int phylink_sfp_config_phy(struct phylink *pl, struct phy_device *phy) struct phylink_link_state config; int ret; + /* We're not using pl->sfp_interfaces, so clear it. 
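+ * A stale mask left over from a previous optical module could
+ * otherwise feed later interface selections such as
+ * phylink_ethtool_ksettings_set().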
*/ + phy_interface_zero(pl->sfp_interfaces); linkmode_copy(support, phy->supported); memset(&config, 0, sizeof(config)); @@ -3582,7 +3624,6 @@ static int phylink_sfp_config_phy(struct phylink *pl, struct phy_device *phy) static int phylink_sfp_config_optical(struct phylink *pl) { __ETHTOOL_DECLARE_LINK_MODE_MASK(support); - DECLARE_PHY_INTERFACE_MASK(interfaces); struct phylink_link_state config; phy_interface_t interface; int ret; @@ -3596,9 +3637,9 @@ static int phylink_sfp_config_optical(struct phylink *pl) /* Find the union of the supported interfaces by the PCS/MAC and * the SFP module. */ - phy_interface_and(interfaces, pl->config->supported_interfaces, + phy_interface_and(pl->sfp_interfaces, pl->config->supported_interfaces, pl->sfp_interfaces); - if (phy_interface_empty(interfaces)) { + if (phy_interface_empty(pl->sfp_interfaces)) { phylink_err(pl, "unsupported SFP module: no common interface modes\n"); return -EINVAL; } @@ -3614,14 +3655,14 @@ static int phylink_sfp_config_optical(struct phylink *pl) * mask to only those link modes that can be supported. */ ret = phylink_validate_mask(pl, NULL, pl->sfp_support, &config, - interfaces); + pl->sfp_interfaces); if (ret) { phylink_err(pl, "unsupported SFP module: validation with support %*pb failed\n", __ETHTOOL_LINK_MODE_MASK_NBITS, support); return ret; } - interface = phylink_choose_sfp_interface(pl, interfaces); + interface = phylink_choose_sfp_interface(pl, pl->sfp_interfaces); if (interface == PHY_INTERFACE_MODE_NA) { phylink_err(pl, "failed to select SFP interface\n"); return -EINVAL; @@ -3674,6 +3715,13 @@ static int phylink_sfp_module_insert(void *upstream, return phylink_sfp_config_optical(pl); } +static void phylink_sfp_module_remove(void *upstream) +{ + struct phylink *pl = upstream; + + phy_interface_zero(pl->sfp_interfaces); +} + static int phylink_sfp_module_start(void *upstream) { struct phylink *pl = upstream; @@ -3758,6 +3806,7 @@ static const struct sfp_upstream_ops sfp_phylink_ops = { .attach = phylink_sfp_attach, .detach = phylink_sfp_detach, .module_insert = phylink_sfp_module_insert, + .module_remove = phylink_sfp_module_remove, .module_start = phylink_sfp_module_start, .module_stop = phylink_sfp_module_stop, .link_up = phylink_sfp_link_up, diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 43e604171828..51a132242462 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -27,9 +27,6 @@ #define AT803X_LED_CONTROL 0x18 -#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 -#define AT803X_WOL_EN BIT(5) - #define AT803X_REG_CHIP_CONFIG 0x1f #define AT803X_BT_BX_REG_SEL 0x8000 @@ -916,30 +913,6 @@ static int at8031_config_init(struct phy_device *phydev) return at803x_config_init(phydev); } -static int at8031_set_wol(struct phy_device *phydev, - struct ethtool_wolinfo *wol) -{ - int ret; - - /* First setup MAC address and enable WOL interrupt */ - ret = at803x_set_wol(phydev, wol); - if (ret) - return ret; - - if (wol->wolopts & WAKE_MAGIC) - /* Enable WOL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - 0, AT803X_WOL_EN); - else - /* Disable WoL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - AT803X_WOL_EN, 0); - - return ret; -} - static int at8031_config_intr(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c index 71498c518f0f..6de16c0eaa08 100644 --- a/drivers/net/phy/qcom/qca808x.c 
+++ b/drivers/net/phy/qcom/qca808x.c
@@ -633,7 +633,7 @@ static struct phy_driver qca808x_driver[] = {
 .handle_interrupt = at803x_handle_interrupt,
 .get_tunable = at803x_get_tunable,
 .set_tunable = at803x_set_tunable,
- .set_wol = at803x_set_wol,
+ .set_wol = at8031_set_wol,
 .get_wol = at803x_get_wol,
 .get_features = qca808x_get_features,
 .config_aneg = qca808x_config_aneg,
diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c
index d28815ef56bb..af7d0d8e81be 100644
--- a/drivers/net/phy/qcom/qcom-phy-lib.c
+++ b/drivers/net/phy/qcom/qcom-phy-lib.c
@@ -115,6 +115,31 @@ int at803x_set_wol(struct phy_device *phydev,
 }
 EXPORT_SYMBOL_GPL(at803x_set_wol);
+
+int at8031_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int ret;
+
+ /* First setup MAC address and enable WOL interrupt */
+ ret = at803x_set_wol(phydev, wol);
+ if (ret)
+ return ret;
+
+ if (wol->wolopts & WAKE_MAGIC)
+ /* Enable WOL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ 0, AT803X_WOL_EN);
+ else
+ /* Disable WoL function for 1588 */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(at8031_set_wol);
+
 void at803x_get_wol(struct phy_device *phydev,
 struct ethtool_wolinfo *wol)
 {
diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h
index 4bb541728846..7f7151c8baca 100644
--- a/drivers/net/phy/qcom/qcom.h
+++ b/drivers/net/phy/qcom/qcom.h
@@ -172,6 +172,9 @@
 #define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B
 #define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A
+#define AT803X_PHY_MMD3_WOL_CTRL 0x8012
+#define AT803X_WOL_EN BIT(5)
+
 #define AT803X_DEBUG_ADDR 0x1D
 #define AT803X_DEBUG_DATA 0x1E
@@ -215,6 +218,8 @@ int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
 int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data);
 int at803x_set_wol(struct phy_device *phydev,
 struct ethtool_wolinfo *wol);
+int at8031_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol);
 void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol);
 int at803x_ack_interrupt(struct phy_device *phydev);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 31463b9e5697..b6489da5cfcd 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -155,10 +155,29 @@ static int smsc_phy_reset(struct phy_device *phydev)
 static int lan87xx_config_aneg(struct phy_device *phydev)
 {
- int rc;
+ u8 mdix_ctrl;
 int val;
+ int rc;
+
+ /* When auto-negotiation is disabled (forced mode), the PHY's
+ * Auto-MDIX will continue toggling the TX/RX pairs.
+ *
+ * To establish a stable link, we must select a fixed MDI mode.
+ * If the user has not specified a fixed MDI mode (i.e., mdix_ctrl is
+ * 'auto'), we default to ETH_TP_MDI. This choice of ETH_TP_MDI
+ * mirrors the behavior the hardware would exhibit if the AUTOMDIX_EN
+ * strap were configured for a fixed MDI connection.
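+ *
+ * The resulting policy: autoneg enabled -> honour mdix_ctrl as-is;
+ * autoneg disabled with mdix_ctrl == ETH_TP_MDI_AUTO -> force
+ * ETH_TP_MDI; autoneg disabled with an explicit MDI/MDI-X setting ->
+ * keep the user's choice.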
+ */ + if (phydev->autoneg == AUTONEG_DISABLE) { + if (phydev->mdix_ctrl == ETH_TP_MDI_AUTO) + mdix_ctrl = ETH_TP_MDI; + else + mdix_ctrl = phydev->mdix_ctrl; + } else { + mdix_ctrl = phydev->mdix_ctrl; + } - switch (phydev->mdix_ctrl) { + switch (mdix_ctrl) { case ETH_TP_MDI: val = SPECIAL_CTRL_STS_OVRRD_AMDIX_; break; @@ -167,7 +186,8 @@ static int lan87xx_config_aneg(struct phy_device *phydev) SPECIAL_CTRL_STS_AMDIX_STATE_; break; case ETH_TP_MDI_AUTO: - val = SPECIAL_CTRL_STS_AMDIX_ENABLE_; + val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_; break; default: return genphy_config_aneg(phydev); @@ -183,7 +203,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) rc |= val; phy_write(phydev, SPECIAL_CTRL_STS, rc); - phydev->mdix = phydev->mdix_ctrl; + phydev->mdix = mdix_ctrl; return genphy_config_aneg(phydev); } @@ -261,6 +281,33 @@ int lan87xx_read_status(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(lan87xx_read_status); +static int lan87xx_phy_config_init(struct phy_device *phydev) +{ + int rc; + + /* The LAN87xx PHY's initial MDI-X mode is determined by the AUTOMDIX_EN + * hardware strap, but the driver cannot read the strap's status. This + * creates an unpredictable initial state. + * + * To ensure consistent and reliable behavior across all boards, + * override the strap configuration on initialization and force the PHY + * into a known state with Auto-MDIX enabled, which is the expected + * default for modern hardware. + */ + rc = phy_modify(phydev, SPECIAL_CTRL_STS, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_); + if (rc < 0) + return rc; + + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + return smsc_phy_config_init(phydev); +} + static int lan874x_phy_config_init(struct phy_device *phydev) { u16 val; @@ -695,7 +742,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .read_status = lan87xx_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_phy_config_init, .soft_reset = smsc_phy_reset, .config_aneg = lan87xx_config_aneg, diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 4de004813560..399ce9febda4 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -860,7 +860,7 @@ out: static int pd692x0_of_get_managers(struct pd692x0_priv *priv, - struct pd692x0_manager manager[PD692X0_MAX_MANAGERS]) + struct pd692x0_manager *manager) { struct device_node *managers_node, *node; int ret, nmanagers, i, j; @@ -1164,7 +1164,7 @@ pd692x0_write_ports_matrix(struct pd692x0_priv *priv, static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { - struct pd692x0_manager manager[PD692X0_MAX_MANAGERS] = {0}; + struct pd692x0_manager *manager __free(kfree) = NULL; struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]; int ret, i, j, nmanagers; @@ -1174,6 +1174,10 @@ static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) priv->fw_state != PD692X0_FW_COMPLETE) return 0; + manager = kcalloc(PD692X0_MAX_MANAGERS, sizeof(*manager), GFP_KERNEL); + if (!manager) + return -ENOMEM; + ret = pd692x0_of_get_managers(priv, manager); if (ret < 0) return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 447c37959504..49bcd12a4ac8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -186,7 +186,8 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define 
TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)
+ NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \
+ NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)
 int align;
 int vnet_hdr_sz;
@@ -925,6 +926,7 @@ static int tun_net_init(struct net_device *dev)
 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
 TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
 NETIF_F_HW_VLAN_STAG_TX;
+ dev->hw_enc_features = dev->hw_features;
 dev->features = dev->hw_features;
 dev->vlan_features = dev->features &
 ~(NETIF_F_HW_VLAN_CTAG_TX |
@@ -1698,7 +1700,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 struct sk_buff *skb;
 size_t total_len = iov_iter_count(from);
 size_t len = total_len, align = tun->align, linear;
- struct virtio_net_hdr gso = { 0 };
+ struct virtio_net_hdr_v1_hash_tunnel hdr;
+ struct virtio_net_hdr *gso;
 int good_linear;
 int copylen;
 int hdr_len = 0;
@@ -1708,6 +1711,15 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 int skb_xdp = 1;
 bool frags = tun_napi_frags_enabled(tfile);
 enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ netdev_features_t features = 0;
+
+ /*
+ * Keep it easy and always zero the whole buffer, even if the
+ * tunnel-related field will be touched only when the feature
+ * is enabled and the hdr size is compatible.
+ */
+ memset(&hdr, 0, sizeof(hdr));
+ gso = (struct virtio_net_hdr *)&hdr;
 if (!(tun->flags & IFF_NO_PI)) {
 if (len < sizeof(pi))
@@ -1721,7 +1733,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 if (tun->flags & IFF_VNET_HDR) {
 int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso);
+ features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
+ hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
+ features, from, gso);
 if (hdr_len < 0)
 return hdr_len;
@@ -1755,7 +1769,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 * (e.g gso or jumbo packet), we will do it at after
 * skb was created with generic XDP routine.
 */
- skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
+ skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
 err = PTR_ERR_OR_ZERO(skb);
 if (err)
 goto drop;
@@ -1799,7 +1813,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 }
 }
- if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) {
+ if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) {
 atomic_long_inc(&tun->rx_frame_errors);
 err = -EINVAL;
 goto free_skb;
@@ -2050,13 +2064,21 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 }
 if (vnet_hdr_sz) {
- struct virtio_net_hdr gso;
+ struct virtio_net_hdr_v1_hash_tunnel hdr;
+ struct virtio_net_hdr *gso;
- ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+ ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
+ &hdr);
 if (ret)
 return ret;
- ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ /*
+ * Drop the packet if the configured header size is too small
+ * WRT the enabled offloads.
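+ * (With UDP tunnel GSO enabled, the header spans
+ * sizeof(struct virtio_net_hdr_v1_hash_tunnel) bytes, which a
+ * legacy-sized vnet header area cannot hold.)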
+ */ + gso = (struct virtio_net_hdr *)&hdr; + ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features, + iter, gso); if (ret) return ret; } @@ -2357,9 +2379,11 @@ static int tun_xdp_one(struct tun_struct *tun, { unsigned int datasize = xdp->data_end - xdp->data; struct virtio_net_hdr *gso = xdp->data_hard_start; + struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr; struct bpf_prog *xdp_prog; struct sk_buff *skb = NULL; struct sk_buff_head *queue; + netdev_features_t features; u32 rxhash = 0, act; int buflen = xdp->frame_sz; int metasize = 0; @@ -2425,7 +2449,9 @@ build: if (metasize > 0) skb_metadata_set(skb, metasize); - if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { + features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz)); + tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso; + if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -2811,6 +2837,8 @@ static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) } +#define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6) + /* This is like a cut-down ethtool ops, except done via tun fd so no * privs required. */ static int set_offload(struct tun_struct *tun, unsigned long arg) @@ -2840,6 +2868,18 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_GSO_UDP_L4; arg &= ~(TUN_F_USO4 | TUN_F_USO6); } + + /* + * Tunnel offload is allowed only if some plain offload is + * available, too. + */ + if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) { + features |= NETIF_F_GSO_UDP_TUNNEL; + if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM) + features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + arg &= ~(TUN_F_UDP_TUNNEL_GSO | + TUN_F_UDP_TUNNEL_GSO_CSUM); + } } /* This gives the user a way to test for new features in future by diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index 58b9ac7a5fc4..81662328b2c7 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -6,6 +6,8 @@ #define TUN_VNET_LE 0x80000000 #define TUN_VNET_BE 0x40000000 +#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel) + static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags) { bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && @@ -107,16 +109,26 @@ static inline long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags, } } -static inline int tun_vnet_hdr_get(int sz, unsigned int flags, - struct iov_iter *from, - struct virtio_net_hdr *hdr) +static inline unsigned int tun_vnet_parse_size(netdev_features_t features) +{ + if (!(features & NETIF_F_GSO_UDP_TUNNEL)) + return sizeof(struct virtio_net_hdr); + + return TUN_VNET_TNL_SIZE; +} + +static inline int __tun_vnet_hdr_get(int sz, unsigned int flags, + netdev_features_t features, + struct iov_iter *from, + struct virtio_net_hdr *hdr) { + unsigned int parsed_size = tun_vnet_parse_size(features); u16 hdr_len; if (iov_iter_count(from) < sz) return -EINVAL; - if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + if (!copy_from_iter_full(hdr, parsed_size, from)) return -EFAULT; hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); @@ -129,32 +141,70 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags, if (hdr_len > iov_iter_count(from)) return -EINVAL; - iov_iter_advance(from, sz - sizeof(*hdr)); + iov_iter_advance(from, sz - parsed_size); return hdr_len; } -static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, - const struct virtio_net_hdr *hdr) +static inline int tun_vnet_hdr_get(int sz, unsigned int flags, + struct iov_iter 
*from, + struct virtio_net_hdr *hdr) +{ + return __tun_vnet_hdr_get(sz, flags, 0, from, hdr); +} + +static inline int __tun_vnet_hdr_put(int sz, netdev_features_t features, + struct iov_iter *iter, + const struct virtio_net_hdr *hdr) { + unsigned int parsed_size = tun_vnet_parse_size(features); + if (unlikely(iov_iter_count(iter) < sz)) return -EINVAL; - if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + if (unlikely(copy_to_iter(hdr, parsed_size, iter) != parsed_size)) return -EFAULT; - if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr)) + if (iov_iter_zero(sz - parsed_size, iter) != sz - parsed_size) return -EFAULT; return 0; } +static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + return __tun_vnet_hdr_put(sz, 0, iter, hdr); +} + static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, const struct virtio_net_hdr *hdr) { return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags)); } +/* + * Tun is not aware of the negotiated guest features, so guess them from + * the virtio net hdr size + */ +static inline netdev_features_t tun_vnet_hdr_guest_features(int vnet_hdr_sz) +{ + if (vnet_hdr_sz >= TUN_VNET_TNL_SIZE) + return NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; + return 0; +} + +static inline int +tun_vnet_hdr_tnl_to_skb(unsigned int flags, netdev_features_t features, + struct sk_buff *skb, + const struct virtio_net_hdr_v1_hash_tunnel *hdr) +{ + return virtio_net_hdr_tnl_to_skb(skb, hdr, + features & NETIF_F_GSO_UDP_TUNNEL, + features & NETIF_F_GSO_UDP_TUNNEL_CSUM, + tun_vnet_is_little_endian(flags)); +} + static inline int tun_vnet_hdr_from_skb(unsigned int flags, const struct net_device *dev, const struct sk_buff *skb, @@ -183,4 +233,37 @@ static inline int tun_vnet_hdr_from_skb(unsigned int flags, return 0; } +static inline int +tun_vnet_hdr_tnl_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr) +{ + bool has_tnl_offload = !!(dev->features & NETIF_F_GSO_UDP_TUNNEL); + int vlan_hlen = skb_vlan_tag_present(skb) ?
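With the tun pieces above in place, the vnet header size does double duty: tun_vnet_hdr_guest_features() treats any header of at least TUN_VNET_TNL_SIZE as the sign that the guest negotiated the UDP tunnel offloads. A user-space sketch of how a TAP-based backend might opt in, assuming uapi headers that already carry the new TUN_F_UDP_TUNNEL_GSO* flags and struct virtio_net_hdr_v1_hash_tunnel (error handling trimmed to the essentials):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/virtio_net.h>

static int tap_enable_tunnel_gso(const char *name)
{
	int hdr_sz = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
	struct ifreq ifr;
	int fd;

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(fd, TUNSETIFF, &ifr) < 0)
		goto err;

	/*
	 * Size the header first: tun_vnet_hdr_guest_features() infers the
	 * guest tunnel features purely from vnet_hdr_sz >= TUN_VNET_TNL_SIZE.
	 */
	if (ioctl(fd, TUNSETVNETHDRSZ, &hdr_sz) < 0)
		goto err;

	/*
	 * set_offload() accepts the tunnel flags only together with checksum
	 * offload and at least one plain GSO offload (TSO/USO).
	 */
	if (ioctl(fd, TUNSETOFFLOAD, TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
		  TUN_F_UDP_TUNNEL_GSO | TUN_F_UDP_TUNNEL_GSO_CSUM) < 0)
		goto err;

	return fd;
err:
	close(fd);
	return -1;
}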
VLAN_HLEN : 0; + + if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, + tun_vnet_is_little_endian(flags), + vlan_hlen)) { + struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + int hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); + + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, + tun_vnet16_to_cpu(flags, hdr->gso_size), + tun_vnet16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, + 16, 1, skb->head, min(hdr_len, 64), + true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + #endif /* TUN_VNET_H */ diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f3347fb0c400..1ff25f57329a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -26,7 +26,6 @@ #include <linux/irq.h> #include <linux/irqchip/chained_irq.h> #include <linux/microchipphy.h> -#include <linux/phy_fixed.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include "lan78xx.h" diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 9564478a79cc..6a3cca104af9 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -861,14 +861,14 @@ int usbnet_stop (struct net_device *net) /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; timer_delete_sync(&dev->delay); - disable_work_sync(&dev->bh_work); + cancel_work_sync(&dev->bh_work); cancel_work_sync(&dev->kevent); /* We have cyclic dependencies. Those calls are needed * to break a cycle. We cannot fall into the gaps because * we have a flag */ - disable_work_sync(&dev->bh_work); + cancel_work_sync(&dev->bh_work); timer_delete_sync(&dev->delay); cancel_work_sync(&dev->kevent); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 07e41dce4203..64453f4da825 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -35,6 +35,23 @@ module_param(csum, bool, 0444); module_param(gso, bool, 0444); module_param(napi_tx, bool, 0644); +#define VIRTIO_OFFLOAD_MAP_MIN 46 +#define VIRTIO_OFFLOAD_MAP_MAX 47 +#define VIRTIO_FEATURES_MAP_MIN 65 +#define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - \ + VIRTIO_OFFLOAD_MAP_MIN) + +static bool virtio_is_mapped_offload(unsigned int obit) +{ + return obit >= VIRTIO_OFFLOAD_MAP_MIN && + obit <= VIRTIO_OFFLOAD_MAP_MAX; +} + +static unsigned int virtio_offload_to_feature(unsigned int obit) +{ + return virtio_is_mapped_offload(obit) ? obit + VIRTIO_O2F_DELTA : obit; +} + /* FIXME: MTU in config. 
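The offload-to-feature mapping introduced in the virtio_net hunk above exists because the two new guest offloads live in the extended feature area: the guest_offloads[] control plane keeps tracking them as bits 46 and 47, but the corresponding device feature bits sit past bit 63, so virtio_has_feature() must be asked about bit 65 or 66 instead. A standalone model of the helper with the arithmetic spelled out (macro values copied from the diff; which of 65/66 maps to GSO versus GSO_CSUM follows the uapi ordering assumed here):

#include <assert.h>

#define VIRTIO_OFFLOAD_MAP_MIN   46
#define VIRTIO_OFFLOAD_MAP_MAX   47
#define VIRTIO_FEATURES_MAP_MIN  65
#define VIRTIO_O2F_DELTA (VIRTIO_FEATURES_MAP_MIN - VIRTIO_OFFLOAD_MAP_MIN)

static unsigned int virtio_offload_to_feature(unsigned int obit)
{
	/* only the two mapped offload bits get shifted by the delta (19) */
	return (obit >= VIRTIO_OFFLOAD_MAP_MIN && obit <= VIRTIO_OFFLOAD_MAP_MAX)
		? obit + VIRTIO_O2F_DELTA : obit;
}

int main(void)
{
	/* mapped offload bits land in the extended (>63) feature range */
	assert(virtio_offload_to_feature(46) == 65);
	assert(virtio_offload_to_feature(47) == 66);
	/* every other offload bit is also its feature bit, unchanged */
	assert(virtio_offload_to_feature(0) == 0);
	return 0;
}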
*/ #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) #define GOOD_COPY_LEN 128 @@ -62,15 +79,19 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, - VIRTIO_NET_F_GUEST_HDRLEN + VIRTIO_NET_F_GUEST_HDRLEN, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED, }; #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ - (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ - (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ - (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_USO6)) + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ + (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_USO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \ + (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)) struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; @@ -423,6 +444,13 @@ struct virtnet_info { /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; + /* UDP tunnel support */ + bool tx_tnl; + + bool rx_tnl; + + bool rx_tnl_csum; + /* Is delayed refill enabled? */ bool refill_enabled; @@ -482,6 +510,7 @@ struct virtio_net_common_hdr { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mrg_hdr; struct virtio_net_hdr_v1_hash hash_v1_hdr; + struct virtio_net_hdr_v1_hash_tunnel tnl_hdr; }; }; @@ -778,6 +807,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } +static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, + unsigned int len) +{ + unsigned int headroom, tailroom, room, truesize; + + truesize = mergeable_ctx_to_truesize(mrg_ctx); + headroom = mergeable_ctx_to_headroom(mrg_ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + if (len > truesize - room) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + DEV_STATS_INC(dev, rx_length_errors); + return -1; + } + + return 0; +} + static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) @@ -1084,7 +1133,7 @@ static bool tx_may_stop(struct virtnet_info *vi, * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); netif_tx_stop_queue(txq); @@ -1116,7 +1165,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. 
*/ free_old_xmit(sq, txq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { netif_start_subqueue(dev, qnum); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -1127,15 +1176,29 @@ } } +/* Note that @len is the length of received data without the virtio header */ static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, - struct receive_queue *rq, void *buf, u32 len) + struct receive_queue *rq, void *buf, + u32 len, bool first_buf) { struct xdp_buff *xdp; u32 bufsize; xdp = (struct xdp_buff *)buf; - bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; + /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for + * the virtio header and ask the vhost to fill data from + * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len + * The first buffer has the virtio header, so the remaining region for + * frame data is + * xsk_pool_get_rx_frame_size() + * Buffers other than the first one do not have the virtio header, so + * their maximum frame data length is + * xsk_pool_get_rx_frame_size() + vi->hdr_len + */ + bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); + if (!first_buf) + bufsize += vi->hdr_len; if (unlikely(len > bufsize)) { pr_debug("%s: rx error: len %u exceeds truesize %u\n", @@ -1145,7 +1208,14 @@ static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, return NULL; } - xsk_buff_set_size(xdp, len); + if (first_buf) { + xsk_buff_set_size(xdp, len); + } else { + xdp_prepare_buff(xdp, xdp->data_hard_start, + XDP_PACKET_HEADROOM - vi->hdr_len, len, 1); + xdp->flags = 0; + } + xsk_buff_dma_sync_for_cpu(xdp); return xdp; @@ -1260,7 +1330,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, false); if (!xdp) goto err; @@ -1270,7 +1340,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, goto err; } - memcpy(buf, xdp->data - vi->hdr_len, len); + memcpy(buf, xdp->data, len); xsk_buff_free(xdp); @@ -1358,7 +1428,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, true); if (!xdp) return; @@ -1797,7 +1867,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. */ -static struct page *xdp_linearize_page(struct receive_queue *rq, +static struct page *xdp_linearize_page(struct net_device *dev, + struct receive_queue *rq, int *num_buf, struct page *p, int offset, @@ -1817,18 +1888,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; + /* Only mergeable mode can enter this while loop. In small mode, + * *num_buf == 1, so the loop body never runs. + */ while (--*num_buf) { unsigned int buflen; void *buf; + void *ctx; int off; - buf = virtnet_rq_get_buf(rq, &buflen, NULL); + buf = virtnet_rq_get_buf(rq, &buflen, &ctx); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); + if (check_mergeable_len(dev, ctx, buflen)) { + put_page(p); + goto err_buf; + } + /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU.
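The buf_to_xdp() asymmetry fixed above is easier to see with concrete numbers. Assuming the usual XDP_PACKET_HEADROOM of 256 and a 12-byte header (the plain v1 layout; hash or tunnel metadata makes it bigger), virtnet_add_recvbuf_xsk() lays each chunk out as follows:

/*
 * First buffer of a packet (carries the virtio header):
 *
 *   hard_start       +244             +256
 *   |-- headroom ---| vnet hdr (12) |-- frame data ... --|
 *                   ^ vhost writes the header at
 *                     XDP_PACKET_HEADROOM - vi->hdr_len
 *   max frame data: xsk_pool_get_rx_frame_size()
 *
 * Continuation buffers (mergeable path, no virtio header):
 *
 *   hard_start       +244
 *   |-- headroom ---|-- frame data ... --|
 *   max frame data: xsk_pool_get_rx_frame_size() + vi->hdr_len
 *
 * Previously a single "+ vi->hdr_len" bound and a compensating
 * "xdp->data - vi->hdr_len" in the merge path papered over this
 * asymmetry; passing first_buf makes each case explicit.
 */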
*/ @@ -1917,7 +1997,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - xdp_page = xdp_linearize_page(rq, &num_buf, page, + xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) @@ -2126,10 +2206,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; - unsigned int headroom, tailroom, room; - unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; + unsigned int truesize; struct page *page; skb_frag_t *frag; int offset; @@ -2172,21 +2251,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, page = virt_to_head_page(buf); offset = buf - page_address(page); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - - cur_frag_size = truesize; - xdp_frags_truesz += cur_frag_size; - if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + if (check_mergeable_len(dev, ctx, len)) { put_page(page); - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); goto err; } + truesize = mergeable_ctx_to_truesize(ctx); + xdp_frags_truesz += truesize; + frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) @@ -2252,7 +2324,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, */ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ - xdp_page = xdp_linearize_page(rq, num_buf, + xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, *page, offset, XDP_PACKET_HEADROOM, len); @@ -2400,18 +2472,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; @@ -2446,17 +2512,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } + truesize = mergeable_ctx_to_truesize(ctx); curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) @@ -2515,14 +2574,21 @@ static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue * if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); - if (flags & VIRTIO_NET_HDR_F_DATA_VALID) - skb->ip_summed = CHECKSUM_UNNECESSARY; + hdr->hdr.flags = flags; + if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) { + net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n", + dev->name, hdr->hdr.flags, + hdr->hdr.gso_type, vi->rx_tnl_csum); + goto frame_err; + } - if (virtio_net_hdr_to_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev))) { - net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", + if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl, + vi->rx_tnl_csum, + virtio_is_little_endian(vi->vdev))) { + net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n", dev->name, hdr->hdr.gso_type, - hdr->hdr.gso_size); + hdr->hdr.gso_size, hdr->hdr.flags, + vi->rx_tnl, vi->rx_tnl_csum); goto frame_err; } @@ -2998,7 +3064,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) free_old_xmit(sq, txq, !!budget); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3195,7 +3261,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) else free_old_xmit(sq, txq, !!budget); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3234,9 +3300,9 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) { - struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; + struct virtio_net_hdr_v1_hash_tunnel *hdr; int num_sg; unsigned hdr_len = vi->hdr_len; bool can_push; @@ -3249,17 +3315,17 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) /* Even if we can, don't push here yet as this would skew * csum_start offset below. */ if (can_push) - hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len); + hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data - + hdr_len); else - hdr = &skb_vnet_common_hdr(skb)->mrg_hdr; + hdr = &skb_vnet_common_hdr(skb)->tnl_hdr; - if (virtio_net_hdr_from_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev), false, - 0)) + if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl, + virtio_is_little_endian(vi->vdev), 0)) return -EPROTO; if (vi->mergeable_rx_bufs) - hdr->num_buffers = 0; + hdr->hash_hdr.hdr.num_buffers = 0; sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 
1 : 2)); if (can_push) { @@ -3481,6 +3547,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, { int qindex, err; + if (ring_num <= MAX_SKB_FRAGS + 2) { + netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", + ring_num, MAX_SKB_FRAGS + 2); + return -EINVAL; + } + qindex = sq - vi->sq; virtnet_tx_pause(vi, sq); @@ -6748,10 +6820,20 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) dev->hw_features |= NETIF_F_GSO_UDP_L4; + if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->hw_enc_features = dev->hw_features; + } + if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL && + virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + dev->features |= NETIF_F_GSO_ROBUST; if (gso) - dev->features |= dev->hw_features & NETIF_F_ALL_TSO; + dev->features |= dev->hw_features; /* (!csum && gso) case will be fixed by register_netdev() */ } @@ -6844,7 +6926,10 @@ static int virtnet_probe(struct virtio_device *vdev) dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; } - if (vi->has_rss_hash_report) + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) || + virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) + vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); + else if (vi->has_rss_hash_report) vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) @@ -6852,6 +6937,13 @@ static int virtnet_probe(struct virtio_device *vdev) else vi->hdr_len = sizeof(struct virtio_net_hdr); + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM)) + vi->rx_tnl_csum = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) + vi->rx_tnl = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) + vi->tx_tnl = true; + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; @@ -7026,9 +7118,13 @@ static int virtnet_probe(struct virtio_device *vdev) netif_carrier_on(dev); } - for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) - if (virtio_has_feature(vi->vdev, guest_offloads[i])) + for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { + unsigned int fbit; + + fbit = virtio_offload_to_feature(guest_offloads[i]); + if (virtio_has_feature(vi->vdev, fbit)) set_bit(guest_offloads[i], &vi->guest_offloads); + } vi->guest_offloads_capable = vi->guest_offloads; rtnl_unlock(); @@ -7158,6 +7254,10 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTNET_FEATURES, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, + VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, + VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, }; static unsigned int features_legacy[] = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index b5ad6d635fcb..50d454514fe5 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,10 +10,10 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_BZ_UCODE_API_MAX 99 +#define IWL_BZ_UCODE_API_MAX 102 /* Lowest firmware API version supported */ -#define IWL_BZ_UCODE_API_MIN 94 +#define IWL_BZ_UCODE_API_MIN 98 /* Memory offsets and lengths */ 
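The virtnet_tx_resize() guard added in the virtio_net hunks above encodes the driver's worst-case descriptor budget, the same quantity behind the 2+MAX_SKB_FRAGS cleanups. A worked count, assuming the default CONFIG_MAX_SKB_FRAGS of 17:

/*
 * Worst-case virtqueue slots for one skb:
 *
 *   1 (virtio header, when it cannot be pushed into the linear part)
 * + 1 (linear data)
 * + MAX_SKB_FRAGS (page fragments)
 * = MAX_SKB_FRAGS + 2, i.e. 19 with the default CONFIG_MAX_SKB_FRAGS=17
 *
 * tx_may_stop() parks the queue once fewer than that many slots remain,
 * and the queue is only restarted when that many slots are free again,
 * so a ring with MAX_SKB_FRAGS + 2 entries or fewer can never accept a
 * worst-case skb and still keep the queue running; virtnet_tx_resize()
 * now rejects such sizes up front.
 */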
#define IWL_BZ_SMEM_OFFSET 0x400000 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c index 95aa27c35357..807f4e29d55a 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c @@ -9,7 +9,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_DR_UCODE_API_MAX 99 +#define IWL_DR_UCODE_API_MAX 102 /* Lowest firmware API version supported */ #define IWL_DR_UCODE_API_MIN 98 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 12c2adb4b5c4..97e503a25eae 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_SC_UCODE_API_MAX 99 +#define IWL_SC_UCODE_API_MAX 102 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 98 diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/eeprom.c b/drivers/net/wireless/intel/iwlwifi/dvm/eeprom.c index 2423125e5284..8087aee03d1c 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/eeprom.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/eeprom.c @@ -676,10 +676,9 @@ static int iwl_eeprom_acquire_semaphore(struct iwl_trans *trans) CSR_HW_IF_CONFIG_REG_EEPROM_OWN_SEM); /* See if we got it */ - ret = iwl_poll_bit(trans, CSR_HW_IF_CONFIG_REG, - CSR_HW_IF_CONFIG_REG_EEPROM_OWN_SEM, - CSR_HW_IF_CONFIG_REG_EEPROM_OWN_SEM, - IWL_EEPROM_SEM_TIMEOUT); + ret = iwl_poll_bits(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_EEPROM_OWN_SEM, + IWL_EEPROM_SEM_TIMEOUT); if (ret >= 0) { IWL_DEBUG_EEPROM(trans->dev, "Acquired semaphore after %d tries.\n", @@ -797,10 +796,9 @@ static int iwl_read_otp_word(struct iwl_trans *trans, u16 addr, iwl_write32(trans, CSR_EEPROM_REG, CSR_EEPROM_REG_MSK_ADDR & (addr << 1)); - ret = iwl_poll_bit(trans, CSR_EEPROM_REG, - CSR_EEPROM_REG_READ_VALID_MSK, - CSR_EEPROM_REG_READ_VALID_MSK, - IWL_EEPROM_ACCESS_TIMEOUT); + ret = iwl_poll_bits(trans, CSR_EEPROM_REG, + CSR_EEPROM_REG_READ_VALID_MSK, + IWL_EEPROM_ACCESS_TIMEOUT); if (ret < 0) { IWL_ERR(trans, "Time out reading OTP[%d]\n", addr); return ret; @@ -993,10 +991,9 @@ int iwl_read_eeprom(struct iwl_trans *trans, u8 **eeprom, size_t *eeprom_size) iwl_write32(trans, CSR_EEPROM_REG, CSR_EEPROM_REG_MSK_ADDR & (addr << 1)); - ret = iwl_poll_bit(trans, CSR_EEPROM_REG, - CSR_EEPROM_REG_READ_VALID_MSK, - CSR_EEPROM_REG_READ_VALID_MSK, - IWL_EEPROM_ACCESS_TIMEOUT); + ret = iwl_poll_bits(trans, CSR_EEPROM_REG, + CSR_EEPROM_REG_READ_VALID_MSK, + IWL_EEPROM_ACCESS_TIMEOUT); if (ret < 0) { IWL_ERR(trans, "Time out reading EEPROM[%d]\n", addr); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index e015b83bb6e9..2b4dbebc71c2 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1467,7 +1467,8 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, /******************** * 6. 
Setup services ********************/ - if (iwl_setup_deferred_work(priv)) + err = iwl_setup_deferred_work(priv); + if (err) goto out_uninit_drv; iwl_setup_rx_handlers(priv); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h index 9ce819503aed..99554496300a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h @@ -912,7 +912,7 @@ struct iwl_wowlan_mlo_gtk { } __packed; /* WOWLAN_MLO_GTK_KEY_API_S_VER_1 */ /** - * struct iwl_wowlan_info_notif_v4 - WoWLAN information notification + * struct iwl_wowlan_info_notif_v3 - WoWLAN information notification * @gtk: GTK data * @igtk: IGTK data * @bigtk: BIGTK data @@ -927,12 +927,9 @@ struct iwl_wowlan_mlo_gtk { * @tid_tear_down: bit mask of tids whose BA sessions were closed * in suspend state * @station_id: station id - * @num_mlo_link_keys: number of &struct iwl_wowlan_mlo_gtk structs - * following this notif, or reserved in version < 4 * @reserved2: reserved - * @mlo_gtks: array of GTKs of size num_mlo_link_keys for version >= 4 */ -struct iwl_wowlan_info_notif_v4 { +struct iwl_wowlan_info_notif_v3 { struct iwl_wowlan_gtk_status_v3 gtk[WOWLAN_GTK_KEYS_NUM]; struct iwl_wowlan_igtk_status igtk[WOWLAN_IGTK_KEYS_NUM]; struct iwl_wowlan_igtk_status bigtk[WOWLAN_BIGTK_KEYS_NUM]; @@ -946,10 +943,8 @@ struct iwl_wowlan_info_notif_v4 { __le32 received_beacons; u8 tid_tear_down; u8 station_id; - u8 num_mlo_link_keys; - u8 reserved2; - struct iwl_wowlan_mlo_gtk mlo_gtks[]; -} __packed; /* WOWLAN_INFO_NTFY_API_S_VER_3, _VER_4 */ + u8 reserved2[2]; +} __packed; /* WOWLAN_INFO_NTFY_API_S_VER_3 */ /** * struct iwl_wowlan_info_notif - WoWLAN information notification diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 5a1ec880ed72..dc1db563c5eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -104,6 +104,9 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_CURRENT_PC = 68, IWL_UCODE_TLV_FSEQ_BIN_VERSION = 72, + /* contains sub-sections like PNVM file does (did) */ + IWL_UCODE_TLV_PNVM_DATA = 74, + IWL_UCODE_TLV_FW_NUM_STATIONS = IWL_UCODE_TLV_CONST_BASE + 0, IWL_UCODE_TLV_FW_NUM_LINKS = IWL_UCODE_TLV_CONST_BASE + 1, IWL_UCODE_TLV_FW_NUM_BEACONS = IWL_UCODE_TLV_CONST_BASE + 2, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h index e055f798a398..5256f20623e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h @@ -195,6 +195,8 @@ struct iwl_dump_exclude { * @phy_integration_ver_len: length of @phy_integration_ver * @dump_excl: image dump exclusion areas for RT image * @dump_excl_wowlan: image dump exclusion areas for WoWLAN image + * @pnvm_data: PNVM data embedded in the .ucode file, if any + * @pnvm_size: size of the embedded PNVM data */ struct iwl_fw { u32 ucode_ver; @@ -227,6 +229,9 @@ struct iwl_fw { u32 phy_integration_ver_len; struct iwl_dump_exclude dump_excl[2], dump_excl_wowlan[2]; + + const void *pnvm_data; + u32 pnvm_size; }; static inline const char *get_fw_dbg_mode_string(int mode) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 3bcd375995cc..4d91ae065c8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -11,6 +11,7 @@ #include "fw/api/nvm-reg.h" #include "fw/api/alive.h" #include "fw/uefi.h" 
+#include "fw/img.h" #define IWL_PNVM_REDUCED_CAP_BIT BIT(25) @@ -264,8 +265,8 @@ static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len) return 0; } -static u8 *iwl_get_pnvm_image(struct iwl_trans *trans_p, size_t *len, - __le32 sku_id[3]) +static const u8 *iwl_get_pnvm_image(struct iwl_trans *trans_p, size_t *len, + __le32 sku_id[3], const struct iwl_fw *fw) { struct pnvm_sku_package *package; u8 *image = NULL; @@ -290,6 +291,12 @@ static u8 *iwl_get_pnvm_image(struct iwl_trans *trans_p, size_t *len, } } + if (fw->pnvm_data) { + *len = fw->pnvm_size; + + return fw->pnvm_data; + } + /* If it's not available, or for Intel SKU, try from the filesystem */ if (iwl_pnvm_get_from_fs(trans_p, &image, len)) return NULL; @@ -298,11 +305,11 @@ static u8 *iwl_get_pnvm_image(struct iwl_trans *trans_p, size_t *len, static void iwl_pnvm_load_pnvm_to_trans(struct iwl_trans *trans, - const struct iwl_ucode_capabilities *capa, + const struct iwl_fw *fw, __le32 sku_id[3]) { struct iwl_pnvm_image *pnvm_data = NULL; - u8 *data = NULL; + const u8 *data = NULL; size_t length; int ret; @@ -313,7 +320,7 @@ iwl_pnvm_load_pnvm_to_trans(struct iwl_trans *trans, if (trans->pnvm_loaded) goto set; - data = iwl_get_pnvm_image(trans, &length, sku_id); + data = iwl_get_pnvm_image(trans, &length, sku_id, fw); if (!data) { trans->fail_to_parse_pnvm_image = true; return; @@ -329,15 +336,17 @@ iwl_pnvm_load_pnvm_to_trans(struct iwl_trans *trans, goto free; } - ret = iwl_trans_load_pnvm(trans, pnvm_data, capa); + ret = iwl_trans_load_pnvm(trans, pnvm_data, &fw->ucode_capa); if (ret) goto free; IWL_DEBUG_INFO(trans, "loaded PNVM version %08x\n", pnvm_data->version); set: - iwl_trans_set_pnvm(trans, capa); + iwl_trans_set_pnvm(trans, &fw->ucode_capa); free: - kvfree(data); + /* free only if it was allocated, i.e. 
not just embedded PNVM data */ + if (data != fw->pnvm_data) + kvfree(data); kfree(pnvm_data); } @@ -392,8 +401,7 @@ free: int iwl_pnvm_load(struct iwl_trans *trans, struct iwl_notif_wait_data *notif_wait, - const struct iwl_ucode_capabilities *capa, - __le32 sku_id[3]) + const struct iwl_fw *fw, __le32 sku_id[3]) { struct iwl_notification_wait pnvm_wait; static const u16 ntf_cmds[] = { WIDE_ID(REGULATORY_AND_NVM_GROUP, @@ -403,8 +411,8 @@ int iwl_pnvm_load(struct iwl_trans *trans, if (!sku_id[0] && !sku_id[1] && !sku_id[2]) return 0; - iwl_pnvm_load_pnvm_to_trans(trans, capa, sku_id); - iwl_pnvm_load_reduce_power_to_trans(trans, capa, sku_id); + iwl_pnvm_load_pnvm_to_trans(trans, fw, sku_id); + iwl_pnvm_load_reduce_power_to_trans(trans, &fw->ucode_capa, sku_id); iwl_init_notification_wait(notif_wait, &pnvm_wait, ntf_cmds, ARRAY_SIZE(ntf_cmds), diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h index 9540926e8a0f..ad3b7e2423ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h @@ -7,6 +7,7 @@ #include "iwl-drv.h" #include "fw/notif-wait.h" +#include "fw/img.h" #define MVM_UCODE_PNVM_TIMEOUT (HZ / 4) @@ -14,8 +15,7 @@ int iwl_pnvm_load(struct iwl_trans *trans, struct iwl_notif_wait_data *notif_wait, - const struct iwl_ucode_capabilities *capa, - __le32 sku_id[3]); + const struct iwl_fw *fw, __le32 sku_id[3]); static inline void iwl_pnvm_get_fs_name(struct iwl_trans *trans, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 91f22ce36d74..30e5f5a5cd89 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -488,8 +488,8 @@ struct iwl_dev_info { rf_type:9, match_bw_limit:1, bw_limit:1, - match_rf_step:1, - rf_step:4, + match_discrete:1, + discrete:1, match_rf_id:1, rf_id:4, match_cdb:1, @@ -499,12 +499,13 @@ struct iwl_dev_info { #if IS_ENABLED(CONFIG_IWLWIFI_KUNIT_TESTS) extern const struct iwl_dev_info iwl_dev_info_table[]; extern const unsigned int iwl_dev_info_table_size; -const struct iwl_dev_info * -iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 rf_type, u8 cdb, - u8 rf_id, u8 bw_limit, u8 rf_step); extern const struct pci_device_id iwl_hw_card_ids[]; #endif +const struct iwl_dev_info * +iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 rf_type, u8 cdb, + u8 rf_id, u8 bw_limit, bool discrete); + /* * This list declares the config structures for all devices. 
*/ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 6492bc7d1680..f62f7c7ee7f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -136,6 +136,9 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) kfree(drv->fw.phy_integration_ver); kfree(drv->trans->dbg.pc_data); drv->trans->dbg.pc_data = NULL; + kvfree(drv->fw.pnvm_data); + drv->fw.pnvm_data = NULL; + drv->fw.pnvm_size = 0; for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) iwl_free_fw_img(drv, drv->fw.img + i); @@ -1400,6 +1403,15 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, drv->trans->dbg.num_pc = tlv_len / sizeof(struct iwl_pc_data); break; + case IWL_UCODE_TLV_PNVM_DATA: + if (drv->fw.pnvm_data) + break; + drv->fw.pnvm_data = + kvmemdup(tlv_data, tlv_len, GFP_KERNEL); + if (!drv->fw.pnvm_data) + return -ENOMEM; + drv->fw.pnvm_size = tlv_len; + break; default: IWL_DEBUG_INFO(drv, "unknown TLV: %d\n", tlv_type); break; @@ -2037,8 +2049,6 @@ static int __init iwl_drv_init(void) for (i = 0; i < ARRAY_SIZE(iwlwifi_opmode_table); i++) INIT_LIST_HEAD(&iwlwifi_opmode_table[i].drv); - pr_info(DRV_DESCRIPTION "\n"); - #ifdef CONFIG_IWLWIFI_DEBUGFS /* Create the root of iwlwifi debugfs subsystem. */ iwl_dbgfs_root = debugfs_create_dir(DRV_NAME, NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 80591809164e..47ad447b6226 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -47,8 +47,8 @@ IWL_EXPORT_SYMBOL(iwl_read32); #define IWL_POLL_INTERVAL 10 /* microseconds */ -int iwl_poll_bit(struct iwl_trans *trans, u32 addr, - u32 bits, u32 mask, int timeout) +int iwl_poll_bits_mask(struct iwl_trans *trans, u32 addr, + u32 bits, u32 mask, int timeout) { int t = 0; @@ -61,7 +61,7 @@ int iwl_poll_bit(struct iwl_trans *trans, u32 addr, return -ETIMEDOUT; } -IWL_EXPORT_SYMBOL(iwl_poll_bit); +IWL_EXPORT_SYMBOL(iwl_poll_bits_mask); u32 iwl_read_direct32(struct iwl_trans *trans, u32 reg) { @@ -477,7 +477,7 @@ int iwl_finish_nic_init(struct iwl_trans *trans) * device-internal resources is supported, e.g. iwl_write_prph() * and accesses to uCode SRAM. 
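One subtlety in the embedded-PNVM plumbing above: iwl_get_pnvm_image() can now hand back three kinds of buffer (a slice of the UEFI package, the TLV blob owned by struct iwl_fw, or a fresh filesystem read), and only two of them belong to the caller. A condensed sketch of the ownership rule, using the names from the diff:

	const u8 *data = iwl_get_pnvm_image(trans, &length, sku_id, fw);

	/* ... allocate pnvm_data, iwl_trans_load_pnvm(), iwl_trans_set_pnvm() ... */

	/*
	 * The embedded blob is kvmemdup()'d once at TLV-parse time and is
	 * released only by iwl_dealloc_ucode(); freeing it here would leave
	 * fw->pnvm_data dangling for any later retry.
	 */
	if (data != fw->pnvm_data)
		kvfree(data);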
*/ - err = iwl_poll_bit(trans, CSR_GP_CNTRL, poll_ready, poll_ready, 25000); + err = iwl_poll_bits(trans, CSR_GP_CNTRL, poll_ready, 25000); if (err < 0) { IWL_DEBUG_INFO(trans, "Failed to wake NIC\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h b/drivers/net/wireless/intel/iwlwifi/iwl-io.h index f4833c5fe86e..731cda1a4e66 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h @@ -23,8 +23,13 @@ static inline void iwl_clear_bit(struct iwl_trans *trans, u32 reg, u32 mask) iwl_trans_set_bits_mask(trans, reg, mask, 0); } -int iwl_poll_bit(struct iwl_trans *trans, u32 addr, - u32 bits, u32 mask, int timeout); +int iwl_poll_bits_mask(struct iwl_trans *trans, u32 addr, + u32 bits, u32 mask, int timeout); +static inline int iwl_poll_bits(struct iwl_trans *trans, u32 addr, u32 bits, + int timeout) +{ + return iwl_poll_bits_mask(trans, addr, bits, bits, timeout); +} int iwl_poll_direct_bit(struct iwl_trans *trans, u32 addr, u32 mask, int timeout); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 78808c956444..9604781dd0b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -268,7 +268,9 @@ static void iwl_trans_restart_wk(struct work_struct *wk) struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, - const struct iwl_mac_cfg *mac_cfg) + const struct iwl_mac_cfg *mac_cfg, + unsigned int txcmd_size, + unsigned int txcmd_align) { struct iwl_trans *trans; #ifdef CONFIG_LOCKDEP @@ -290,35 +292,6 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, INIT_DELAYED_WORK(&trans->restart.wk, iwl_trans_restart_wk); - return trans; -} - -int iwl_trans_init(struct iwl_trans *trans) -{ - int txcmd_size, txcmd_align; - - /* check if name/num_rx_queues were set as a proxy for info being set */ - if (WARN_ON(!trans->info.name || !trans->info.num_rxqs)) - return -EINVAL; - - if (!trans->mac_cfg->gen2) { - txcmd_size = sizeof(struct iwl_tx_cmd_v6); - txcmd_align = sizeof(void *); - } else if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { - txcmd_size = sizeof(struct iwl_tx_cmd_v9); - txcmd_align = 64; - } else { - txcmd_size = sizeof(struct iwl_tx_cmd); - txcmd_align = 128; - } - - txcmd_size += sizeof(struct iwl_cmd_header); - txcmd_size += 36; /* biggest possible 802.11 header */ - - /* Ensure device TX cmd cannot reach/cross a page boundary in gen2 */ - if (WARN_ON(trans->mac_cfg->gen2 && txcmd_size >= txcmd_align)) - return -EINVAL; - snprintf(trans->dev_cmd_pool_name, sizeof(trans->dev_cmd_pool_name), "iwl_cmd_pool:%s", dev_name(trans->dev)); trans->dev_cmd_pool = @@ -326,9 +299,9 @@ int iwl_trans_init(struct iwl_trans *trans) txcmd_size, txcmd_align, SLAB_HWCACHE_ALIGN, NULL); if (!trans->dev_cmd_pool) - return -ENOMEM; + return NULL; - return 0; + return trans; } void iwl_trans_free(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 012b1e44bce3..103a36d8ee30 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1204,9 +1204,10 @@ static inline void iwl_trans_finish_sw_reset(struct iwl_trans *trans) * transport helper functions *****************************************************/ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, - struct device *dev, - const struct iwl_mac_cfg *cfg_trans); -int iwl_trans_init(struct 
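The iwl_poll_bit() to iwl_poll_bits()/iwl_poll_bits_mask() split above is mechanical, but the motivation is that the polling loop returns once (iwl_read32(addr) & mask) == (bits & mask), and nearly every caller passed the same value for bits and mask. The wrapper keeps the common "wait for all of these bits to be set" case short, while the mask form still covers the rest, for example:

	/* wait until both bits read back as set */
	ret = iwl_poll_bits(trans, reg, BIT(3) | BIT(5), timeout);

	/* wait until a bit clears: bits == 0 within the polled mask */
	ret = iwl_poll_bits_mask(trans, reg, 0, BIT(7), timeout);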
iwl_trans *trans); + struct device *dev, + const struct iwl_mac_cfg *mac_cfg, + unsigned int txcmd_size, + unsigned int txcmd_align); void iwl_trans_free(struct iwl_trans *trans); static inline bool iwl_trans_is_hw_error_value(u32 val) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 9d2c087360e7..b372173c4a79 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -294,7 +294,7 @@ static int iwl_mld_run_fw_init_sequence(struct iwl_mld *mld) return ret; ret = iwl_pnvm_load(mld->trans, &mld->notif_wait, - &mld->fw->ucode_capa, alive_data.sku_id); + mld->fw, alive_data.sku_id); if (ret) { IWL_ERR(mld, "Timeout waiting for PNVM load %d\n", ret); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c index 5ee38fc168c1..ffeb37a7f830 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c @@ -299,18 +299,18 @@ void iwl_mld_ptp_init(struct iwl_mld *mld) PTR_ERR(mld->ptp_data.ptp_clock)); mld->ptp_data.ptp_clock = NULL; } else { - IWL_INFO(mld, "Registered PHC clock: %s, with index: %d\n", - mld->ptp_data.ptp_clock_info.name, - ptp_clock_index(mld->ptp_data.ptp_clock)); + IWL_DEBUG_INFO(mld, "Registered PHC clock: %s, with index: %d\n", + mld->ptp_data.ptp_clock_info.name, + ptp_clock_index(mld->ptp_data.ptp_clock)); } } void iwl_mld_ptp_remove(struct iwl_mld *mld) { if (mld->ptp_data.ptp_clock) { - IWL_INFO(mld, "Unregistering PHC clock: %s, with index: %d\n", - mld->ptp_data.ptp_clock_info.name, - ptp_clock_index(mld->ptp_data.ptp_clock)); + IWL_DEBUG_INFO(mld, "Unregistering PHC clock: %s, with index: %d\n", + mld->ptp_data.ptp_clock_info.name, + ptp_clock_index(mld->ptp_data.ptp_clock)); ptp_clock_unregister(mld->ptp_data.ptp_clock); mld->ptp_data.ptp_clock = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index e1070b891300..66749dc38fc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1474,9 +1474,6 @@ struct iwl_wowlan_status_data { struct iwl_multicast_key_data igtk; struct iwl_multicast_key_data bigtk[WOWLAN_BIGTK_KEYS_NUM]; - int num_mlo_keys; - struct iwl_wowlan_mlo_gtk mlo_keys[WOWLAN_MAX_MLO_KEYS]; - u8 *wake_packet; }; @@ -1790,8 +1787,7 @@ static void iwl_mvm_set_key_rx_seq_idx(struct ieee80211_key_conf *key, } static void iwl_mvm_set_key_rx_seq(struct ieee80211_key_conf *key, - struct iwl_wowlan_status_data *status, - bool installed) + struct iwl_wowlan_status_data *status) { int i; @@ -1815,7 +1811,7 @@ static void iwl_mvm_set_key_rx_seq(struct ieee80211_key_conf *key, /* handle the case where we didn't, last key only */ if (status->gtk_seq[i].key_id == -1 && - (!status->num_of_gtk_rekeys || installed)) + (!status->num_of_gtk_rekeys)) iwl_mvm_set_key_rx_seq_idx(key, status, i); } } @@ -1966,7 +1962,7 @@ static void iwl_mvm_d3_update_keys(struct ieee80211_hw *hw, (status->gtk[1].len && keyidx == status->gtk[1].id))) { ieee80211_remove_key(key); } else { - iwl_mvm_set_key_rx_seq(key, data->status, false); + iwl_mvm_set_key_rx_seq(key, data->status); } break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: @@ -1986,167 +1982,6 @@ static void iwl_mvm_d3_update_keys(struct ieee80211_hw *hw, } } -struct iwl_mvm_d3_mlo_old_keys { - u32 cipher[IEEE80211_MLD_MAX_NUM_LINKS][WOWLAN_MLO_GTK_KEY_NUM_TYPES]; - struct ieee80211_key_conf 
*key[IEEE80211_MLD_MAX_NUM_LINKS][8]; -}; - -static void iwl_mvm_mlo_key_ciphers(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, - void *data) -{ - struct iwl_mvm_d3_mlo_old_keys *old_keys = data; - enum iwl_wowlan_mlo_gtk_type key_type; - - if (key->link_id < 0) - return; - - if (WARN_ON(key->link_id >= IEEE80211_MLD_MAX_NUM_LINKS || - key->keyidx >= 8)) - return; - - if (WARN_ON(old_keys->key[key->link_id][key->keyidx])) - return; - - switch (key->cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - key_type = WOWLAN_MLO_GTK_KEY_TYPE_GTK; - break; - case WLAN_CIPHER_SUITE_BIP_GMAC_128: - case WLAN_CIPHER_SUITE_BIP_GMAC_256: - case WLAN_CIPHER_SUITE_BIP_CMAC_256: - case WLAN_CIPHER_SUITE_AES_CMAC: - if (key->keyidx == 4 || key->keyidx == 5) { - key_type = WOWLAN_MLO_GTK_KEY_TYPE_IGTK; - break; - } else if (key->keyidx == 6 || key->keyidx == 7) { - key_type = WOWLAN_MLO_GTK_KEY_TYPE_BIGTK; - break; - } - return; - default: - /* ignore WEP/TKIP or unknown ciphers */ - return; - } - - old_keys->cipher[key->link_id][key_type] = key->cipher; - old_keys->key[key->link_id][key->keyidx] = key; -} - -static bool iwl_mvm_mlo_gtk_rekey(struct iwl_wowlan_status_data *status, - struct ieee80211_vif *vif, - struct iwl_mvm *mvm) -{ - int i; - struct iwl_mvm_d3_mlo_old_keys *old_keys; - bool ret = true; - - IWL_DEBUG_WOWLAN(mvm, "Num of MLO Keys: %d\n", status->num_mlo_keys); - if (!status->num_mlo_keys) - return true; - - old_keys = kzalloc(sizeof(*old_keys), GFP_KERNEL); - if (!old_keys) - return false; - - /* find the cipher for each mlo key */ - ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_mlo_key_ciphers, old_keys); - - for (i = 0; i < status->num_mlo_keys; i++) { - struct iwl_wowlan_mlo_gtk *mlo_key = &status->mlo_keys[i]; - struct ieee80211_key_conf *key, *old_key; - struct ieee80211_key_seq seq; - DEFINE_RAW_FLEX(struct ieee80211_key_conf, conf, key, - WOWLAN_KEY_MAX_SIZE); - u16 flags = le16_to_cpu(mlo_key->flags); - int j, link_id, key_id, key_type; - - link_id = u16_get_bits(flags, WOWLAN_MLO_GTK_FLAG_LINK_ID_MSK); - key_id = u16_get_bits(flags, WOWLAN_MLO_GTK_FLAG_KEY_ID_MSK); - key_type = u16_get_bits(flags, - WOWLAN_MLO_GTK_FLAG_KEY_TYPE_MSK); - - if (!(vif->valid_links & BIT(link_id))) - continue; - - if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS || - key_id >= 8 || - key_type >= WOWLAN_MLO_GTK_KEY_NUM_TYPES)) - continue; - - conf->cipher = old_keys->cipher[link_id][key_type]; - /* WARN_ON? 
*/ - if (!conf->cipher) - continue; - - conf->keylen = 0; - switch (conf->cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_GCMP: - conf->keylen = WLAN_KEY_LEN_CCMP; - break; - case WLAN_CIPHER_SUITE_GCMP_256: - conf->keylen = WLAN_KEY_LEN_GCMP_256; - break; - case WLAN_CIPHER_SUITE_BIP_GMAC_128: - conf->keylen = WLAN_KEY_LEN_BIP_GMAC_128; - break; - case WLAN_CIPHER_SUITE_BIP_GMAC_256: - conf->keylen = WLAN_KEY_LEN_BIP_GMAC_256; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - conf->keylen = WLAN_KEY_LEN_AES_CMAC; - break; - case WLAN_CIPHER_SUITE_BIP_CMAC_256: - conf->keylen = WLAN_KEY_LEN_BIP_CMAC_256; - break; - } - - if (WARN_ON(!conf->keylen || - conf->keylen > WOWLAN_KEY_MAX_SIZE)) - continue; - - memcpy(conf->key, mlo_key->key, conf->keylen); - conf->keyidx = key_id; - - old_key = old_keys->key[link_id][key_id]; - if (old_key) { - IWL_DEBUG_WOWLAN(mvm, - "Remove MLO key id %d, link id %d\n", - key_id, link_id); - ieee80211_remove_key(old_key); - } - - IWL_DEBUG_WOWLAN(mvm, "Add MLO key id %d, link id %d\n", - key_id, link_id); - key = ieee80211_gtk_rekey_add(vif, conf, link_id); - if (WARN_ON(IS_ERR(key))) { - ret = false; - goto out; - } - - /* - * mac80211 expects the pn in big-endian - * also note that seq is a union of all cipher types - * (ccmp, gcmp, cmac, gmac), and they all have the same - * pn field (of length 6) so just copy it to ccmp.pn. - */ - for (j = 5; j >= 0; j--) - seq.ccmp.pn[5 - j] = mlo_key->pn[j]; - - /* group keys are non-QoS and use TID 0 */ - ieee80211_set_key_rx_seq(key, 0, &seq); - } - -out: - kfree(old_keys); - return ret; -} - static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status, struct ieee80211_vif *vif, struct iwl_mvm *mvm, u32 gtk_cipher) @@ -2346,9 +2181,6 @@ static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm, return false; } - if (!iwl_mvm_mlo_gtk_rekey(status, vif, mvm)) - return false; - ieee80211_gtk_rekey_notify(vif, vif->bss_conf.bssid, (void *)&replay_ctr, GFP_KERNEL); } @@ -2479,10 +2311,11 @@ static void iwl_mvm_parse_wowlan_info_notif(struct iwl_mvm *mvm, struct iwl_wowlan_status_data *status, u32 len) { - u32 expected_len = sizeof(*data) + - data->num_mlo_link_keys * sizeof(status->mlo_keys[0]); + if (IWL_FW_CHECK(mvm, data->num_mlo_link_keys, + "MLO is not supported, shouldn't receive MLO keys\n")) + return; - if (len < expected_len) { + if (len < sizeof(*data)) { IWL_ERR(mvm, "Invalid WoWLAN info notification!\n"); status = NULL; return; @@ -2511,33 +2344,17 @@ static void iwl_mvm_parse_wowlan_info_notif(struct iwl_mvm *mvm, le32_to_cpu(data->num_of_gtk_rekeys); status->received_beacons = le32_to_cpu(data->received_beacons); status->tid_tear_down = data->tid_tear_down; - - if (data->num_mlo_link_keys) { - status->num_mlo_keys = data->num_mlo_link_keys; - if (IWL_FW_CHECK(mvm, - status->num_mlo_keys > WOWLAN_MAX_MLO_KEYS, - "Too many mlo keys: %d, max %d\n", - status->num_mlo_keys, WOWLAN_MAX_MLO_KEYS)) - status->num_mlo_keys = WOWLAN_MAX_MLO_KEYS; - memcpy(status->mlo_keys, data->mlo_gtks, - status->num_mlo_keys * sizeof(status->mlo_keys[0])); - } } static void -iwl_mvm_parse_wowlan_info_notif_v4(struct iwl_mvm *mvm, - struct iwl_wowlan_info_notif_v4 *data, +iwl_mvm_parse_wowlan_info_notif_v3(struct iwl_mvm *mvm, + struct iwl_wowlan_info_notif_v3 *data, struct iwl_wowlan_status_data *status, - u32 len, bool has_mlo_keys) + u32 len) { u32 i; - u32 expected_len = sizeof(*data); - - if (has_mlo_keys) - expected_len += (data->num_mlo_link_keys * - sizeof(status->mlo_keys[0])); - if (len < 
expected_len) { + if (len < sizeof(*data)) { IWL_ERR(mvm, "Invalid WoWLAN info notification!\n"); status = NULL; return; @@ -2560,17 +2377,6 @@ iwl_mvm_parse_wowlan_info_notif_v4(struct iwl_mvm *mvm, le32_to_cpu(data->num_of_gtk_rekeys); status->received_beacons = le32_to_cpu(data->received_beacons); status->tid_tear_down = data->tid_tear_down; - - if (has_mlo_keys && data->num_mlo_link_keys) { - status->num_mlo_keys = data->num_mlo_link_keys; - if (IWL_FW_CHECK(mvm, - status->num_mlo_keys > WOWLAN_MAX_MLO_KEYS, - "Too many mlo keys: %d, max %d\n", - status->num_mlo_keys, WOWLAN_MAX_MLO_KEYS)) - status->num_mlo_keys = WOWLAN_MAX_MLO_KEYS; - memcpy(status->mlo_keys, data->mlo_gtks, - status->num_mlo_keys * sizeof(status->mlo_keys[0])); - } } static void @@ -3316,19 +3122,23 @@ static bool iwl_mvm_wait_d3_notif(struct iwl_notif_wait_data *notif_wait, iwl_mvm_parse_wowlan_info_notif_v2(mvm, notif_v2, d3_data->status, len); - } else if (wowlan_info_ver < 5) { - struct iwl_wowlan_info_notif_v4 *notif = + } else if (wowlan_info_ver == 3) { + struct iwl_wowlan_info_notif_v3 *notif = (void *)pkt->data; - iwl_mvm_parse_wowlan_info_notif_v4(mvm, notif, - d3_data->status, len, - wowlan_info_ver > 3); - } else { + iwl_mvm_parse_wowlan_info_notif_v3(mvm, notif, + d3_data->status, len); + } else if (wowlan_info_ver == 5) { struct iwl_wowlan_info_notif *notif = (void *)pkt->data; iwl_mvm_parse_wowlan_info_notif(mvm, notif, d3_data->status, len); + } else { + IWL_FW_CHECK(mvm, 1, + "Firmware advertises unknown WoWLAN info notification %d!\n", + wowlan_info_ver); + return false; } d3_data->notif_received |= IWL_D3_NOTIF_WOWLAN_INFO; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 819e3228462a..ab3d78c1e20c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -432,7 +432,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, iwl_trans_fw_alive(mvm->trans); ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait, - &mvm->fw->ucode_capa, alive_data.sku_id); + mvm->fw, alive_data.sku_id); if (ret) { IWL_ERR(mvm, "Timeout waiting for PNVM load!\n"); iwl_fw_set_current_image(&mvm->fwrt, old_type); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c index e89259de6f4c..06a4c9f74797 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2021 - 2023 Intel Corporation + * Copyright (C) 2021 - 2023, 2025 Intel Corporation */ #include "mvm.h" @@ -298,9 +298,9 @@ void iwl_mvm_ptp_init(struct iwl_mvm *mvm) PTR_ERR(mvm->ptp_data.ptp_clock)); mvm->ptp_data.ptp_clock = NULL; } else if (mvm->ptp_data.ptp_clock) { - IWL_INFO(mvm, "Registered PHC clock: %s, with index: %d\n", - mvm->ptp_data.ptp_clock_info.name, - ptp_clock_index(mvm->ptp_data.ptp_clock)); + IWL_DEBUG_INFO(mvm, "Registered PHC clock: %s, with index: %d\n", + mvm->ptp_data.ptp_clock_info.name, + ptp_clock_index(mvm->ptp_data.ptp_clock)); } } @@ -312,9 +312,9 @@ void iwl_mvm_ptp_init(struct iwl_mvm *mvm) void iwl_mvm_ptp_remove(struct iwl_mvm *mvm) { if (mvm->ptp_data.ptp_clock) { - IWL_INFO(mvm, "Unregistering PHC clock: %s, with index: %d\n", - mvm->ptp_data.ptp_clock_info.name, - ptp_clock_index(mvm->ptp_data.ptp_clock)); + IWL_DEBUG_INFO(mvm, "Unregistering PHC clock: %s, with index: %d\n", + mvm->ptp_data.ptp_clock_info.name, + 
ptp_clock_index(mvm->ptp_data.ptp_clock)); ptp_clock_unregister(mvm->ptp_data.ptp_clock); mvm->ptp_data.ptp_clock = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 52a48e82f3bf..0bd9b44d295b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -577,8 +577,10 @@ EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_hw_card_ids); .subdevice_m_h = _HIGHEST_BIT(m) #define RF_TYPE(n) .match_rf_type = 1, \ .rf_type = IWL_CFG_RF_TYPE_##n -#define RF_STEP(n) .match_rf_step = 1, \ - .rf_step = SILICON_##n##_STEP +#define DISCRETE .match_discrete = 1, \ + .discrete = 1 +#define INTEGRATED .match_discrete = 1, \ + .discrete = 0 #define RF_ID(n) .match_rf_id = 1, \ .rf_id = IWL_CFG_RF_ID_##n #define NO_CDB .match_cdb = 1, .cdb = 0 @@ -1032,9 +1034,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { /* FM RF */ IWL_DEV_INFO(iwl_rf_fm, iwl_be201_name, RF_TYPE(FM)), IWL_DEV_INFO(iwl_rf_fm, iwl_be401_name, RF_TYPE(FM), CDB), - /* the discrete NICs got the RF B0, it's only for the name anyway */ IWL_DEV_INFO(iwl_rf_fm, iwl_be200_name, RF_TYPE(FM), - DEVICE(0x272B), RF_STEP(B)), + DEVICE(0x272B), DISCRETE), IWL_DEV_INFO(iwl_rf_fm_160mhz, iwl_be202_name, RF_TYPE(FM), BW_LIMITED), @@ -1074,149 +1075,12 @@ const unsigned int iwl_dev_info_table_size = ARRAY_SIZE(iwl_dev_info_table); EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_dev_info_table_size); #endif -/* - * Read rf id and cdb info from prph register and store it - */ -static void get_crf_id(struct iwl_trans *iwl_trans, - struct iwl_trans_info *info) -{ - u32 sd_reg_ver_addr; - u32 hw_wfpm_id; - u32 val = 0; - u8 step; - - if (iwl_trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) - sd_reg_ver_addr = SD_REG_VER_GEN2; - else - sd_reg_ver_addr = SD_REG_VER; - - /* Enable access to peripheral registers */ - val = iwl_read_umac_prph_no_grab(iwl_trans, WFPM_CTRL_REG); - val |= WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK; - iwl_write_umac_prph_no_grab(iwl_trans, WFPM_CTRL_REG, val); - - /* Read crf info */ - info->hw_crf_id = iwl_read_prph_no_grab(iwl_trans, sd_reg_ver_addr); - - /* Read cnv info */ - info->hw_cnv_id = iwl_read_prph_no_grab(iwl_trans, CNVI_AUX_MISC_CHIP); - - /* For BZ-W, take B step also when A step is indicated */ - if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ_W) - step = SILICON_B_STEP; - - /* In BZ, the MAC step must be read from the CNVI aux register */ - if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ) { - step = CNVI_AUX_MISC_CHIP_MAC_STEP(info->hw_cnv_id); - - /* For BZ-U, take B step also when A step is indicated */ - if ((CNVI_AUX_MISC_CHIP_PROD_TYPE(info->hw_cnv_id) == - CNVI_AUX_MISC_CHIP_PROD_TYPE_BZ_U) && - step == SILICON_A_STEP) - step = SILICON_B_STEP; - } - - if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ || - CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ_W) { - info->hw_rev_step = step; - info->hw_rev |= step; - } - - /* Read cdb info (also contains the jacket info if needed in the future */ - hw_wfpm_id = iwl_read_umac_prph_no_grab(iwl_trans, WFPM_OTP_CFG1_ADDR); - IWL_INFO(iwl_trans, "Detected crf-id 0x%x, cnv-id 0x%x wfpm id 0x%x\n", - info->hw_crf_id, info->hw_cnv_id, hw_wfpm_id); -} - -/* - * In case that there is no OTP on the NIC, map the rf id and cdb info - * from the prph registers. 
- */ -static int map_crf_id(struct iwl_trans *iwl_trans, - struct iwl_trans_info *info) -{ - int ret = 0; - u32 val = info->hw_crf_id; - u32 step_id = REG_CRF_ID_STEP(val); - u32 slave_id = REG_CRF_ID_SLAVE(val); - u32 jacket_id_cnv = REG_CRF_ID_SLAVE(info->hw_cnv_id); - u32 hw_wfpm_id = iwl_read_umac_prph_no_grab(iwl_trans, - WFPM_OTP_CFG1_ADDR); - u32 jacket_id_wfpm = WFPM_OTP_CFG1_IS_JACKET(hw_wfpm_id); - u32 cdb_id_wfpm = WFPM_OTP_CFG1_IS_CDB(hw_wfpm_id); - - /* Map between crf id to rf id */ - switch (REG_CRF_ID_TYPE(val)) { - case REG_CRF_ID_TYPE_JF_1: - info->hw_rf_id = (IWL_CFG_RF_TYPE_JF1 << 12); - break; - case REG_CRF_ID_TYPE_JF_2: - info->hw_rf_id = (IWL_CFG_RF_TYPE_JF2 << 12); - break; - case REG_CRF_ID_TYPE_HR_NONE_CDB_1X1: - info->hw_rf_id = (IWL_CFG_RF_TYPE_HR1 << 12); - break; - case REG_CRF_ID_TYPE_HR_NONE_CDB: - info->hw_rf_id = (IWL_CFG_RF_TYPE_HR2 << 12); - break; - case REG_CRF_ID_TYPE_HR_CDB: - info->hw_rf_id = (IWL_CFG_RF_TYPE_HR2 << 12); - break; - case REG_CRF_ID_TYPE_GF: - info->hw_rf_id = (IWL_CFG_RF_TYPE_GF << 12); - break; - case REG_CRF_ID_TYPE_FM: - info->hw_rf_id = (IWL_CFG_RF_TYPE_FM << 12); - break; - case REG_CRF_ID_TYPE_WHP: - info->hw_rf_id = (IWL_CFG_RF_TYPE_WH << 12); - break; - case REG_CRF_ID_TYPE_PE: - info->hw_rf_id = (IWL_CFG_RF_TYPE_PE << 12); - break; - default: - ret = -EIO; - IWL_ERR(iwl_trans, - "Can't find a correct rfid for crf id 0x%x\n", - REG_CRF_ID_TYPE(val)); - goto out; - - } - - /* Set Step-id */ - info->hw_rf_id |= (step_id << 8); - - /* Set CDB capabilities */ - if (cdb_id_wfpm || slave_id) { - info->hw_rf_id += BIT(28); - IWL_INFO(iwl_trans, "Adding cdb to rf id\n"); - } - - /* Set Jacket capabilities */ - if (jacket_id_wfpm || jacket_id_cnv) { - info->hw_rf_id += BIT(29); - IWL_INFO(iwl_trans, "Adding jacket to rf id\n"); - } - - IWL_INFO(iwl_trans, - "Detected rf-type 0x%x step-id 0x%x slave-id 0x%x from crf id 0x%x\n", - REG_CRF_ID_TYPE(val), step_id, slave_id, info->hw_rf_id); - IWL_INFO(iwl_trans, - "Detected cdb-id 0x%x jacket-id 0x%x from wfpm id 0x%x\n", - cdb_id_wfpm, jacket_id_wfpm, hw_wfpm_id); - IWL_INFO(iwl_trans, "Detected jacket-id 0x%x from cnvi id 0x%x\n", - jacket_id_cnv, info->hw_cnv_id); - -out: - return ret; -} - /* PCI registers */ #define PCI_CFG_RETRY_TIMEOUT 0x041 -VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info * +const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 rf_type, u8 cdb, - u8 rf_id, u8 bw_limit, u8 rf_step) + u8 rf_id, u8 bw_limit, bool discrete) { int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; @@ -1251,7 +1115,7 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 rf_type, u8 cdb, if (dev_info->match_bw_limit && dev_info->bw_limit != bw_limit) continue; - if (dev_info->match_rf_step && dev_info->rf_step != rf_step) + if (dev_info->match_discrete && dev_info->discrete != discrete) continue; return dev_info; @@ -1261,193 +1125,47 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 rf_type, u8 cdb, } EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_pci_find_dev_info); -static void iwl_pcie_recheck_me_status(struct work_struct *wk) -{ - struct iwl_trans_pcie *trans_pcie = container_of(wk, - typeof(*trans_pcie), - me_recheck_wk.work); - u32 val; - - val = iwl_read32(trans_pcie->trans, CSR_HW_IF_CONFIG_REG); - trans_pcie->me_present = !!(val & CSR_HW_IF_CONFIG_REG_IAMT_UP); -} - -static void iwl_pcie_check_me_status(struct iwl_trans *trans) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - u32 val; - - 
trans_pcie->me_present = -1; - - INIT_DELAYED_WORK(&trans_pcie->me_recheck_wk, - iwl_pcie_recheck_me_status); - - /* we don't have a good way of determining this until BZ */ - if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_BZ) - return; - - val = iwl_read_prph(trans, CNVI_SCU_REG_FOR_ECO_1); - if (val & CNVI_SCU_REG_FOR_ECO_1_WIAMT_KNOWN) { - trans_pcie->me_present = - !!(val & CNVI_SCU_REG_FOR_ECO_1_WIAMT_PRESENT); - return; - } - - val = iwl_read32(trans, CSR_HW_IF_CONFIG_REG); - if (val & (CSR_HW_IF_CONFIG_REG_ME_OWN | - CSR_HW_IF_CONFIG_REG_IAMT_UP)) { - trans_pcie->me_present = 1; - return; - } - - /* recheck again later, ME might still be initializing */ - schedule_delayed_work(&trans_pcie->me_recheck_wk, HZ); -} - static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - const struct iwl_mac_cfg *trans; - const struct iwl_dev_info *dev_info; - struct iwl_trans_info info = { - .hw_id = (pdev->device << 16) + pdev->subsystem_device, - }; - struct iwl_trans *iwl_trans; - struct iwl_trans_pcie *trans_pcie; + const struct iwl_mac_cfg *mac_cfg = (void *)ent->driver_data; + u8 __iomem *hw_base; + u32 bar0, hw_rev; int ret; - trans = (void *)ent->driver_data; - - iwl_trans = iwl_trans_pcie_alloc(pdev, trans, &info); - if (IS_ERR(iwl_trans)) - return PTR_ERR(iwl_trans); - - trans_pcie = IWL_TRANS_GET_PCIE_TRANS(iwl_trans); - - iwl_trans_pcie_check_product_reset_status(pdev); - iwl_trans_pcie_check_product_reset_mode(pdev); - - /* set the things we know so far for the grab NIC access */ - iwl_trans_set_info(iwl_trans, &info); - - /* - * Let's try to grab NIC access early here. Sometimes, NICs may - * fail to initialize, and if that happens it's better if we see - * issues early on (and can reprobe, per the logic inside), than - * first trying to load the firmware etc. and potentially only - * detecting any problems when the first interface is brought up. - */ - ret = iwl_pcie_prepare_card_hw(iwl_trans); - if (!ret) { - ret = iwl_finish_nic_init(iwl_trans); + /* reassign our BAR 0 if invalid due to possible runtime PM races */ + pci_read_config_dword(pdev, PCI_BASE_ADDRESS_0, &bar0); + if (bar0 == PCI_BASE_ADDRESS_MEM_TYPE_64) { + ret = pci_assign_resource(pdev, 0); if (ret) - goto out_free_trans; - if (iwl_trans_grab_nic_access(iwl_trans)) { - get_crf_id(iwl_trans, &info); - /* all good */ - iwl_trans_release_nic_access(iwl_trans); - } else { - ret = -EIO; - goto out_free_trans; - } - } - - info.hw_rf_id = iwl_read32(iwl_trans, CSR_HW_RF_ID); - - /* - * The RF_ID is set to zero in blank OTP so read version to - * extract the RF_ID. - * This is relevant only for family 9000 and up. - */ - if (iwl_trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && - !CSR_HW_RFID_TYPE(info.hw_rf_id) && map_crf_id(iwl_trans, &info)) { - ret = -EINVAL; - goto out_free_trans; + return ret; } - IWL_INFO(iwl_trans, "PCI dev %04x/%04x, rev=0x%x, rfid=0x%x\n", - pdev->device, pdev->subsystem_device, - info.hw_rev, info.hw_rf_id); - - dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device, - CSR_HW_RFID_TYPE(info.hw_rf_id), - CSR_HW_RFID_IS_CDB(info.hw_rf_id), - IWL_SUBDEVICE_RF_ID(pdev->subsystem_device), - IWL_SUBDEVICE_BW_LIM(pdev->subsystem_device), - CSR_HW_RFID_STEP(info.hw_rf_id)); - if (dev_info) { - iwl_trans->cfg = dev_info->cfg; - info.name = dev_info->name; - } - -#if IS_ENABLED(CONFIG_IWLMVM) - /* - * special-case 7265D, it has the same PCI IDs. 
- * - * Note that because we already pass the cfg to the transport above, - * all the parameters that the transport uses must, until that is - * changed, be identical to the ones in the 7265D configuration. - */ - if (iwl_trans->cfg == &iwl7265_cfg && - (info.hw_rev & CSR_HW_REV_TYPE_MSK) == CSR_HW_REV_TYPE_7265D) - iwl_trans->cfg = &iwl7265d_cfg; -#endif - if (!iwl_trans->cfg) { - pr_err("No config found for PCI dev %04x/%04x, rev=0x%x, rfid=0x%x\n", - pdev->device, pdev->subsystem_device, - info.hw_rev, info.hw_rf_id); - ret = -EINVAL; - goto out_free_trans; - } + ret = pcim_enable_device(pdev); + if (ret) + return ret; - IWL_INFO(iwl_trans, "Detected %s\n", info.name); + pci_set_master(pdev); - if (iwl_trans->mac_cfg->mq_rx_supported) { - if (WARN_ON(!iwl_trans->cfg->num_rbds)) { - ret = -EINVAL; - goto out_free_trans; - } - trans_pcie->num_rx_bufs = iwl_trans_get_num_rbds(iwl_trans); - } else { - trans_pcie->num_rx_bufs = RX_QUEUE_SIZE; + ret = pcim_request_all_regions(pdev, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "Requesting all PCI BARs failed.\n"); + return ret; } - if (!iwl_trans->mac_cfg->integrated) { - u16 link_status; - - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status); - - info.pcie_link_speed = - u16_get_bits(link_status, PCI_EXP_LNKSTA_CLS); + hw_base = pcim_iomap(pdev, 0, 0); + if (!hw_base) { + dev_err(&pdev->dev, "Failed to map BAR 0.\n"); + return -ENOMEM; } - iwl_trans_set_info(iwl_trans, &info); - - ret = iwl_trans_init(iwl_trans); - if (ret) - goto out_free_trans; - - pci_set_drvdata(pdev, iwl_trans); - - iwl_pcie_check_me_status(iwl_trans); - - /* try to get ownership so that we'll know if we don't own it */ - iwl_pcie_prepare_card_hw(iwl_trans); - - iwl_trans->drv = iwl_drv_start(iwl_trans); - - if (IS_ERR(iwl_trans->drv)) { - ret = PTR_ERR(iwl_trans->drv); - goto out_free_trans; + /* We can't use iwl_read32 because trans wasn't allocated */ + hw_rev = readl(hw_base + CSR_HW_REV); + if (hw_rev == 0xffffffff) { + dev_err(&pdev->dev, "HW_REV=0xFFFFFFFF, PCI issues?\n"); + return -EIO; } - /* register transport layer debugfs here */ - iwl_trans_pcie_dbgfs_register(iwl_trans); - - return 0; - -out_free_trans: - iwl_trans_pcie_free(iwl_trans); - return ret; + return iwl_pci_gen1_2_probe(pdev, ent, mac_cfg, hw_base, hw_rev); } static void iwl_pci_remove(struct pci_dev *pdev) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h index 23c0771a4231..7dd11891ccfe 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h @@ -538,10 +538,6 @@ iwl_trans_pcie_get_trans(struct iwl_trans_pcie *trans_pcie) * Convention: trans API functions: iwl_trans_pcie_XXX * Other functions: iwl_pcie_XXX */ -struct iwl_trans -*iwl_trans_pcie_alloc(struct pci_dev *pdev, - const struct iwl_mac_cfg *mac_cfg, - struct iwl_trans_info *info); void iwl_trans_pcie_free(struct iwl_trans *trans); void iwl_trans_pcie_free_pnvm_dram_regions(struct iwl_dram_regions *dram_regions, struct device *dev); @@ -1081,6 +1077,10 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); void __releases(nic_access_nobh) iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); void iwl_pcie_alloc_fw_monitor(struct iwl_trans *trans, u8 max_power); +int iwl_pci_gen1_2_probe(struct pci_dev *pdev, + const struct pci_device_id *ent, + const struct iwl_mac_cfg *mac_cfg, + u8 __iomem *hw_base, u32 hw_rev); /* transport gen 1 exported functions */ 
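/*
 * [Editor's sketch, not part of the patch] With this split, iwl_pci_probe()
 * keeps only the transport-independent PCI bring-up (BAR 0 reassignment,
 * device enable, region request, iomap, and a HW_REV sanity read via plain
 * readl(), since no struct iwl_trans exists yet), and everything that needs
 * a transport moves behind the iwl_pci_gen1_2_probe() prototype added above.
 * A minimal stand-alone model of the new ordering follows; pci_dev,
 * gen1_2_probe() and the register value are stand-ins, not iwlwifi API:
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct pci_dev;					/* opaque stand-in */

/* stage 2 stub: the real driver calls iwl_pci_gen1_2_probe() here */
static int gen1_2_probe(struct pci_dev *pdev, volatile uint32_t *hw_base,
			uint32_t hw_rev)
{
	(void)pdev;
	(void)hw_base;
	printf("stage 2 probe with hw_rev=0x%x\n", (unsigned int)hw_rev);
	return 0;
}

static int probe_model(struct pci_dev *pdev, volatile uint32_t *hw_base)
{
	/* read the revision register straight off BAR 0: no transport yet */
	uint32_t hw_rev = hw_base[0];

	/* all-ones means the read hit a dead or absent device */
	if (hw_rev == 0xffffffff)
		return -EIO;

	/* only now hand off to the stage that allocates the transport */
	return gen1_2_probe(pdev, hw_base, hw_rev);
}

int main(void)
{
	uint32_t fake_bar0[1] = { 0x351 };	/* made-up HW_REV value */

	return probe_model(NULL, fake_bar0) ? 1 : 0;
}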
void iwl_trans_pcie_fw_alive(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c index 97e90cbeb6cd..585d845b53fa 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c @@ -25,6 +25,7 @@ #include "fw/dbg.h" #include "fw/api/tx.h" #include "fw/acpi.h" +#include "fw/api/tx.h" #include "mei/iwl-mei.h" #include "internal.h" #include "iwl-fh.h" @@ -380,17 +381,15 @@ void iwl_pcie_apm_stop_master(struct iwl_trans *trans) iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ); - ret = iwl_poll_bit(trans, CSR_GP_CNTRL, - CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS, - CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS, - 100); + ret = iwl_poll_bits(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS, + 100); usleep_range(10000, 20000); } else { iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER); - ret = iwl_poll_bit(trans, CSR_RESET, - CSR_RESET_REG_FLAG_MASTER_DISABLED, - CSR_RESET_REG_FLAG_MASTER_DISABLED, 100); + ret = iwl_poll_bits(trans, CSR_RESET, + CSR_RESET_REG_FLAG_MASTER_DISABLED, 100); } if (ret < 0) @@ -492,10 +491,9 @@ static int iwl_pcie_set_hw_ready(struct iwl_trans *trans) CSR_HW_IF_CONFIG_REG_PCI_OWN_SET); /* See if we got it */ - ret = iwl_poll_bit(trans, CSR_HW_IF_CONFIG_REG, - CSR_HW_IF_CONFIG_REG_PCI_OWN_SET, - CSR_HW_IF_CONFIG_REG_PCI_OWN_SET, - HW_READY_TIMEOUT); + ret = iwl_poll_bits(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PCI_OWN_SET, + HW_READY_TIMEOUT); if (ret >= 0) iwl_set_bit(trans, CSR_MBOX_SET_REG, CSR_MBOX_SET_REG_OS_ALIVE); @@ -2354,7 +2352,7 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) * 5000 series and later (including 1000 series) have non-volatile SRAM, * and do not save/restore SRAM when power cycling. 
*/ - ret = iwl_poll_bit(trans, CSR_GP_CNTRL, poll, mask, 15000); + ret = iwl_poll_bits_mask(trans, CSR_GP_CNTRL, poll, mask, 15000); if (unlikely(ret < 0)) { u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL); @@ -3686,36 +3684,56 @@ void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) iwl_trans_sync_nmi_with_addr(trans, inta_addr, sw_err_bit); } -struct iwl_trans * +static int iwl_trans_pcie_set_txcmd_info(const struct iwl_mac_cfg *mac_cfg, + unsigned int *txcmd_size, + unsigned int *txcmd_align) +{ + if (!mac_cfg->gen2) { + *txcmd_size = sizeof(struct iwl_tx_cmd_v6); + *txcmd_align = sizeof(void *); + } else if (mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { + *txcmd_size = sizeof(struct iwl_tx_cmd_v9); + *txcmd_align = 64; + } else { + *txcmd_size = sizeof(struct iwl_tx_cmd); + *txcmd_align = 128; + } + + *txcmd_size += sizeof(struct iwl_cmd_header); + *txcmd_size += 36; /* biggest possible 802.11 header */ + + /* Ensure device TX cmd cannot reach/cross a page boundary in gen2 */ + if (WARN_ON((mac_cfg->gen2 && *txcmd_size >= *txcmd_align))) + return -EINVAL; + + return 0; +} + +static struct iwl_trans * iwl_trans_pcie_alloc(struct pci_dev *pdev, const struct iwl_mac_cfg *mac_cfg, - struct iwl_trans_info *info) + struct iwl_trans_info *info, u8 __iomem *hw_base) { struct iwl_trans_pcie *trans_pcie, **priv; + unsigned int txcmd_size, txcmd_align; struct iwl_trans *trans; unsigned int bc_tbl_n_entries; int ret, addr_size; - u32 bar0; - /* reassign our BAR 0 if invalid due to possible runtime PM races */ - pci_read_config_dword(pdev, PCI_BASE_ADDRESS_0, &bar0); - if (bar0 == PCI_BASE_ADDRESS_MEM_TYPE_64) { - ret = pci_assign_resource(pdev, 0); - if (ret) - return ERR_PTR(ret); - } - - ret = pcim_enable_device(pdev); + ret = iwl_trans_pcie_set_txcmd_info(mac_cfg, &txcmd_size, + &txcmd_align); if (ret) return ERR_PTR(ret); trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, - mac_cfg); + mac_cfg, txcmd_size, txcmd_align); if (!trans) return ERR_PTR(-ENOMEM); trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + trans_pcie->hw_base = hw_base; + /* Initialize the wait queue for commands */ init_waitqueue_head(&trans_pcie->wait_command_queue); @@ -3813,8 +3831,6 @@ iwl_trans_pcie_alloc(struct pci_dev *pdev, PCIE_LINK_STATE_CLKPM); } - pci_set_master(pdev); - addr_size = trans_pcie->txqs.tfd.addr_size; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_size)); if (ret) { @@ -3826,19 +3842,6 @@ iwl_trans_pcie_alloc(struct pci_dev *pdev, } } - ret = pcim_request_all_regions(pdev, DRV_NAME); - if (ret) { - dev_err(&pdev->dev, "Requesting all PCI BARs failed.\n"); - goto out_no_pci; - } - - trans_pcie->hw_base = pcim_iomap(pdev, 0, 0); - if (!trans_pcie->hw_base) { - dev_err(&pdev->dev, "Could not ioremap PCI BAR 0.\n"); - ret = -ENODEV; - goto out_no_pci; - } - /* We disable the RETRY_TIMEOUT register (0x41) to keep * PCI Tx retries from interfering with C3 CPU state */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); @@ -3846,13 +3849,6 @@ iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->pci_dev = pdev; iwl_disable_interrupts(trans); - info->hw_rev = iwl_read32(trans, CSR_HW_REV); - if (info->hw_rev == 0xffffffff) { - dev_err(&pdev->dev, "HW_REV=0xFFFFFFFF, PCI issues?\n"); - ret = -EIO; - goto out_no_pci; - } - /* * In the 8000 HW family the format of the 4 bytes of CSR_HW_REV have * changed, and now the revision step also includes bit 0-1 (no more @@ -3952,3 +3948,326 @@ int iwl_trans_pcie_copy_imr(struct iwl_trans *trans, trans_pcie->imr_status = IMR_D2S_IDLE; 
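/*
 * [Editor's sketch, not part of the patch] iwl_trans_pcie_set_txcmd_info(),
 * added above, computes the TX command size and slab alignment per device
 * generation before iwl_trans_alloc(), which now takes both as arguments:
 * pick a base struct size and alignment, add the command header plus the
 * biggest possible 802.11 header, then require a gen2 command to fit
 * strictly inside one aligned slot so it can never reach or cross a page
 * boundary. A stand-alone model follows; every byte count here is made up,
 * not a real sizeof() value, and MODEL_FAMILY_AX210 is a stand-in constant:
 */
#include <assert.h>
#include <errno.h>
#include <stddef.h>

enum { MODEL_FAMILY_AX210 = 42 };	/* hypothetical family threshold */

static int txcmd_info_model(int gen2, int family,
			    size_t *size, size_t *align)
{
	if (!gen2) {
		*size = 40;			/* ~sizeof(iwl_tx_cmd_v6) */
		*align = sizeof(void *);
	} else if (family < MODEL_FAMILY_AX210) {
		*size = 20;			/* ~sizeof(iwl_tx_cmd_v9) */
		*align = 64;
	} else {
		*size = 48;			/* ~sizeof(iwl_tx_cmd) */
		*align = 128;
	}

	*size += 4 + 36;	/* cmd header + biggest 802.11 header */

	/* gen2 slots are 'align' bytes: a command must fit in one slot */
	if (gen2 && *size >= *align)
		return -EINVAL;

	return 0;
}

int main(void)
{
	size_t size, align;

	assert(txcmd_info_model(1, MODEL_FAMILY_AX210, &size, &align) == 0);
	assert(size < align);	/* 88 < 128 with the made-up numbers */
	return 0;
}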
return 0; } + +/* + * Read rf id and cdb info from prph register and store it + */ +static void get_crf_id(struct iwl_trans *iwl_trans, + struct iwl_trans_info *info) +{ + u32 sd_reg_ver_addr; + u32 hw_wfpm_id; + u32 val = 0; + u8 step; + + if (iwl_trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) + sd_reg_ver_addr = SD_REG_VER_GEN2; + else + sd_reg_ver_addr = SD_REG_VER; + + /* Enable access to peripheral registers */ + val = iwl_read_umac_prph_no_grab(iwl_trans, WFPM_CTRL_REG); + val |= WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK; + iwl_write_umac_prph_no_grab(iwl_trans, WFPM_CTRL_REG, val); + + /* Read crf info */ + info->hw_crf_id = iwl_read_prph_no_grab(iwl_trans, sd_reg_ver_addr); + + /* Read cnv info */ + info->hw_cnv_id = iwl_read_prph_no_grab(iwl_trans, CNVI_AUX_MISC_CHIP); + + /* For BZ-W, take B step also when A step is indicated */ + if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ_W) + step = SILICON_B_STEP; + + /* In BZ, the MAC step must be read from the CNVI aux register */ + if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ) { + step = CNVI_AUX_MISC_CHIP_MAC_STEP(info->hw_cnv_id); + + /* For BZ-U, take B step also when A step is indicated */ + if ((CNVI_AUX_MISC_CHIP_PROD_TYPE(info->hw_cnv_id) == + CNVI_AUX_MISC_CHIP_PROD_TYPE_BZ_U) && + step == SILICON_A_STEP) + step = SILICON_B_STEP; + } + + if (CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ || + CSR_HW_REV_TYPE(info->hw_rev) == IWL_CFG_MAC_TYPE_BZ_W) { + info->hw_rev_step = step; + info->hw_rev |= step; + } + + /* Read cdb info (also contains the jacket info if needed in the future */ + hw_wfpm_id = iwl_read_umac_prph_no_grab(iwl_trans, WFPM_OTP_CFG1_ADDR); + IWL_INFO(iwl_trans, "Detected crf-id 0x%x, cnv-id 0x%x wfpm id 0x%x\n", + info->hw_crf_id, info->hw_cnv_id, hw_wfpm_id); +} + +/* + * In case that there is no OTP on the NIC, map the rf id and cdb info + * from the prph registers. 
+ */ +static int map_crf_id(struct iwl_trans *iwl_trans, + struct iwl_trans_info *info) +{ + int ret = 0; + u32 val = info->hw_crf_id; + u32 step_id = REG_CRF_ID_STEP(val); + u32 slave_id = REG_CRF_ID_SLAVE(val); + u32 jacket_id_cnv = REG_CRF_ID_SLAVE(info->hw_cnv_id); + u32 hw_wfpm_id = iwl_read_umac_prph_no_grab(iwl_trans, + WFPM_OTP_CFG1_ADDR); + u32 jacket_id_wfpm = WFPM_OTP_CFG1_IS_JACKET(hw_wfpm_id); + u32 cdb_id_wfpm = WFPM_OTP_CFG1_IS_CDB(hw_wfpm_id); + + /* Map between crf id to rf id */ + switch (REG_CRF_ID_TYPE(val)) { + case REG_CRF_ID_TYPE_JF_1: + info->hw_rf_id = (IWL_CFG_RF_TYPE_JF1 << 12); + break; + case REG_CRF_ID_TYPE_JF_2: + info->hw_rf_id = (IWL_CFG_RF_TYPE_JF2 << 12); + break; + case REG_CRF_ID_TYPE_HR_NONE_CDB_1X1: + info->hw_rf_id = (IWL_CFG_RF_TYPE_HR1 << 12); + break; + case REG_CRF_ID_TYPE_HR_NONE_CDB: + info->hw_rf_id = (IWL_CFG_RF_TYPE_HR2 << 12); + break; + case REG_CRF_ID_TYPE_HR_CDB: + info->hw_rf_id = (IWL_CFG_RF_TYPE_HR2 << 12); + break; + case REG_CRF_ID_TYPE_GF: + info->hw_rf_id = (IWL_CFG_RF_TYPE_GF << 12); + break; + case REG_CRF_ID_TYPE_FM: + info->hw_rf_id = (IWL_CFG_RF_TYPE_FM << 12); + break; + case REG_CRF_ID_TYPE_WHP: + info->hw_rf_id = (IWL_CFG_RF_TYPE_WH << 12); + break; + case REG_CRF_ID_TYPE_PE: + info->hw_rf_id = (IWL_CFG_RF_TYPE_PE << 12); + break; + default: + ret = -EIO; + IWL_ERR(iwl_trans, + "Can't find a correct rfid for crf id 0x%x\n", + REG_CRF_ID_TYPE(val)); + goto out; + } + + /* Set Step-id */ + info->hw_rf_id |= (step_id << 8); + + /* Set CDB capabilities */ + if (cdb_id_wfpm || slave_id) { + info->hw_rf_id += BIT(28); + IWL_INFO(iwl_trans, "Adding cdb to rf id\n"); + } + + /* Set Jacket capabilities */ + if (jacket_id_wfpm || jacket_id_cnv) { + info->hw_rf_id += BIT(29); + IWL_INFO(iwl_trans, "Adding jacket to rf id\n"); + } + + IWL_INFO(iwl_trans, + "Detected rf-type 0x%x step-id 0x%x slave-id 0x%x from crf id 0x%x\n", + REG_CRF_ID_TYPE(val), step_id, slave_id, info->hw_rf_id); + IWL_INFO(iwl_trans, + "Detected cdb-id 0x%x jacket-id 0x%x from wfpm id 0x%x\n", + cdb_id_wfpm, jacket_id_wfpm, hw_wfpm_id); + IWL_INFO(iwl_trans, "Detected jacket-id 0x%x from cnvi id 0x%x\n", + jacket_id_cnv, info->hw_cnv_id); + +out: + return ret; +} + +static void iwl_pcie_recheck_me_status(struct work_struct *wk) +{ + struct iwl_trans_pcie *trans_pcie = container_of(wk, + typeof(*trans_pcie), + me_recheck_wk.work); + u32 val; + + val = iwl_read32(trans_pcie->trans, CSR_HW_IF_CONFIG_REG); + trans_pcie->me_present = !!(val & CSR_HW_IF_CONFIG_REG_IAMT_UP); +} + +static void iwl_pcie_check_me_status(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 val; + + trans_pcie->me_present = -1; + + INIT_DELAYED_WORK(&trans_pcie->me_recheck_wk, + iwl_pcie_recheck_me_status); + + /* we don't have a good way of determining this until BZ */ + if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_BZ) + return; + + val = iwl_read_prph(trans, CNVI_SCU_REG_FOR_ECO_1); + if (val & CNVI_SCU_REG_FOR_ECO_1_WIAMT_KNOWN) { + trans_pcie->me_present = + !!(val & CNVI_SCU_REG_FOR_ECO_1_WIAMT_PRESENT); + return; + } + + val = iwl_read32(trans, CSR_HW_IF_CONFIG_REG); + if (val & (CSR_HW_IF_CONFIG_REG_ME_OWN | + CSR_HW_IF_CONFIG_REG_IAMT_UP)) { + trans_pcie->me_present = 1; + return; + } + + /* recheck again later, ME might still be initializing */ + schedule_delayed_work(&trans_pcie->me_recheck_wk, HZ); +} + +int iwl_pci_gen1_2_probe(struct pci_dev *pdev, + const struct pci_device_id *ent, + const struct iwl_mac_cfg 
*mac_cfg, + u8 __iomem *hw_base, u32 hw_rev) +{ + const struct iwl_dev_info *dev_info; + struct iwl_trans_info info = { + .hw_id = (pdev->device << 16) + pdev->subsystem_device, + .hw_rev = hw_rev, + }; + struct iwl_trans *iwl_trans; + struct iwl_trans_pcie *trans_pcie; + int ret; + + iwl_trans = iwl_trans_pcie_alloc(pdev, mac_cfg, &info, hw_base); + if (IS_ERR(iwl_trans)) + return PTR_ERR(iwl_trans); + + trans_pcie = IWL_TRANS_GET_PCIE_TRANS(iwl_trans); + + iwl_trans_pcie_check_product_reset_status(pdev); + iwl_trans_pcie_check_product_reset_mode(pdev); + + /* set the things we know so far for the grab NIC access */ + iwl_trans_set_info(iwl_trans, &info); + + /* + * Let's try to grab NIC access early here. Sometimes, NICs may + * fail to initialize, and if that happens it's better if we see + * issues early on (and can reprobe, per the logic inside), than + * first trying to load the firmware etc. and potentially only + * detecting any problems when the first interface is brought up. + */ + ret = iwl_pcie_prepare_card_hw(iwl_trans); + if (!ret) { + ret = iwl_finish_nic_init(iwl_trans); + if (ret) + goto out_free_trans; + if (iwl_trans_grab_nic_access(iwl_trans)) { + get_crf_id(iwl_trans, &info); + /* all good */ + iwl_trans_release_nic_access(iwl_trans); + } else { + ret = -EIO; + goto out_free_trans; + } + } + + info.hw_rf_id = iwl_read32(iwl_trans, CSR_HW_RF_ID); + + /* + * The RF_ID is set to zero in blank OTP so read version to + * extract the RF_ID. + * This is relevant only for family 9000 and up. + */ + if (iwl_trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && + !CSR_HW_RFID_TYPE(info.hw_rf_id) && map_crf_id(iwl_trans, &info)) { + ret = -EINVAL; + goto out_free_trans; + } + + IWL_INFO(iwl_trans, "PCI dev %04x/%04x, rev=0x%x, rfid=0x%x\n", + pdev->device, pdev->subsystem_device, + info.hw_rev, info.hw_rf_id); + + dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device, + CSR_HW_RFID_TYPE(info.hw_rf_id), + CSR_HW_RFID_IS_CDB(info.hw_rf_id), + IWL_SUBDEVICE_RF_ID(pdev->subsystem_device), + IWL_SUBDEVICE_BW_LIM(pdev->subsystem_device), + !iwl_trans->mac_cfg->integrated); + if (dev_info) { + iwl_trans->cfg = dev_info->cfg; + info.name = dev_info->name; + } + +#if IS_ENABLED(CONFIG_IWLMVM) + + /* + * special-case 7265D, it has the same PCI IDs. + * + * Note that because we already pass the cfg to the transport above, + * all the parameters that the transport uses must, until that is + * changed, be identical to the ones in the 7265D configuration. 
+ */ + if (iwl_trans->cfg == &iwl7265_cfg && + (info.hw_rev & CSR_HW_REV_TYPE_MSK) == CSR_HW_REV_TYPE_7265D) + iwl_trans->cfg = &iwl7265d_cfg; +#endif + if (!iwl_trans->cfg) { + pr_err("No config found for PCI dev %04x/%04x, rev=0x%x, rfid=0x%x\n", + pdev->device, pdev->subsystem_device, + info.hw_rev, info.hw_rf_id); + ret = -EINVAL; + goto out_free_trans; + } + + IWL_INFO(iwl_trans, "Detected %s\n", info.name); + + if (iwl_trans->mac_cfg->mq_rx_supported) { + if (WARN_ON(!iwl_trans->cfg->num_rbds)) { + ret = -EINVAL; + goto out_free_trans; + } + trans_pcie->num_rx_bufs = iwl_trans_get_num_rbds(iwl_trans); + } else { + trans_pcie->num_rx_bufs = RX_QUEUE_SIZE; + } + + if (!iwl_trans->mac_cfg->integrated) { + u16 link_status; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status); + + info.pcie_link_speed = + u16_get_bits(link_status, PCI_EXP_LNKSTA_CLS); + } + + iwl_trans_set_info(iwl_trans, &info); + + pci_set_drvdata(pdev, iwl_trans); + + iwl_pcie_check_me_status(iwl_trans); + + /* try to get ownership so that we'll know if we don't own it */ + iwl_pcie_prepare_card_hw(iwl_trans); + + iwl_trans->drv = iwl_drv_start(iwl_trans); + + if (IS_ERR(iwl_trans->drv)) { + ret = PTR_ERR(iwl_trans->drv); + goto out_free_trans; + } + + /* register transport layer debugfs here */ + iwl_trans_pcie_dbgfs_register(iwl_trans); + + return 0; + +out_free_trans: + iwl_trans_pcie_free(iwl_trans); + return ret; +} diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c index 8676726d789b..3af6e3b3640d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c @@ -590,7 +590,7 @@ static void iwl_pcie_tx_stop_fh(struct iwl_trans *trans) } /* Wait for DMA channels to be idle */ - ret = iwl_poll_bit(trans, FH_TSSR_TX_STATUS_REG, mask, mask, 5000); + ret = iwl_poll_bits(trans, FH_TSSR_TX_STATUS_REG, mask, 5000); if (ret < 0) IWL_ERR(trans, "Failing on timeout while stopping DMA channel %d [0x%08x]\n", diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index 784433bb246a..4d660cef3de9 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -31,15 +31,6 @@ static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *d pos += scnprintf(buf + pos, sizeof(buf) - pos, " bw_limit=*"); - if (di->match_rf_step) - pos += scnprintf(buf + pos, sizeof(buf) - pos, - " rf_step=%c", - di->rf_step == SILICON_Z_STEP ? 
'Z' : - 'A' + di->rf_step); - else - pos += scnprintf(buf + pos, sizeof(buf) - pos, - " rf_step=*"); - if (di->match_rf_id) pos += scnprintf(buf + pos, sizeof(buf) - pos, " rf_id=0x%x", di->rf_id); @@ -54,6 +45,13 @@ static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *d pos += scnprintf(buf + pos, sizeof(buf) - pos, " cdb=*"); + if (di->match_discrete) + pos += scnprintf(buf + pos, sizeof(buf) - pos, + " discrete=%d", + di->discrete); + else + pos += scnprintf(buf + pos, sizeof(buf) - pos, + " discrete=*"); printk(KERN_DEBUG "%sdev=%04x subdev=%04x/%04x%s\n", pfx, di->device, di->subdevice, subdevice_mask, buf); @@ -70,7 +68,7 @@ static void devinfo_table_order(struct kunit *test) ret = iwl_pci_find_dev_info(di->device, di->subdevice, di->rf_type, di->cdb, di->rf_id, di->bw_limit, - di->rf_step); + di->discrete); if (!ret) { iwl_pci_print_dev_info("No entry found for: ", di); KUNIT_FAIL(test, @@ -85,6 +83,32 @@ static void devinfo_table_order(struct kunit *test) } } +static void devinfo_discrete_match(struct kunit *test) +{ + /* + * Validate that any entries with discrete/integrated match have + * the same config with the value inverted (if they match at all.) + */ + + for (int idx = 0; idx < iwl_dev_info_table_size; idx++) { + const struct iwl_dev_info *di = &iwl_dev_info_table[idx]; + const struct iwl_dev_info *ret; + + if (!di->match_discrete) + continue; + + ret = iwl_pci_find_dev_info(di->device, di->subdevice, + di->rf_type, di->cdb, + di->rf_id, di->bw_limit, + !di->discrete); + if (!ret) + continue; + KUNIT_EXPECT_PTR_EQ(test, di->cfg, ret->cfg); + /* and check the name is different, that'd be the point of it */ + KUNIT_EXPECT_NE(test, strcmp(di->name, ret->name), 0); + } +} + static void devinfo_names(struct kunit *test) { int idx; @@ -216,6 +240,7 @@ static void devinfo_no_mac_cfg_dups(struct kunit *test) static struct kunit_case devinfo_test_cases[] = { KUNIT_CASE(devinfo_table_order), + KUNIT_CASE(devinfo_discrete_match), KUNIT_CASE(devinfo_names), KUNIT_CASE(devinfo_no_cfg_dups), KUNIT_CASE(devinfo_no_name_dups), diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index cc2d888e3f17..77b75792eb48 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -173,13 +173,13 @@ void mt76_unassign_vif_chanctx(struct ieee80211_hw *hw, if (!mlink) goto out; - if (link_conf != &vif->bss_conf) + if (mlink != (struct mt76_vif_link *)vif->drv_priv) rcu_assign_pointer(mvif->link[link_id], NULL); dev->drv->vif_link_remove(phy, vif, link_conf, mlink); mlink->ctx = NULL; - if (link_conf != &vif->bss_conf) + if (mlink != (struct mt76_vif_link *)vif->drv_priv) kfree_rcu(mlink, rcu_head); out: diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 35b4ec91979e..87f531297f85 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -643,10 +643,8 @@ mt76_dma_rx_fill_buf(struct mt76_dev *dev, struct mt76_queue *q, while (q->queued < q->ndesc - 1) { struct mt76_queue_buf qbuf = {}; - enum dma_data_direction dir; - dma_addr_t addr; - int offset; void *buf = NULL; + int offset; if (mt76_queue_is_wed_rro_ind(q)) goto done; @@ -655,11 +653,8 @@ mt76_dma_rx_fill_buf(struct mt76_dev *dev, struct mt76_queue *q, if (!buf) break; - addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset; - dir = page_pool_get_dma_dir(q->page_pool); - dma_sync_single_for_device(dev->dma_dev, 
addr, len, dir); - - qbuf.addr = addr + q->buf_offset; + qbuf.addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + + offset + q->buf_offset; done: qbuf.len = len - q->buf_offset; qbuf.skip_unmap = false; diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c index 3353012e8542..65d4c2adb538 100644 --- a/drivers/net/wireless/mediatek/mt76/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mcu.c @@ -78,6 +78,10 @@ int mt76_mcu_skb_send_and_get_msg(struct mt76_dev *dev, struct sk_buff *skb, unsigned long expires; int ret, seq; + if (mt76_is_sdio(dev)) + if (test_bit(MT76_RESET, &dev->phy.state) && atomic_read(&dev->bus_hung)) + return -EIO; + if (ret_skb) *ret_skb = NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 14927a92f9d1..00ac071010aa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -983,6 +983,8 @@ struct mt76_dev { struct mt76_usb usb; struct mt76_sdio sdio; }; + + atomic_t bus_hung; }; /* per-phy stats. */ @@ -1865,6 +1867,9 @@ mt76_vif_link(struct mt76_dev *dev, struct ieee80211_vif *vif, int link_id) struct mt76_vif_link *mlink = (struct mt76_vif_link *)vif->drv_priv; struct mt76_vif_data *mvif = mlink->mvif; + if (!link_id) + return mlink; + return mt76_dereference(mvif->link[link_id], dev); } @@ -1875,7 +1880,7 @@ mt76_vif_conf_link(struct mt76_dev *dev, struct ieee80211_vif *vif, struct mt76_vif_link *mlink = (struct mt76_vif_link *)vif->drv_priv; struct mt76_vif_data *mvif = mlink->mvif; - if (link_conf == &vif->bss_conf) + if (link_conf == &vif->bss_conf || !link_conf->link_id) return mlink; return mt76_dereference(mvif->link[link_conf->link_id], dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 427542777abc..cf948628e588 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -197,6 +197,8 @@ mt7915_mcu_parse_response(struct mt76_dev *mdev, int cmd, static void mt7915_mcu_set_timeout(struct mt76_dev *mdev, int cmd) { + mdev->mcu.timeout = 5 * HZ; + if ((cmd & __MCU_CMD_FIELD_ID) != MCU_CMD_EXT_CID) return; @@ -208,6 +210,9 @@ mt7915_mcu_set_timeout(struct mt76_dev *mdev, int cmd) case MCU_EXT_CMD_BSS_INFO_UPDATE: mdev->mcu.timeout = 2 * HZ; return; + case MCU_EXT_CMD_EFUSE_BUFFER_MODE: + mdev->mcu.timeout = 10 * HZ; + return; default: break; } @@ -2110,16 +2115,21 @@ static int mt7915_load_firmware(struct mt7915_dev *dev) { int ret; - /* make sure fw is download state */ - if (mt7915_firmware_state(dev, false)) { - /* restart firmware once */ - mt76_connac_mcu_restart(&dev->mt76); - ret = mt7915_firmware_state(dev, false); - if (ret) { - dev_err(dev->mt76.dev, - "Firmware is not ready for download\n"); - return ret; - } + /* Release Semaphore if taken by previous failed attempt */ + ret = mt76_connac_mcu_patch_sem_ctrl(&dev->mt76, false); + if (ret != PATCH_REL_SEM_SUCCESS) { + dev_err(dev->mt76.dev, "Could not release semaphore\n"); + /* Continue anyways */ + } + + /* Always restart MCU firmware */ + mt76_connac_mcu_restart(&dev->mt76); + + /* Check if MCU is ready */ + ret = mt7915_firmware_state(dev, false); + if (ret) { + dev_err(dev->mt76.dev, "Firmware did not enter download state\n"); + return ret; } ret = mt76_connac2_load_patch(&dev->mt76, fw_name_var(dev, ROM_PATCH)); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c 
index 5dd57de59f27..7b75193039f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -675,6 +675,8 @@ void mt7921_mac_reset_work(struct work_struct *work) if (!ret) break; } + if (mt76_is_sdio(&dev->mt76) && atomic_read(&dev->mt76.bus_hung)) + return; if (i == 10) dev_err(dev->mt76.dev, "chip reset failed\n"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c index 45b9f35aab17..d8d36b3c3068 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c @@ -150,6 +150,8 @@ static int mt7921s_probe(struct sdio_func *func, if (ret) goto error; + atomic_set(&mdev->bus_hung, false); + mdev->rev = (mt76_rr(dev, MT_HW_CHIPID) << 16) | (mt76_rr(dev, MT_HW_REV) & 0xff); dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c index 1f77cf71ca70..a9eb6252a904 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c @@ -6,6 +6,8 @@ #include "mt7921.h" #include "../mt76_connac2_mac.h" #include "../sdio.h" +#include <linux/mmc/host.h> +#include <linux/kallsyms.h> static void mt7921s_enable_irq(struct mt76_dev *dev) { @@ -35,6 +37,9 @@ int mt7921s_wfsys_reset(struct mt792x_dev *dev) struct mt76_sdio *sdio = &dev->mt76.sdio; u32 val, status; + if (atomic_read(&dev->mt76.bus_hung)) + return 0; + mt7921s_mcu_drv_pmctrl(dev); sdio_claim_host(sdio->func); @@ -91,11 +96,64 @@ int mt7921s_init_reset(struct mt792x_dev *dev) return 0; } +static struct mt76_sdio *msdio; +static void mt7921s_card_reset(struct work_struct *work) +{ + struct mmc_host *sdio_host = msdio->func->card->host; + + sdio_claim_host(msdio->func); + sdio_release_irq(msdio->func); + sdio_release_host(msdio->func); + + mmc_remove_host(sdio_host); + msleep(50); + mmc_add_host(sdio_host); +} + +static DECLARE_WORK(sdio_reset_work, mt7921s_card_reset); +static int mt7921s_check_bus(struct mt76_dev *dev) +{ + struct mt76_sdio *sdio = &dev->sdio; + int err; + + sdio_claim_host(sdio->func); + sdio_readl(dev->sdio.func, MCR_WHCR, &err); + sdio_release_host(sdio->func); + + return err; +} + +static int mt7921s_host_reset(struct mt792x_dev *dev) +{ + struct mt76_dev *mdev = &dev->mt76; + int err = -1; + + if (!atomic_read(&mdev->bus_hung)) + err = mt7921s_check_bus(&dev->mt76); + + if (err) { + atomic_set(&mdev->bus_hung, true); + msdio = &dev->mt76.sdio; + dev_err(mdev->dev, "SDIO bus problem detected(%d), resetting card!!\n", err); + schedule_work(&sdio_reset_work); + return err; + } + + atomic_set(&mdev->bus_hung, false); + + return 0; +} + int mt7921s_mac_reset(struct mt792x_dev *dev) { int err; mt76_connac_free_pending_tx_skbs(&dev->pm, NULL); + + mt7921s_host_reset(dev); + if (atomic_read(&dev->mt76.bus_hung)) + return 0; + mt76_txq_schedule_all(&dev->mphy); mt76_worker_disable(&dev->mt76.tx_worker); set_bit(MT76_MCU_RESET, &dev->mphy.state); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index b8542be0d945..16f3cc58a192 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -2866,7 +2866,7 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, for (i = 0; i < sreq->n_ssids; i++) { if (!sreq->ssids[i].ssid_len) continue; - if (i > 
MT7925_RNR_SCAN_MAX_BSSIDS) + if (i >= MT7925_RNR_SCAN_MAX_BSSIDS) break; ssid->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); @@ -2883,7 +2883,7 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, mt76_connac_mcu_build_rnr_scan_param(mdev, sreq); for (j = 0; j < mdev->rnr.bssid_num; j++) { - if (j > MT7925_RNR_SCAN_MAX_BSSIDS) + if (j >= MT7925_RNR_SCAN_MAX_BSSIDS) break; tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_BSSID, diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index 43a7ac0f718e..381009c4b6b6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -666,6 +666,7 @@ int mt792x_init_wiphy(struct ieee80211_hw *hw) ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); ieee80211_hw_set(hw, CONNECTION_MONITOR); + ieee80211_hw_set(hw, NO_VIRTUAL_MONITOR); if (is_mt7921(&dev->mt76)) ieee80211_hw_set(hw, CHANCTX_STA_CSA); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 0dbd4662bc84..02e10d744feb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1087,9 +1087,9 @@ int mt7996_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (wcid->offchannel) mlink = rcu_dereference(mvif->mt76.offchannel_link); if (!mlink) - mlink = &mvif->deflink.mt76; + mlink = rcu_dereference(mvif->mt76.link[wcid->link_id]); - txp->fw.bss_idx = mlink->idx; + txp->fw.bss_idx = mlink ? mlink->idx : mvif->deflink.mt76.idx; } txp->fw.token = cpu_to_le16(id); @@ -1129,15 +1129,14 @@ u32 mt7996_wed_init_buf(void *ptr, dma_addr_t phys, int token_id) } static void -mt7996_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb) +mt7996_tx_check_aggr(struct ieee80211_link_sta *link_sta, + struct mt76_wcid *wcid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; - struct mt7996_sta_link *msta_link; - struct mt7996_sta *msta; u16 fc, tid; - if (!sta || !(sta->deflink.ht_cap.ht_supported || sta->deflink.he_cap.has_he)) + if (!(link_sta->ht_cap.ht_supported || link_sta->he_cap.has_he)) return; tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; @@ -1146,7 +1145,8 @@ mt7996_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb) if (is_8023) { fc = IEEE80211_FTYPE_DATA | - (sta->wme ? IEEE80211_STYPE_QOS_DATA : IEEE80211_STYPE_DATA); + (link_sta->sta->wme ? 
IEEE80211_STYPE_QOS_DATA + : IEEE80211_STYPE_DATA); } else { /* No need to get precise TID for Action/Management Frame, * since it will not meet the following Frame Control @@ -1162,19 +1162,16 @@ mt7996_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb) if (unlikely(fc != (IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA))) return; - msta = (struct mt7996_sta *)sta->drv_priv; - msta_link = &msta->deflink; - - if (!test_and_set_bit(tid, &msta_link->wcid.ampdu_state)) - ieee80211_start_tx_ba_session(sta, tid, 0); + if (!test_and_set_bit(tid, &wcid->ampdu_state)) + ieee80211_start_tx_ba_session(link_sta->sta, tid, 0); } static void mt7996_txwi_free(struct mt7996_dev *dev, struct mt76_txwi_cache *t, - struct ieee80211_sta *sta, struct list_head *free_list) + struct ieee80211_link_sta *link_sta, + struct mt76_wcid *wcid, struct list_head *free_list) { struct mt76_dev *mdev = &dev->mt76; - struct mt76_wcid *wcid; __le32 *txwi; u16 wcid_idx; @@ -1183,12 +1180,10 @@ mt7996_txwi_free(struct mt7996_dev *dev, struct mt76_txwi_cache *t, goto out; txwi = (__le32 *)mt76_get_txwi_ptr(mdev, t); - if (sta) { - wcid = (struct mt76_wcid *)sta->drv_priv; + if (link_sta) { wcid_idx = wcid->idx; - if (likely(t->skb->protocol != cpu_to_be16(ETH_P_PAE))) - mt7996_tx_check_aggr(sta, t->skb); + mt7996_tx_check_aggr(link_sta, wcid, t->skb); } else { wcid_idx = le32_get_bits(txwi[9], MT_TXD9_WLAN_IDX); } @@ -1207,8 +1202,8 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) struct mt76_dev *mdev = &dev->mt76; struct mt76_phy *phy2 = mdev->phys[MT_BAND1]; struct mt76_phy *phy3 = mdev->phys[MT_BAND2]; + struct ieee80211_link_sta *link_sta = NULL; struct mt76_txwi_cache *txwi; - struct ieee80211_sta *sta = NULL; struct mt76_wcid *wcid = NULL; LIST_HEAD(free_list); struct sk_buff *skb, *tmp; @@ -1245,7 +1240,7 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) */ info = le32_to_cpu(*cur_info); if (info & MT_TXFREE_INFO_PAIR) { - struct mt7996_sta_link *msta_link; + struct ieee80211_sta *sta; u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); @@ -1254,9 +1249,11 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) if (!sta) goto next; - msta_link = container_of(wcid, struct mt7996_sta_link, - wcid); - mt76_wcid_add_poll(&dev->mt76, &msta_link->wcid); + link_sta = rcu_dereference(sta->link[wcid->link_id]); + if (!link_sta) + goto next; + + mt76_wcid_add_poll(&dev->mt76, wcid); next: /* ver 7 has a new DW with pair = 1, skip it */ if (ver == 7 && ((void *)(cur_info + 1) < end) && @@ -1289,7 +1286,8 @@ next: if (!txwi) continue; - mt7996_txwi_free(dev, txwi, sta, &free_list); + mt7996_txwi_free(dev, txwi, link_sta, wcid, + &free_list); } } @@ -1748,7 +1746,7 @@ void mt7996_tx_token_put(struct mt7996_dev *dev) spin_lock_bh(&dev->mt76.token_lock); idr_for_each_entry(&dev->mt76.token, txwi, id) { - mt7996_txwi_free(dev, txwi, NULL, NULL); + mt7996_txwi_free(dev, txwi, NULL, NULL, NULL); dev->mt76.token_count--; } spin_unlock_bh(&dev->mt76.token_lock); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 5283aee619a9..f846b8309ae2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -960,8 +960,8 @@ mt7996_mac_sta_deinit_link(struct mt7996_dev *dev, } static void -mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_sta *sta, - unsigned long links) +mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif 
*vif, + struct ieee80211_sta *sta, unsigned long links) { struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; struct mt76_dev *mdev = &dev->mt76; @@ -969,6 +969,8 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_sta *sta, for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { struct mt7996_sta_link *msta_link = NULL; + struct mt7996_vif_link *link; + struct mt76_phy *mphy; msta_link = rcu_replace_pointer(msta->link[link_id], msta_link, lockdep_is_held(&mdev->mutex)); @@ -976,6 +978,15 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_sta *sta, continue; mt7996_mac_sta_deinit_link(dev, msta_link); + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + mphy = mt76_vif_link_phy(&link->mt76); + if (!mphy) + continue; + + mphy->num_sta--; if (msta->deflink_id == link_id) { msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; continue; @@ -997,6 +1008,7 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf; struct ieee80211_link_sta *link_sta; struct mt7996_vif_link *link; + struct mt76_phy *mphy; if (rcu_access_pointer(msta->link[link_id])) continue; @@ -1023,12 +1035,19 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, link_id); if (err) goto error_unlink; + + mphy = mt76_vif_link_phy(&link->mt76); + if (!mphy) { + err = -EINVAL; + goto error_unlink; + } + mphy->num_sta++; } return 0; error_unlink: - mt7996_mac_sta_remove_links(dev, sta, new_links); + mt7996_mac_sta_remove_links(dev, vif, sta, new_links); return err; } @@ -1045,7 +1064,7 @@ mt7996_mac_sta_change_links(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mutex_lock(&dev->mt76.mutex); - mt7996_mac_sta_remove_links(dev, sta, rem); + mt7996_mac_sta_remove_links(dev, vif, sta, rem); ret = mt7996_mac_sta_add_links(dev, vif, sta, add); mutex_unlock(&dev->mt76.mutex); @@ -1054,25 +1073,21 @@ mt7996_mac_sta_change_links(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } static int -mt7996_mac_sta_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, +mt7996_mac_sta_add(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { - struct mt76_dev *mdev = mphy->dev; - struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76); struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; - unsigned long links = sta->mlo ? sta->valid_links : BIT(0); + unsigned long links = sta->valid_links ? 
sta->valid_links : BIT(0); int err; - mutex_lock(&mdev->mutex); + mutex_lock(&dev->mt76.mutex); msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; msta->vif = mvif; err = mt7996_mac_sta_add_links(dev, vif, sta, links); - if (!err) - mphy->num_sta++; - mutex_unlock(&mdev->mutex); + mutex_unlock(&dev->mt76.mutex); return err; } @@ -1119,7 +1134,6 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif, return err; msta_link->wcid.tx_info |= MT_WCID_TX_INFO_SET; - msta_link->wcid.sta = 1; break; case MT76_STA_EVENT_AUTHORIZE: err = mt7996_mcu_add_sta(dev, link_conf, link_sta, @@ -1151,19 +1165,14 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif, } static void -mt7996_mac_sta_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, +mt7996_mac_sta_remove(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { - struct mt76_dev *mdev = mphy->dev; - struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76); - unsigned long links = sta->mlo ? sta->valid_links : BIT(0); - - mutex_lock(&mdev->mutex); + unsigned long links = sta->valid_links ? sta->valid_links : BIT(0); - mt7996_mac_sta_remove_links(dev, sta, links); - mphy->num_sta--; - - mutex_unlock(&mdev->mutex); + mutex_lock(&dev->mt76.mutex); + mt7996_mac_sta_remove_links(dev, vif, sta, links); + mutex_unlock(&dev->mt76.mutex); } static int @@ -1171,20 +1180,16 @@ mt7996_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state) { - struct mt76_phy *mphy = mt76_vif_phy(hw, vif); struct mt7996_dev *dev = mt7996_hw_dev(hw); enum mt76_sta_event ev; - if (!mphy) - return -EINVAL; - if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) - return mt7996_mac_sta_add(mphy, vif, sta); + return mt7996_mac_sta_add(dev, vif, sta); if (old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST) - mt7996_mac_sta_remove(mphy, vif, sta); + mt7996_mac_sta_remove(dev, vif, sta); if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) @@ -1217,10 +1222,17 @@ static void mt7996_tx(struct ieee80211_hw *hw, if (vif) { struct mt7996_vif *mvif = (void *)vif->drv_priv; - struct mt76_vif_link *mlink; + struct mt76_vif_link *mlink = &mvif->deflink.mt76; + + if (link_id < IEEE80211_LINK_UNSPECIFIED) + mlink = rcu_dereference(mvif->mt76.link[link_id]); + + if (!mlink) { + ieee80211_free_txskb(hw, skb); + goto unlock; + } - mlink = rcu_dereference(mvif->mt76.link[link_id]); - if (mlink && mlink->wcid) + if (mlink->wcid) wcid = mlink->wcid; if (mvif->mt76.roc_phy && @@ -1229,7 +1241,7 @@ static void mt7996_tx(struct ieee80211_hw *hw, if (mphy->roc_link) wcid = mphy->roc_link->wcid; } else { - mphy = mt76_vif_link_phy(&mvif->deflink.mt76); + mphy = mt76_vif_link_phy(mlink); } } @@ -1238,7 +1250,7 @@ static void mt7996_tx(struct ieee80211_hw *hw, goto unlock; } - if (control->sta) { + if (control->sta && link_id < IEEE80211_LINK_UNSPECIFIED) { struct mt7996_sta *msta = (void *)control->sta->drv_priv; struct mt7996_sta_link *msta_link; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index f0adc0b4b8b6..0374872db477 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2216,15 +2216,15 @@ mt7996_mcu_add_group(struct mt7996_dev *dev, struct ieee80211_vif *vif, static void mt7996_mcu_sta_mld_setup_tlv(struct mt7996_dev *dev, struct 
sk_buff *skb, + struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; - unsigned long links = sta->valid_links; - unsigned int nlinks = hweight16(links); + unsigned int nlinks = hweight16(sta->valid_links); struct mld_setup_link *mld_setup_link; + struct ieee80211_link_sta *link_sta; struct sta_rec_mld_setup *mld_setup; struct mt7996_sta_link *msta_link; - struct ieee80211_vif *vif; unsigned int link_id; struct tlv *tlv; @@ -2242,18 +2242,16 @@ mt7996_mcu_sta_mld_setup_tlv(struct mt7996_dev *dev, struct sk_buff *skb, mld_setup->primary_id = cpu_to_le16(msta_link->wcid.idx); if (nlinks > 1) { - link_id = __ffs(links & ~BIT(msta->deflink_id)); - msta_link = mt76_dereference(msta->link[msta->deflink_id], - &dev->mt76); + link_id = __ffs(sta->valid_links & ~BIT(msta->deflink_id)); + msta_link = mt76_dereference(msta->link[link_id], &dev->mt76); if (!msta_link) return; } mld_setup->seconed_id = cpu_to_le16(msta_link->wcid.idx); mld_setup->link_num = nlinks; - vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); mld_setup_link = (struct mld_setup_link *)mld_setup->link_info; - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + for_each_sta_active_link(vif, sta, link_sta, link_id) { struct mt7996_vif_link *link; msta_link = mt76_dereference(msta->link[link_id], &dev->mt76); @@ -2345,7 +2343,8 @@ int mt7996_mcu_add_sta(struct mt7996_dev *dev, mt7996_mcu_sta_muru_tlv(dev, skb, link_conf, link_sta); if (sta->mlo) { - mt7996_mcu_sta_mld_setup_tlv(dev, skb, sta); + mt7996_mcu_sta_mld_setup_tlv(dev, skb, link_conf->vif, + sta); mt7996_mcu_sta_eht_mld_tlv(dev, skb, sta); } } diff --git a/drivers/net/wireless/mediatek/mt76/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/sdio_txrx.c index 0a927a7313a6..f882d21c9f63 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio_txrx.c +++ b/drivers/net/wireless/mediatek/mt76/sdio_txrx.c @@ -112,6 +112,7 @@ mt76s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (err < 0) { dev_err(dev->dev, "sdio read data failed:%d\n", err); + atomic_set(&dev->bus_hung, true); put_page(page); return err; } @@ -234,9 +235,10 @@ static int __mt76s_xmit_queue(struct mt76_dev *dev, u8 *data, int len) err = sdio_writesb(sdio->func, MCR_WTDR1, data, len); sdio_release_host(sdio->func); - if (err) + if (err) { dev_err(dev->dev, "sdio write failed: %d\n", err); - + atomic_set(&dev->bus_hung, true); + } return err; } diff --git a/drivers/net/wireless/mediatek/mt76/wed.c b/drivers/net/wireless/mediatek/mt76/wed.c index f89e4537555c..63f69e152b1c 100644 --- a/drivers/net/wireless/mediatek/mt76/wed.c +++ b/drivers/net/wireless/mediatek/mt76/wed.c @@ -34,11 +34,10 @@ u32 mt76_wed_init_rx_buf(struct mtk_wed_device *wed, int size) struct mt76_dev *dev = container_of(wed, struct mt76_dev, mmio.wed); struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc; struct mt76_queue *q = &dev->q_rx[MT_RXQ_MAIN]; - int i, len = SKB_WITH_OVERHEAD(q->buf_size); struct mt76_txwi_cache *t = NULL; + int i; for (i = 0; i < size; i++) { - enum dma_data_direction dir; dma_addr_t addr; u32 offset; int token; @@ -53,9 +52,6 @@ u32 mt76_wed_init_rx_buf(struct mtk_wed_device *wed, int size) goto unmap; addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset; - dir = page_pool_get_dma_dir(q->page_pool); - dma_sync_single_for_device(dev->dma_dev, addr, len, dir); - desc->buf0 = cpu_to_le32(addr); token = mt76_rx_token_consume(dev, buf, t, addr); if (token < 0) { diff --git a/drivers/nvme/host/core.c 
b/drivers/nvme/host/core.c index 92697f98c601..7493e5aa984c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -386,7 +386,7 @@ static void nvme_log_err_passthru(struct request *req) nr->cmd->common.cdw12, nr->cmd->common.cdw13, nr->cmd->common.cdw14, - nr->cmd->common.cdw14); + nr->cmd->common.cdw15); } enum nvme_disposition { @@ -2015,21 +2015,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, } -static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, - struct nvme_id_ns *id, struct queue_limits *lim, - u32 bs, u32 atomic_bs) +static u32 nvme_configure_atomic_write(struct nvme_ns *ns, + struct nvme_id_ns *id, struct queue_limits *lim, u32 bs) { - unsigned int boundary = 0; + u32 atomic_bs, boundary = 0; - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) { - if (le16_to_cpu(id->nabspf)) + /* + * We do not support an offset for the atomic boundaries. + */ + if (id->nabo) + return bs; + + if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) { + /* + * Use the per-namespace atomic write unit when available. + */ + atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; + if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; + } else { + /* + * Use the controller wide atomic write unit. This sucks + * because the limit is defined in terms of logical blocks while + * namespaces can have different formats, and because there is + * no clear language in the specification prohibiting different + * values for different controllers in the subsystem. + */ + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } + lim->atomic_write_hw_max = atomic_bs; lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); lim->features |= BLK_FEAT_ATOMIC_WRITES; + return atomic_bs; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2067,34 +2087,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, valid = false; } - atomic_bs = phys_bs = bs; - if (id->nabo == 0) { - /* - * Bit 1 indicates whether NAWUPF is defined for this namespace - * and whether it should be used instead of AWUPF. If NAWUPF == - * 0 then AWUPF must be used instead. - */ - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) - atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; - else - atomic_bs = (1 + ns->ctrl->awupf) * bs; - - /* - * Set subsystem atomic bs. - */ - if (ns->ctrl->subsys->atomic_bs) { - if (atomic_bs != ns->ctrl->subsys->atomic_bs) { - dev_err_ratelimited(ns->ctrl->device, - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", - ns->disk ? ns->disk->disk_name : "?", - ns->ctrl->subsys->atomic_bs, - atomic_bs); - } - } else - ns->ctrl->subsys->atomic_bs = atomic_bs; - - nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); - } + phys_bs = bs; + atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs); if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ @@ -2382,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; - /* - * Validate the max atomic write size fits within the subsystem's - * atomic write capabilities. 
- */ - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { - blk_mq_unfreeze_queue(ns->disk->queue, memflags); - ret = -ENXIO; - goto out; - } - nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3215,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3552,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } + + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { + dev_err_ratelimited(ctrl->device, + "inconsistent AWUPF, controller not added (%u/%u).\n", + le16_to_cpu(id->awupf), ctrl->subsys->awupf); + ret = -EINVAL; + goto out_free; + } + memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -3647,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -4036,6 +4029,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); + +#ifdef CONFIG_NVME_MULTIPATH + cancel_delayed_work(&head->remove_work); +#endif return 0; out_put_ns_head: @@ -4089,6 +4086,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) struct nvme_ns *ns; struct gendisk *disk; int node = ctrl->numa_node; + bool last_path = false; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -4181,9 +4179,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) + if (list_empty(&ns->head->list)) { list_del_init(&ns->head->entry); + /* + * If multipath is not configured, we still create a namespace + * head (nshead), but head->disk is not initialized in that + * case. As a result, only a single reference to nshead is held + * (via kref_init()) when it is created. Therefore, ensure that + * we do not release the reference to nshead twice if head->disk + * is not present. 
+ */ + if (ns->head->disk) + last_path = true; + } mutex_unlock(&ctrl->subsys->lock); + if (last_path) + nvme_put_ns_head(ns->head); nvme_put_ns_head(ns->head); out_cleanup_disk: put_disk(disk); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e040e467f9fa..3da980dc60d9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -690,8 +690,8 @@ static void nvme_remove_head(struct nvme_ns_head *head) nvme_cdev_del(&head->cdev, &head->cdev_device); synchronize_srcu(&head->srcu); del_gendisk(head->disk); - nvme_put_ns_head(head); } + nvme_put_ns_head(head); } static void nvme_remove_head_work(struct work_struct *work) @@ -1200,7 +1200,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) */ srcu_idx = srcu_read_lock(&head->srcu); - list_for_each_entry_rcu(ns, &head->list, siblings) { + list_for_each_entry_srcu(ns, &head->list, siblings, + srcu_read_lock_held(&head->srcu)) { /* * Ensure that ns path disk node is already added otherwise we * may get invalid kobj name for target @@ -1291,6 +1292,9 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { bool remove = false; + if (!head->disk) + return; + mutex_lock(&head->subsys->lock); /* * We are called when all paths have been removed, and at that point @@ -1311,7 +1315,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) */ if (!try_module_get(THIS_MODULE)) goto out; - queue_delayed_work(nvme_wq, &head->remove_work, + mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else { list_del_init(&head->entry); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a468cdc5b5cb..7df2ea21851f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,7 +410,6 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; - u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -443,11 +442,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; + u16 awupf; /* 0's based value. 
*/ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif - u32 atomic_bs; }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8ff12e415cb5..320aaa41ec39 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2101,8 +2101,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) pci_p2pmem_publish(pdev, true); - - nvme_update_attrs(dev); } static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) @@ -3010,6 +3008,8 @@ static void nvme_reset_work(struct work_struct *work) if (result < 0) goto out; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -3343,6 +3343,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result < 0) goto out_disable; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out_disable; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index df69a9dee71c..51df72f5e89b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) { if (bio != &req->b.inline_bio) bio_put(bio); + else + bio_uninit(bio); } #ifdef CONFIG_NVME_TARGET_TCP_TLS diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b78e0e417324..af370628e583 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,19 +1676,24 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) - goto free_ri; + if (!root_ops) { + kfree(ri); + return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) - goto free_root_ops; + if (!ri->cfg) { + kfree(ri); + kfree(root_ops); + return NULL; + } root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - goto free_cfg; + return NULL; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1705,14 +1710,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; - -free_cfg: - pci_ecam_free(ri->cfg); -free_root_ops: - kfree(root_ops); -free_ri: - kfree(ri); - return NULL; } void pcibios_add_bus(struct pci_bus *bus) diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index ee5f615a9023..4bd73f038ffb 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -254,6 +254,7 @@ bool pcie_ptm_enabled(struct pci_dev *dev) } EXPORT_SYMBOL(pcie_ptm_enabled); +#if IS_ENABLED(CONFIG_DEBUG_FS) static ssize_t context_update_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { @@ -552,3 +553,4 @@ void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) debugfs_remove_recursive(ptm_debugfs->debugfs); } EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs); +#endif diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index 06ae1fe8b8c5..b51704bafd81 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -1074,7 +1074,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct u32 idx = 0; int ret; - for_each_gpiochip_node(dev, child) 
{ + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + npctl->nfunctions++; npctl->ngroups += of_get_child_count(to_of_node(child)); } @@ -1092,7 +1095,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct if (!npctl->groups) return -ENOMEM; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + ret = ma35_pinctrl_parse_functions(child, npctl, idx++); if (ret) { fwnode_handle_put(child); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5cf3db6d78b7..b3f0d02aeeb3 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -979,6 +979,17 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend pin, is_suspend ? "suspend" : "hibernate"); } + /* + * debounce enabled over suspend has shown issues with a GPIO + * being unable to wake the system, as we're only interested in + * the actual wakeup event, clear it. + */ + if (gpio_dev->saved_regs[i] & (DB_CNTRl_MASK << DB_CNTRL_OFF)) { + amd_gpio_set_debounce(gpio_dev, pin, 0); + pm_pr_dbg("Clearing debounce for GPIO #%d during %s.\n", + pin, is_suspend ? "suspend" : "hibernate"); + } + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9bf53de20be8..04afb344e9e5 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -784,7 +784,7 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; - gc->can_sleep = false; + gc->can_sleep = true; return 0; } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 5c4687de1464..f713c80d7f3e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1038,6 +1038,25 @@ static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d, test_bit(d->hwirq, pctrl->skip_wake_irqs); } +static void msm_gpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + const struct msm_pingroup *g; + int i; + + bitmap_fill(valid_mask, ngpios); + + for (i = 0; i < ngpios; i++) { + g = &pctrl->soc->groups[i]; + + if (g->intr_detection_width != 1 && + g->intr_detection_width != 2) + clear_bit(i, valid_mask); + } +} + static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1441,6 +1460,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; + girq->init_valid_mask = msm_gpio_irq_init_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 900069eb186e..a1c529f1ff1a 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -715,7 +715,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_llt_events[] = { {101, "GDC_BANK0_HIT_DCL_PARTIAL"}, {102, "GDC_BANK0_EVICT_DCL"}, {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA0"}, - {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, + {104, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, {105, "GDC_BANK0_ARB_STRB"}, {106, 
"GDC_BANK0_ARB_WAIT"}, {107, "GDC_BANK0_GGA_STRB"}, diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index aae99adb29eb..14aa87b39be5 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -281,7 +281,8 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo, vring->align = SMP_CACHE_BYTES; vring->index = i; vring->vdev_id = tm_vdev->vdev.id.device; - vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN; + vring->drop_desc.len = cpu_to_virtio32(&tm_vdev->vdev, + VRING_DROP_DESC_MAX_LEN); dev = &tm_vdev->vdev.dev; size = vring_size(vring->num, vring->align); @@ -1287,7 +1288,7 @@ static void mlxbf_tmfifo_get_cfg_mac(u8 *mac) ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac); } -/* Set TmFifo thresolds which is used to trigger interrupts. */ +/* Set TmFifo thresholds which is used to trigger interrupts. */ static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo) { u64 ctl; diff --git a/drivers/platform/mellanox/mlxreg-dpu.c b/drivers/platform/mellanox/mlxreg-dpu.c index 52260106a9f1..39f89c47144a 100644 --- a/drivers/platform/mellanox/mlxreg-dpu.c +++ b/drivers/platform/mellanox/mlxreg-dpu.c @@ -483,7 +483,7 @@ static int mlxreg_dpu_config_init(struct mlxreg_dpu *mlxreg_dpu, void *regmap, mlxreg_dpu->io_data, sizeof(*mlxreg_dpu->io_data)); if (IS_ERR(mlxreg_dpu->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); return PTR_ERR(mlxreg_dpu->io_regs); diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index aee395bb48ae..d1518598dfed 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -57,9 +57,9 @@ enum mlxreg_lc_state { * @dev: platform device; * @lock: line card lock; * @par_regmap: parent device regmap handle; - * @data: pltaform core data; + * @data: platform core data; * @io_data: register access platform data; - * @led_data: LED platform data ; + * @led_data: LED platform data; * @mux_data: MUX platform data; * @led: LED device; * @io_regs: register access device; @@ -171,7 +171,7 @@ static int mlxreg_lc_chan[] = { 0x4e, 0x4f }; -/* Defaul mux configuration. */ +/* Default mux configuration. */ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { { .chan_ids = mlxreg_lc_chan, @@ -181,7 +181,7 @@ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { }, }; -/* Defaul mux board info. */ +/* Default mux board info. 
*/ static struct i2c_board_info mlxreg_lc_mux_brdinfo = { I2C_BOARD_INFO("i2c-mux-mlxcpld", 0x32), }; @@ -688,7 +688,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, if (regval & mlxreg_lc->data->mask) { mlxreg_lc->state |= MLXREG_LC_SYNCED; mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_SYNCED, 1); - if (mlxreg_lc->state & ~MLXREG_LC_POWERED) { + if (!(mlxreg_lc->state & MLXREG_LC_POWERED)) { err = mlxreg_lc_power_on_off(mlxreg_lc, 1); if (err) goto mlxreg_lc_regmap_power_on_off_fail; @@ -758,7 +758,7 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, platform_device_register_resndata(dev, "mlxreg-io", data->hpdev.nr, NULL, 0, mlxreg_lc->io_data, sizeof(*mlxreg_lc->io_data)); if (IS_ERR(mlxreg_lc->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); err = PTR_ERR(mlxreg_lc->io_regs); diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index db31c8bf2255..51504113c17e 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -1181,7 +1181,7 @@ static int nvsw_sn2201_i2c_completion_notify(void *handle, int id) if (!nvsw_sn2201->main_mux_devs->adapter) { err = -ENODEV; dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n", - nvsw_sn2201->cpld_devs->nr); + nvsw_sn2201->main_mux_devs->nr); goto i2c_get_adapter_main_fail; } diff --git a/drivers/platform/x86/amd/amd_isp4.c b/drivers/platform/x86/amd/amd_isp4.c index 0cc01441bcbb..0d494899502c 100644 --- a/drivers/platform/x86/amd/amd_isp4.c +++ b/drivers/platform/x86/amd/amd_isp4.c @@ -11,6 +11,7 @@ #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/property.h> +#include <linux/soc/amd/isp4_misc.h> #include <linux/string.h> #include <linux/types.h> #include <linux/units.h> @@ -20,6 +21,9 @@ #define AMDISP_OV05C10_REMOTE_EP_NAME "ov05c10_isp_4_1_1" #define AMD_ISP_PLAT_DRV_NAME "amd-isp4" +static const struct software_node isp4_mipi1_endpoint_node; +static const struct software_node ov05c10_endpoint_node; + /* * AMD ISP platform info definition to initialize sensor * specific platform configuration to prepare the amdisp @@ -42,55 +46,116 @@ struct amdisp_platform { struct mutex lock; /* protects i2c client creation */ }; -/* Top-level OV05C10 camera node property table */ +/* Root AMD CAMERA SWNODE */ + +/* Root amd camera node definition */ +static const struct software_node amd_camera_node = { + .name = "amd_camera", +}; + +/* ISP4 SWNODE */ + +/* ISP4 OV05C10 camera node definition */ +static const struct software_node isp4_node = { + .name = "isp4", + .parent = &amd_camera_node, +}; + +/* + * ISP4 Ports node definition. No properties defined for + * ports node. + */ +static const struct software_node isp4_ports = { + .name = "ports", + .parent = &isp4_node, +}; + +/* + * ISP4 Port node definition. No properties defined for + * port node. + */ +static const struct software_node isp4_port_node = { + .name = "port@0", + .parent = &isp4_ports, +}; + +/* + * ISP4 MIPI1 remote endpoint points to OV05C10 endpoint + * node. 
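+ * (The two endpoint nodes reference each other, which is why both
+ * isp4_mipi1_endpoint_node and ov05c10_endpoint_node are
+ * forward-declared near the top of the file.)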
+ */ +static const struct software_node_ref_args isp4_refs[] = { + SOFTWARE_NODE_REFERENCE(&ov05c10_endpoint_node), +}; + +/* ISP4 MIPI1 endpoint node properties table */ +static const struct property_entry isp4_mipi1_endpoint_props[] = { + PROPERTY_ENTRY_REF_ARRAY("remote-endpoint", isp4_refs), + { } +}; + +/* ISP4 MIPI1 endpoint node definition */ +static const struct software_node isp4_mipi1_endpoint_node = { + .name = "endpoint", + .parent = &isp4_port_node, + .properties = isp4_mipi1_endpoint_props, +}; + +/* I2C1 SWNODE */ + +/* I2C1 camera node property table */ +static const struct property_entry i2c1_camera_props[] = { + PROPERTY_ENTRY_U32("clock-frequency", 1 * HZ_PER_MHZ), + { } +}; + +/* I2C1 camera node definition */ +static const struct software_node i2c1_node = { + .name = "i2c1", + .parent = &amd_camera_node, + .properties = i2c1_camera_props, +}; + +/* I2C1 camera node property table */ static const struct property_entry ov05c10_camera_props[] = { PROPERTY_ENTRY_U32("clock-frequency", 24 * HZ_PER_MHZ), { } }; -/* Root AMD ISP OV05C10 camera node definition */ -static const struct software_node camera_node = { +/* OV05C10 camera node definition */ +static const struct software_node ov05c10_camera_node = { .name = AMDISP_OV05C10_HID, + .parent = &i2c1_node, .properties = ov05c10_camera_props, }; /* - * AMD ISP OV05C10 Ports node definition. No properties defined for + * OV05C10 Ports node definition. No properties defined for * ports node for OV05C10. */ -static const struct software_node ports = { +static const struct software_node ov05c10_ports = { .name = "ports", - .parent = &camera_node, -}; - -/* - * AMD ISP OV05C10 Port node definition. No properties defined for - * port node for OV05C10. - */ -static const struct software_node port_node = { - .name = "port@", - .parent = &ports, + .parent = &ov05c10_camera_node, }; /* - * Remote endpoint AMD ISP node definition. No properties defined for - * remote endpoint node for OV05C10. + * OV05C10 Port node definition. */ -static const struct software_node remote_ep_isp_node = { - .name = AMDISP_OV05C10_REMOTE_EP_NAME, +static const struct software_node ov05c10_port_node = { + .name = "port@0", + .parent = &ov05c10_ports, }; /* - * Remote endpoint reference for isp node included in the - * OV05C10 endpoint. + * OV05C10 remote endpoint points to ISP4 MIPI1 endpoint + * node. */ static const struct software_node_ref_args ov05c10_refs[] = { - SOFTWARE_NODE_REFERENCE(&remote_ep_isp_node), + SOFTWARE_NODE_REFERENCE(&isp4_mipi1_endpoint_node), }; /* OV05C10 supports one single link frequency */ static const u64 ov05c10_link_freqs[] = { - 925 * HZ_PER_MHZ, + 900 * HZ_PER_MHZ, }; /* OV05C10 supports only 2-lane configuration */ @@ -110,27 +175,64 @@ static const struct property_entry ov05c10_endpoint_props[] = { { } }; -/* AMD ISP endpoint node definition */ -static const struct software_node endpoint_node = { +/* OV05C10 endpoint node definition */ +static const struct software_node ov05c10_endpoint_node = { .name = "endpoint", - .parent = &port_node, + .parent = &ov05c10_port_node, .properties = ov05c10_endpoint_props, }; /* - * AMD ISP swnode graph uses 5 nodes and also its relationship is - * fixed to align with the structure that v4l2 expects for successful - * endpoint fwnode parsing. + * AMD Camera swnode graph uses 10 nodes and also its relationship is + * fixed to align with the structure that v4l2 and i2c frameworks expects + * for successful parsing of fwnodes and its properties with standard names. 
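+ * As an editorial sketch (assuming the driver registers the whole
+ * group at probe time), a NULL-terminated node array like the one
+ * below can be registered in a single call:
+ *
+ *	ret = software_node_register_node_group(amd_isp4_nodes);
+ *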
* * It is only the node property_entries that will vary for each platform * supporting different sensor modules. + * + * AMD ISP4 SWNODE GRAPH Structure + * + * amd_camera { + * isp4 { + * ports { + * port@0 { + * isp4_mipi1_ep: endpoint { + * remote-endpoint = &OMNI5C10_ep; + * }; + * }; + * }; + * }; + * + * i2c1 { + * clock-frequency = 1 MHz; + * OMNI5C10 { + * clock-frequency = 24MHz; + * ports { + * port@0 { + * OMNI5C10_ep: endpoint { + * bus-type = 4; + * data-lanes = <1 2>; + * link-frequencies = 900MHz; + * remote-endpoint = &isp4_mipi1; + * }; + * }; + * }; + * }; + * }; + * }; + * */ -static const struct software_node *ov05c10_nodes[] = { - &camera_node, - &ports, - &port_node, - &endpoint_node, - &remote_ep_isp_node, +static const struct software_node *amd_isp4_nodes[] = { + &amd_camera_node, + &isp4_node, + &isp4_ports, + &isp4_port_node, + &isp4_mipi1_endpoint_node, + &i2c1_node, + &ov05c10_camera_node, + &ov05c10_ports, + &ov05c10_port_node, + &ov05c10_endpoint_node, NULL }; @@ -140,7 +242,7 @@ static const struct amdisp_platform_info ov05c10_platform_config = { .dev_name = "ov05c10", I2C_BOARD_INFO("ov05c10", AMDISP_OV05C10_I2C_ADDR), }, - .swnodes = ov05c10_nodes, + .swnodes = amd_isp4_nodes, }; static const struct acpi_device_id amdisp_sensor_ids[] = { @@ -151,7 +253,7 @@ MODULE_DEVICE_TABLE(acpi, amdisp_sensor_ids); static inline bool is_isp_i2c_adapter(struct i2c_adapter *adap) { - return !strcmp(adap->owner->name, "i2c_designware_amdisp"); + return !strcmp(adap->name, AMDISP_I2C_ADAP_NAME); } static void instantiate_isp_i2c_client(struct amdisp_platform *isp4_platform, @@ -232,7 +334,8 @@ static struct amdisp_platform *prepare_amdisp_platform(struct device *dev, if (ret) return ERR_PTR(ret); - isp4_platform->board_info.swnode = src->swnodes[0]; + /* initialize ov05c10_camera_node */ + isp4_platform->board_info.swnode = src->swnodes[6]; return isp4_platform; } @@ -257,6 +360,7 @@ static int amd_isp_probe(struct platform_device *pdev) { const struct amdisp_platform_info *pinfo; struct amdisp_platform *isp4_platform; + struct acpi_device *adev; int ret; pinfo = device_get_match_data(&pdev->dev); @@ -274,6 +378,10 @@ static int amd_isp_probe(struct platform_device *pdev) if (ret) goto error_unregister_sw_node; + adev = ACPI_COMPANION(&pdev->dev); + /* initialize root amd_camera_node */ + adev->driver_data = (void *)pinfo->swnodes[0]; + /* check if adapter is already registered and create i2c client instance */ i2c_for_each_dev(isp4_platform, try_to_instantiate_i2c_client); diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index f292111bd065..131f10b68308 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -11,7 +11,7 @@ #include <linux/dmi.h> #include <linux/io.h> #include <linux/ioport.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "pmc.h" diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 3f8b2a324efd..f84c3d03c1de 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -530,6 +530,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_zenbook_duo_kbd, }, + { + .callback = dmi_matched, + .ident = "ASUS Zenbook Duo UX8406CA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX8406CA"), + }, + .driver_data = &quirk_asus_zenbook_duo_kbd, + }, {}, }; diff --git 
a/drivers/platform/x86/dell/dell-lis3lv02d.c b/drivers/platform/x86/dell/dell-lis3lv02d.c index efe26d667973..0791118dd6b7 100644 --- a/drivers/platform/x86/dell/dell-lis3lv02d.c +++ b/drivers/platform/x86/dell/dell-lis3lv02d.c @@ -45,6 +45,7 @@ static const struct dmi_system_id lis3lv02d_devices[] __initconst = { * Additional individual entries were added after verification. */ DELL_LIS3LV02D_DMI_ENTRY("Latitude 5480", 0x29), + DELL_LIS3LV02D_DMI_ENTRY("Latitude 5500", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6330", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6430", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Precision 3540", 0x29), diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h index 3ad33a094588..817ee7ba07ca 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h +++ b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h @@ -89,6 +89,11 @@ extern struct wmi_sysman_priv wmi_priv; enum { ENUM, INT, STR, PO }; +#define ENUM_MIN_ELEMENTS 8 +#define INT_MIN_ELEMENTS 9 +#define STR_MIN_ELEMENTS 8 +#define PO_MIN_ELEMENTS 4 + enum { ATTR_NAME, DISPL_NAME_LANG_CODE, diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c index 8cc212c85266..fc2f58b4cbc6 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c @@ -23,9 +23,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < ENUM_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c index 951e75b538fa..735248064239 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < INT_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[CURRENT_VAL].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index d8f1bf5e58a0..3167e06d416e 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -26,9 +26,10 @@ static ssize_t is_enabled_show(struct kobject *kobj, struct kobj_attribute *attr obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE 
|| obj->package.count < PO_MIN_ELEMENTS || + obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[IS_PASS_SET].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c index c392f0ecf8b5..0d2c74f8d1aa 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < STR_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c index d00389b860e4..f5402b714657 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c @@ -407,10 +407,10 @@ static int init_bios_attributes(int attr_type, const char *guid) return retval; switch (attr_type) { - case ENUM: min_elements = 8; break; - case INT: min_elements = 9; break; - case STR: min_elements = 8; break; - case PO: min_elements = 4; break; + case ENUM: min_elements = ENUM_MIN_ELEMENTS; break; + case INT: min_elements = INT_MIN_ELEMENTS; break; + case STR: min_elements = STR_MIN_ELEMENTS; break; + case PO: min_elements = PO_MIN_ELEMENTS; break; default: pr_err("Error: Unknown attr_type: %d\n", attr_type); return -EINVAL; @@ -597,7 +597,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); err_exit_bios_attr_pass_interface: exit_bios_attr_pass_interface(); @@ -611,7 +611,7 @@ err_exit_bios_attr_set_interface: static void __exit sysman_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); exit_bios_attr_set_interface(); exit_bios_attr_pass_interface(); } diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 13237890fc92..5bfa7159f5bc 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -1034,7 +1034,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); err_unregister_class: hp_exit_attr_set_interface(); @@ -1045,7 +1045,7 @@ err_unregister_class: static void __exit hp_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); hp_exit_attr_set_interface(); } diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 0b5e43444ed6..f25a427cccda 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -54,6 +54,7 @@ static const struct acpi_device_id intel_hid_ids[] = { { "INTC107B" }, { 
"INTC10CB" }, { "INTC10CC" }, + { "INTC10F1" }, { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c index 8b788822237b..3e019c51913e 100644 --- a/drivers/platform/x86/portwell-ec.c +++ b/drivers/platform/x86/portwell-ec.c @@ -236,6 +236,7 @@ static int pwec_probe(struct platform_device *pdev) return ret; } + ec_wdt_dev.parent = &pdev->dev; ret = devm_watchdog_register_device(&pdev->dev, &ec_wdt_dev); if (ret < 0) { dev_err(&pdev->dev, "failed to register Portwell EC Watchdog\n"); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 00b1e7c79a3d..b73b84fdb15e 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -973,6 +973,7 @@ static const struct attribute_group auth_attr_group = { .is_visible = auth_attr_is_visible, .attrs = auth_attrs, }; +__ATTRIBUTE_GROUPS(auth_attr); /* ---- Attributes sysfs --------------------------------------------------------- */ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1188,6 +1189,7 @@ static const struct attribute_group tlmi_attr_group = { .is_visible = attr_is_visible, .attrs = tlmi_attrs, }; +__ATTRIBUTE_GROUPS(tlmi_attr); static void tlmi_attr_setting_release(struct kobject *kobj) { @@ -1207,11 +1209,13 @@ static void tlmi_pwd_setting_release(struct kobject *kobj) static const struct kobj_type tlmi_attr_setting_ktype = { .release = &tlmi_attr_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = tlmi_attr_groups, }; static const struct kobj_type tlmi_pwd_setting_ktype = { .release = &tlmi_pwd_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = auth_attr_groups, }; static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1380,21 +1384,18 @@ static struct kobj_attribute debug_cmd = __ATTR_WO(debug_cmd); /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { - int i; + struct kobject *pos, *n; /* Attribute structures */ - for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { - if (tlmi_priv.setting[i]) { - sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); - kobject_put(&tlmi_priv.setting[i]->kobj); - } - } sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &save_settings.attr); if (tlmi_priv.can_debug_cmd && debug_support) sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr); + list_for_each_entry_safe(pos, n, &tlmi_priv.attribute_kset->list, entry) + kobject_put(pos); + kset_unregister(tlmi_priv.attribute_kset); /* Free up any saved signatures */ @@ -1402,19 +1403,8 @@ static void tlmi_release_attr(void) kfree(tlmi_priv.pwd_admin->save_signature); /* Authentication structures */ - sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_admin->kobj); - sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_power->kobj); - - if (tlmi_priv.opcode_support) { - sysfs_remove_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_system->kobj); - sysfs_remove_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_hdd->kobj); - sysfs_remove_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_nvme->kobj); - } + list_for_each_entry_safe(pos, n, &tlmi_priv.authentication_kset->list, entry) + 
kobject_put(pos); kset_unregister(tlmi_priv.authentication_kset); } @@ -1455,6 +1445,14 @@ static int tlmi_sysfs_init(void) goto fail_device_created; } + tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, + &tlmi_priv.class_dev->kobj); + if (!tlmi_priv.authentication_kset) { + kset_unregister(tlmi_priv.attribute_kset); + ret = -ENOMEM; + goto fail_device_created; + } + for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { /* Check if index is a valid setting - skip if it isn't */ if (!tlmi_priv.setting[i]) @@ -1471,12 +1469,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, - "%s", tlmi_priv.setting[i]->display_name); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); + ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, + NULL, "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; } @@ -1496,55 +1490,34 @@ static int tlmi_sysfs_init(void) } /* Create authentication entries */ - tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, - &tlmi_priv.class_dev->kobj); - if (!tlmi_priv.authentication_kset) { - ret = -ENOMEM; - goto fail_create_attr; - } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Admin"); if (ret) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "Power-on"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Power-on"); if (ret) goto fail_create_attr; if (tlmi_priv.opcode_support) { tlmi_priv.pwd_system->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_system->kobj, NULL, "%s", "System"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_system->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "System"); if (ret) goto fail_create_attr; tlmi_priv.pwd_hdd->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_hdd->kobj, NULL, "%s", "HDD"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_hdd->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "HDD"); if (ret) goto fail_create_attr; tlmi_priv.pwd_nvme->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_nvme->kobj, NULL, "%s", "NVMe"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_nvme->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "NVMe"); if (ret) goto fail_create_attr; } @@ -1554,7 +1527,7 @@ static int tlmi_sysfs_init(void) fail_create_attr: tlmi_release_attr(); fail_device_created: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); fail_class_created: return ret; } @@ -1577,8 +1550,6 
@@ static struct tlmi_pwd_setting *tlmi_create_auth(const char *pwd_type, new_pwd->maxlen = tlmi_priv.pwdcfg.core.max_length; new_pwd->index = 0; - kobject_init(&new_pwd->kobj, &tlmi_pwd_setting_ktype); - return new_pwd; } @@ -1683,7 +1654,6 @@ static int tlmi_analyze(struct wmi_device *wdev) if (setting->possible_values) strreplace(setting->possible_values, ',', ';'); - kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; kfree(item); } @@ -1781,7 +1751,7 @@ fail_clear_attr: static void tlmi_remove(struct wmi_device *wdev) { tlmi_release_attr(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); } static int tlmi_probe(struct wmi_device *wdev, const void *context) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e7350c9fa3aa..b59b4d90b0c7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3295,6 +3295,7 @@ static const struct key_entry keymap_lenovo[] __initconst = { */ { KE_KEY, 0x131d, { KEY_VENDOR } }, /* System debug info, similar to old ThinkPad key */ { KE_KEY, 0x1320, { KEY_LINK_PHONE } }, + { KE_KEY, 0x1402, { KEY_LINK_PHONE } }, { KE_KEY, TP_HKEY_EV_TRACK_DOUBLETAP /* 0x8036 */, { KEY_PROG4 } }, { KE_END } }; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index e46453750d5f..03aecf8bb7f8 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -177,16 +177,22 @@ static int wmi_device_enable(struct wmi_device *wdev, bool enable) acpi_handle handle; acpi_status status; - if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) - return 0; - if (wblock->dev.dev.type == &wmi_type_method) return 0; - if (wblock->dev.dev.type == &wmi_type_event) + if (wblock->dev.dev.type == &wmi_type_event) { + /* + * Windows always enables/disables WMI events, even when they are + * not marked as being expensive. We follow this behavior for + * compatibility reasons. 
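+ * For example, an event block with notify_id 0xD0 is enabled and
+ * disabled through the ACPI method "WED0".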
+ */ snprintf(method, sizeof(method), "WE%02X", wblock->gblock.notify_id); - else + } else { + if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) + return 0; + get_acpi_method_name(wblock, 'C', method); + } /* * Not all WMI devices marked as expensive actually implement the diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index e3be40adc0d7..faa0b6bc5b53 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -341,12 +341,28 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode) { struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); struct rapl_defaults *defaults = get_defaults(rd->rp); + u64 val; int ret; cpus_read_lock(); ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); - if (!ret && defaults->set_floor_freq) + if (ret) + goto end; + + ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val); + if (ret) + goto end; + + if (mode != val) { + pr_debug("%s cannot be %s\n", power_zone->name, + str_enabled_disabled(mode)); + goto end; + } + + if (defaults->set_floor_freq) defaults->set_floor_freq(rd, mode); + +end: cpus_read_unlock(); return ret; diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 6b611fe5a95e..4ca5a464a46a 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -325,13 +325,22 @@ static long ptp_sys_offset_extended(struct ptp_clock *ptp, void __user *arg) if (IS_ERR(extoff)) return PTR_ERR(extoff); - if (extoff->n_samples > PTP_MAX_SAMPLES || - extoff->rsv[0] || extoff->rsv[1] || - (extoff->clockid != CLOCK_REALTIME && - extoff->clockid != CLOCK_MONOTONIC && - extoff->clockid != CLOCK_MONOTONIC_RAW)) + if (extoff->n_samples > PTP_MAX_SAMPLES || extoff->rsv[0] || extoff->rsv[1]) return -EINVAL; + switch (extoff->clockid) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_MONOTONIC_RAW: + break; + case CLOCK_AUX ... CLOCK_AUX_LAST: + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + break; + fallthrough; + default: + return -EINVAL; + } + sts.clockid = extoff->clockid; for (unsigned int i = 0; i < extoff->n_samples; i++) { struct timespec64 ts; @@ -340,6 +349,11 @@ static long ptp_sys_offset_extended(struct ptp_clock *ptp, void __user *arg) err = ptp->info->gettimex64(ptp->info, &ts, &sts); if (err) return err; + + /* Filter out disabled or unavailable clocks */ + if (sts.pre_ts.tv_sec < 0 || sts.post_ts.tv_sec < 0) + return -EINVAL; + extoff->ts[i][0].sec = sts.pre_ts.tv_sec; extoff->ts[i][0].nsec = sts.pre_ts.tv_nsec; extoff->ts[i][1].sec = ts.tv_sec; diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 4d842c692194..edf776b8ad53 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -596,7 +596,7 @@ static bool pwm_state_valid(const struct pwm_state *state) * and supposed to be ignored. So also ignore any strange values and * consider the state ok. 
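 * For illustration: { .enabled = false, .period = 0 } is accepted by
 * this early return, while { .enabled = true, .period = 0 } is
 * rejected by the checks below.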
*/ - if (state->enabled) + if (!state->enabled) return true; if (!state->period) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 7eaab5831499..33d3554b9197 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -130,8 +130,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, return ret; clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); - if (!clk_rate) - return -EINVAL; + if (!clk_rate) { + ret = -EINVAL; + goto out; + } /* Make sure we use the bus clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) @@ -150,9 +152,9 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - pwm_mediatek_clk_disable(chip, pwm); dev_err(pwmchip_parent(chip), "period of %d ns not supported\n", period_ns); - return -EINVAL; + ret = -EINVAL; + goto out; } if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) { @@ -169,9 +171,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); +out: pwm_mediatek_clk_disable(chip, pwm); - return 0; + return ret; } static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7a248dc8d2e2..cbd6d53ebfb5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5639,6 +5639,7 @@ static void regulator_remove_coupling(struct regulator_dev *rdev) ERR_PTR(err)); } + rdev->coupling_desc.n_coupled = 0; kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 75bd53445ba7..6351ceefdb3e 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -260,8 +260,10 @@ static int gpio_regulator_probe(struct platform_device *pdev) return -ENOMEM; } - drvdata->gpiods = devm_kzalloc(dev, sizeof(struct gpio_desc *), - GFP_KERNEL); + drvdata->gpiods = devm_kcalloc(dev, config->ngpios, + sizeof(struct gpio_desc *), GFP_KERNEL); + if (!drvdata->gpiods) + return -ENOMEM; if (config->input_supply) { drvdata->desc.supply_name = devm_kstrdup(&pdev->dev, @@ -274,8 +276,6 @@ static int gpio_regulator_probe(struct platform_device *pdev) } } - if (!drvdata->gpiods) - return -ENOMEM; for (i = 0; i < config->ngpios; i++) { drvdata->gpiods[i] = devm_gpiod_get_index(dev, NULL, diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c index 48dcee5287f3..9ad16b04c913 100644 --- a/drivers/regulator/mp886x.c +++ b/drivers/regulator/mp886x.c @@ -348,7 +348,8 @@ static const struct of_device_id mp886x_dt_ids[] = { MODULE_DEVICE_TABLE(of, mp886x_dt_ids); static const struct i2c_device_id mp886x_id[] = { - { "mp886x", (kernel_ulong_t)&mp8869_ci }, + { "mp8867", (kernel_ulong_t)&mp8867_ci }, + { "mp8869", (kernel_ulong_t)&mp8869_ci }, { }, }; MODULE_DEVICE_TABLE(i2c, mp886x_id); diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c index c05b67e26ac8..5bec84db25f1 100644 --- a/drivers/regulator/sy8824x.c +++ b/drivers/regulator/sy8824x.c @@ -213,7 +213,10 @@ static const struct of_device_id sy8824_dt_ids[] = { MODULE_DEVICE_TABLE(of, sy8824_dt_ids); static const struct i2c_device_id sy8824_id[] = { - { "sy8824", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824c", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824e", (kernel_ulong_t)&sy8824e_cfg }, + { "sy20276", 
(kernel_ulong_t)&sy20276_cfg }, + { "sy20278", (kernel_ulong_t)&sy20278_cfg }, { } }; MODULE_DEVICE_TABLE(i2c, sy8824_id); diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index b16b300d7f45..5e67fdc88f49 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -436,46 +436,46 @@ static int tps65219_regulator_probe(struct platform_device *pdev) pmic->rdesc[i].name); } - irq_data = devm_kmalloc(tps->dev, pmic->common_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->common_irq_size; ++i) { irq_type = &pmic->common_irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", irq_type->irq_name, irq, error); } - irq_data = devm_kmalloc(tps->dev, pmic->dev_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->dev_irq_size; ++i) { irq_type = &pmic->irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 8172869bd3d7..0743c6acd6e2 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -692,8 +692,12 @@ static irqreturn_t cmos_interrupt(int irq, void *p) { u8 irqstat; u8 rtc_control; + unsigned long flags; - spin_lock(&rtc_lock); + /* We cannot use spin_lock() here, as cmos_interrupt() is also called + * in a non-irq context. + */ + spin_lock_irqsave(&rtc_lock, flags); /* When the HPET interrupt handler calls us, the interrupt * status is passed as arg1 instead of the irq number. 
But @@ -727,7 +731,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) hpet_mask_rtc_irq_bit(RTC_AIE); CMOS_READ(RTC_INTR_FLAGS); } - spin_unlock(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); if (is_intr(irqstat)) { rtc_update_irq(p, 1, irqstat); @@ -1295,9 +1299,7 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { - local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); - local_irq_enable(); return; } diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 31c7dca8f469..2e1ac0c42e93 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -1538,7 +1538,12 @@ static int pcf2127_spi_probe(struct spi_device *spi) variant = &pcf21xx_cfg[type]; } - config.max_register = variant->max_register, + if (variant->type == PCF2131) { + config.read_flag_mask = 0x0; + config.write_flag_mask = 0x0; + } + + config.max_register = variant->max_register; regmap = devm_regmap_init_spi(spi, &config); if (IS_ERR(regmap)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index db5c9b641277..a7220b4d0e8d 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/i2c.h> #include <linux/bcd.h> +#include <linux/reboot.h> #include <linux/regmap.h> #include <linux/rtc.h> #include <linux/platform_device.h> @@ -53,6 +54,7 @@ enum { * Device | Write time | Read time | Write alarm * ================================================= * S5M8767 | UDR + TIME | | UDR + * S2MPG10 | WUDR | RUDR | AUDR * S2MPS11/14 | WUDR | RUDR | WUDR + RUDR * S2MPS13 | WUDR | RUDR | WUDR + AUDR * S2MPS15 | WUDR | RUDR | AUDR @@ -99,6 +101,20 @@ static const struct s5m_rtc_reg_config s5m_rtc_regs = { .write_alarm_udr_mask = S5M_RTC_UDR_MASK, }; +/* Register map for S2MPG10 */ +static const struct s5m_rtc_reg_config s2mpg10_rtc_regs = { + .regs_count = 7, + .time = S2MPG10_RTC_SEC, + .ctrl = S2MPG10_RTC_CTRL, + .alarm0 = S2MPG10_RTC_A0SEC, + .alarm1 = S2MPG10_RTC_A1SEC, + .udr_update = S2MPG10_RTC_UPDATE, + .autoclear_udr_mask = S2MPS15_RTC_WUDR_MASK | S2MPS15_RTC_AUDR_MASK, + .read_time_udr_mask = S2MPS_RTC_RUDR_MASK, + .write_time_udr_mask = S2MPS15_RTC_WUDR_MASK, + .write_alarm_udr_mask = S2MPS15_RTC_AUDR_MASK, +}; + /* Register map for S2MPS13 */ static const struct s5m_rtc_reg_config s2mps13_rtc_regs = { .regs_count = 7, @@ -227,8 +243,8 @@ static int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info) return ret; } -static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, - struct rtc_wkalrm *alarm) +static int s5m_check_pending_alarm_interrupt(struct s5m_rtc_info *info, + struct rtc_wkalrm *alarm) { int ret; unsigned int val; @@ -238,6 +254,7 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val); val &= S5M_ALARM0_STATUS; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -262,17 +279,9 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; - ret = regmap_read(info->regmap, info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "failed to read update reg(%d)\n", ret); - return ret; - } - - data |= info->regs->write_time_udr_mask; - - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + 
info->regs->write_time_udr_mask); if (ret < 0) { dev_err(info->dev, "failed to write update reg(%d)\n", ret); return ret; @@ -286,20 +295,14 @@ static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; + unsigned int udr_mask; - ret = regmap_read(info->regmap, info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "%s: fail to read update reg(%d)\n", - __func__, ret); - return ret; - } - - data |= info->regs->write_alarm_udr_mask; + udr_mask = info->regs->write_alarm_udr_mask; switch (info->device_type) { case S5M8767X: - data &= ~S5M_RTC_TIME_EN_MASK; + udr_mask |= S5M_RTC_TIME_EN_MASK; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -309,7 +312,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) return -EINVAL; } - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_update_bits(info->regmap, info->regs->udr_update, + udr_mask, info->regs->write_alarm_udr_mask); if (ret < 0) { dev_err(info->dev, "%s: fail to write update reg(%d)\n", __func__, ret); @@ -320,8 +324,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) /* On S2MPS13 the AUDR is not auto-cleared */ if (info->device_type == S2MPS13X) - regmap_update_bits(info->regmap, info->regs->udr_update, - S2MPS13_RTC_AUDR_MASK, 0); + regmap_clear_bits(info->regmap, info->regs->udr_update, + S2MPS13_RTC_AUDR_MASK); return ret; } @@ -333,10 +337,8 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) int ret; if (info->regs->read_time_udr_mask) { - ret = regmap_update_bits(info->regmap, - info->regs->udr_update, - info->regs->read_time_udr_mask, - info->regs->read_time_udr_mask); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + info->regs->read_time_udr_mask); if (ret) { dev_err(dev, "Failed to prepare registers for time reading: %d\n", @@ -351,6 +353,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -374,6 +377,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -411,6 +415,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -430,7 +435,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) dev_dbg(dev, "%s: %ptR(%d)\n", __func__, &alrm->time, alrm->time.tm_wday); - return s5m_check_peding_alarm_interrupt(info, alrm); + return s5m_check_pending_alarm_interrupt(info, alrm); } static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) @@ -449,6 +454,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -487,6 +493,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -524,6 +531,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -604,6 +612,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) ret = 
regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2); break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -634,59 +643,92 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) return ret; } +static int s5m_rtc_restart_s2mpg10(struct sys_off_data *data) +{ + struct s5m_rtc_info *info = data->cb_data; + int ret; + + if (data->mode != REBOOT_COLD && data->mode != REBOOT_HARD) + return NOTIFY_DONE; + + /* + * Arm watchdog with maximum timeout (2 seconds), and perform full reset + * on expiry. + */ + ret = regmap_set_bits(info->regmap, S2MPG10_RTC_WTSR, + (S2MPG10_WTSR_COLDTIMER | S2MPG10_WTSR_COLDRST + | S2MPG10_WTSR_WTSRT | S2MPG10_WTSR_WTSR_EN)); + + return ret ? NOTIFY_BAD : NOTIFY_DONE; +} + static int s5m_rtc_probe(struct platform_device *pdev) { struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent); + enum sec_device_type device_type = + platform_get_device_id(pdev)->driver_data; struct s5m_rtc_info *info; - struct i2c_client *i2c; - const struct regmap_config *regmap_cfg; int ret, alarm_irq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - switch (platform_get_device_id(pdev)->driver_data) { - case S2MPS15X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps15_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS14X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps14_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS13X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps13_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S5M8767X: - regmap_cfg = &s5m_rtc_regmap_config; - info->regs = &s5m_rtc_regs; - alarm_irq = S5M8767_IRQ_RTCA1; - break; - default: - return dev_err_probe(&pdev->dev, -ENODEV, - "Device type %lu is not supported by RTC driver\n", - platform_get_device_id(pdev)->driver_data); - } + info->regmap = dev_get_regmap(pdev->dev.parent, "rtc"); + if (!info->regmap) { + const struct regmap_config *regmap_cfg; + struct i2c_client *i2c; - i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, - RTC_I2C_ADDR); - if (IS_ERR(i2c)) - return dev_err_probe(&pdev->dev, PTR_ERR(i2c), - "Failed to allocate I2C for RTC\n"); + switch (device_type) { + case S2MPS15X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps15_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS14X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps14_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS13X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps13_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S5M8767X: + regmap_cfg = &s5m_rtc_regmap_config; + info->regs = &s5m_rtc_regs; + alarm_irq = S5M8767_IRQ_RTCA1; + break; + default: + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } - info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); - if (IS_ERR(info->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), - "Failed to allocate RTC register map\n"); + i2c = devm_i2c_new_dummy_device(&pdev->dev, + s5m87xx->i2c->adapter, + RTC_I2C_ADDR); + if (IS_ERR(i2c)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c), + "Failed to allocate I2C\n"); + + info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); + if (IS_ERR(info->regmap)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), + "Failed to allocate regmap\n"); + } else if (device_type == S2MPG10) { + info->regs = &s2mpg10_rtc_regs; + alarm_irq = 
S2MPG10_IRQ_RTCA0; + } else { + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } info->dev = &pdev->dev; info->s5m87xx = s5m87xx; - info->device_type = platform_get_device_id(pdev)->driver_data; + info->device_type = device_type; if (s5m87xx->irq_data) { info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq); @@ -721,7 +763,23 @@ static int s5m_rtc_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request alarm IRQ %d\n", info->irq); - device_init_wakeup(&pdev->dev, true); + + ret = devm_device_init_wakeup(&pdev->dev); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to init wakeup\n"); + } + + if (of_device_is_system_power_controller(pdev->dev.parent->of_node) && + info->device_type == S2MPG10) { + ret = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_HIGH + 1, + s5m_rtc_restart_s2mpg10, + info); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register restart handler\n"); } return devm_rtc_register_device(info->rtc_dev); @@ -755,6 +813,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume); static const struct platform_device_id s5m_rtc_id[] = { { "s5m-rtc", S5M8767X }, + { "s2mpg10-rtc", S2MPG10 }, { "s2mps13-rtc", S2MPS13X }, { "s2mps14-rtc", S2MPS14X }, { "s2mps15-rtc", S2MPS15X }, diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index cef60770f68b..b3fcdcae379e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -86,7 +86,7 @@ static void *_copy_apqns_from_user(void __user *uapqns, size_t nr_apqns) if (!uapqns || nr_apqns == 0) return NULL; - return memdup_user(uapqns, nr_apqns * sizeof(struct pkey_apqn)); + return memdup_array_user(uapqns, nr_apqns, sizeof(struct pkey_apqn)); } static int pkey_ioctl_genseck(struct pkey_genseck __user *ugs) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index e021f1106bea..cc5d05dc395c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; - if (sht->max_segment_size) - shost->max_segment_size = sht->max_segment_size; - else - shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + shost->virt_boundary_mask = sht->virt_boundary_mask; + if (shost->virt_boundary_mask) { + WARN_ON_ONCE(sht->max_segment_size && + sht->max_segment_size != UINT_MAX); + shost->max_segment_size = UINT_MAX; + } else { + if (sht->max_segment_size) + shost->max_segment_size = sht->max_segment_size; + else + shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + } /* 32-byte (dword) is a common minimum for HBAs. 
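 * (In block-layer terms, dma_alignment is a mask; e.g. a mask of 3
 * enforces 4-byte, i.e. dword, alignment.)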
*/ if (sht->dma_alignment) @@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->dma_boundary = 0xffffffff; - if (sht->virt_boundary_mask) - shost->virt_boundary_mask = sht->virt_boundary_mask; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0cd6f3e14882..13b6cb1b93ac 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle, pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb, sizeof(*pdb), DMA_FROM_DEVICE); - if (!pdb_dma) { + if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) { ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n"); return QLA_MEMORY_ALLOC_FAILED; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d4141656b204..a39f1da4ce47 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode) task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data, task->data_count, DMA_TO_DEVICE); + if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma)) + return -ENOMEM; } DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n", diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3f6e87705b62..eeaa6af294b8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); - if (vpd && vpd->len >= 2) + if (vpd && vpd->len >= 6) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index aa1932ba17cb..d3c78f59b22c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1960,11 +1960,6 @@ static int cqspi_probe(struct platform_device *pdev) pm_runtime_enable(dev); - if (cqspi->rx_chan) { - dma_release_channel(cqspi->rx_chan); - goto probe_setup_failed; - } - pm_runtime_set_autosuspend_delay(dev, CQSPI_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); pm_runtime_get_noresume(dev); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 863781ba6c16..0dcd49114095 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -983,11 +983,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) { status = dspi_dma_xfer(dspi); } else { + /* + * Reinitialize the completion before transferring data + * to avoid the case where it might remain in the done + * state due to a spurious interrupt from a previous + * transfer. This could falsely signal that the current + * transfer has completed. 
+ */ + if (dspi->irq) + reinit_completion(&dspi->xfer_done); + dspi_fifo_write(dspi); if (dspi->irq) { wait_for_completion(&dspi->xfer_done); - reinit_completion(&dspi->xfer_done); } else { do { status = dspi_poll(dspi); diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 77d9cc65477a..f2e1a27b410d 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -315,6 +315,22 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) mtd_set_ooblayout(mtd, &qcom_spi_ooblayout); + /* + * Free the temporary BAM transaction allocated initially by + * qcom_nandc_alloc(), and allocate a new one based on the + * updated max_cwperpage value. + */ + qcom_free_bam_transaction(snandc); + + snandc->max_cwperpage = cwperpage; + + snandc->bam_txn = qcom_alloc_bam_transaction(snandc); + if (!snandc->bam_txn) { + dev_err(snandc->dev, "failed to allocate BAM transaction\n"); + ret = -ENOMEM; + goto err_free_ecc_cfg; + } + ecc_cfg->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, (cwperpage - 1)) | FIELD_PREP(UD_SIZE_BYTES_MASK, ecc_cfg->cw_data) | FIELD_PREP(DISABLE_STATUS_AFTER_WRITE, 1) | diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c b/drivers/staging/rtl8723bs/core/rtw_security.c index 1e9eff01b1aa..e9f382c280d9 100644 --- a/drivers/staging/rtl8723bs/core/rtw_security.c +++ b/drivers/staging/rtl8723bs/core/rtw_security.c @@ -868,29 +868,21 @@ static signed int aes_cipher(u8 *key, uint hdrlen, num_blocks, payload_index; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); uint frsubtype = GetFrameSubType(pframe); frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - if ((hdrlen == WLAN_HDR_A3_LEN) || (hdrlen == WLAN_HDR_A3_QOS_LEN)) a4_exists = 0; else @@ -1080,15 +1072,15 @@ static signed int aes_decipher(u8 *key, uint hdrlen, num_blocks, payload_index; signed int res = _SUCCESS; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); @@ -1096,14 +1088,6 @@ static signed int aes_decipher(u8 *key, uint hdrlen, frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - /* start to decrypt the payload */ num_blocks = (plen-8) / 16; /* plen including LLC, payload_length and mic) */ diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index f3af5666bb11..f9ef7d94cebd 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -728,12 +728,21 
@@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev, return true; } +static void notif_work_fn(struct work_struct *work) +{ + struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa, + notif_work); + struct optee *optee = container_of(optee_ffa, struct optee, ffa); + + optee_do_bottom_half(optee->ctx); +} + static void notif_callback(int notify_id, void *cb_data) { struct optee *optee = cb_data; if (notify_id == optee->ffa.bottom_half_value) - optee_do_bottom_half(optee->ctx); + queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work); else optee_notif_send(optee, notify_id); } @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev) struct optee *optee = ffa_dev_get_drvdata(ffa_dev); u32 bottom_half_id = optee->ffa.bottom_half_value; - if (bottom_half_id != U32_MAX) + if (bottom_half_id != U32_MAX) { ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, bottom_half_id); + destroy_workqueue(optee->ffa.notif_wq); + } optee_remove_common(optee); mutex_destroy(&optee->ffa.mutex); @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, u32 notif_id = 0; int rc; + INIT_WORK(&optee->ffa.notif_work, notif_work_fn); + optee->ffa.notif_wq = create_workqueue("optee_notification"); + if (!optee->ffa.notif_wq) { + rc = -EINVAL; + goto err; + } + while (true) { rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev, is_per_vcpu, @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, * notifications in that case. */ if (rc != -EACCES) - return rc; + goto err_wq; notif_id++; if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE) - return rc; + goto err_wq; } optee->ffa.bottom_half_value = notif_id; rc = enable_async_notif(optee); - if (rc < 0) { - ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, - notif_id); - optee->ffa.bottom_half_value = U32_MAX; - } + if (rc < 0) + goto err_rel; + + return 0; +err_rel: + ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id); +err_wq: + destroy_workqueue(optee->ffa.notif_wq); +err: + optee->ffa.bottom_half_value = U32_MAX; return rc; } diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index dc0f355ef72a..9526087f0e68 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -165,6 +165,8 @@ struct optee_ffa { /* Serializes access to @global_ids */ struct mutex mutex; struct rhashtable global_ids; + struct workqueue_struct *notif_wq; + struct work_struct notif_work; }; struct optee; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index bd02ee898f5d..500dfc009d03 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -235,6 +235,7 @@ struct imx_port { enum imx_tx_state tx_state; struct hrtimer trigger_start_tx; struct hrtimer trigger_stop_tx; + unsigned int rxtl; }; struct imx_port_ucrs { @@ -1339,6 +1340,7 @@ static void imx_uart_clear_rx_errors(struct imx_port *sport) #define TXTL_DEFAULT 8 #define RXTL_DEFAULT 8 /* 8 characters or aging timer */ +#define RXTL_CONSOLE_DEFAULT 1 #define TXTL_DMA 8 /* DMA burst setting */ #define RXTL_DMA 9 /* DMA burst setting */ @@ -1457,7 +1459,7 @@ static void imx_uart_disable_dma(struct imx_port *sport) ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN); imx_uart_writel(sport, ucr1, UCR1); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); sport->dma_is_enabled = 0; } @@ -1482,7 +1484,12 @@ static int imx_uart_startup(struct uart_port *port) return 
retval; } - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + if (uart_console(&sport->port)) + sport->rxtl = RXTL_CONSOLE_DEFAULT; + else + sport->rxtl = RXTL_DEFAULT; + + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); /* disable the DREN bit (Data Ready interrupt enable) before * requesting IRQs @@ -1948,7 +1955,7 @@ static int imx_uart_poll_init(struct uart_port *port) if (retval) clk_disable_unprepare(sport->clk_ipg); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); uart_port_lock_irqsave(&sport->port, &flags); @@ -2040,7 +2047,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* If the receiver trigger is 0, set it to a default value */ ufcr = imx_uart_readl(sport, UFCR); if ((ufcr & UFCR_RXTL_MASK) == 0) - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); imx_uart_start_rx(port); } @@ -2302,7 +2309,7 @@ imx_uart_console_setup(struct console *co, char *options) else imx_uart_console_get_options(sport, &baud, &parity, &bits); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 5d1677f1b651..cb3b127b06b6 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -72,6 +72,7 @@ static int serial_base_device_init(struct uart_port *port, dev->parent = parent_dev; dev->bus = &serial_base_bus_type; dev->release = release; + device_set_of_node_from_dev(dev, parent_dev); if (!serial_base_initialized) { dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n"); diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 6ead622b7713..03877485dfb7 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -206,7 +206,7 @@ static int ucs_page_entry_cmp(const void *key, const void *element) /** * ucs_get_fallback() - Get a substitution for the provided Unicode character - * @base: Base Unicode code point (UCS-4) + * @cp: Unicode code point (UCS-4) * * Get a simpler fallback character for the provided Unicode character. * This is used for terminal display when corresponding glyph is unavailable. 
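A note on the spi-fsl-dspi change above: the added comment explains the race, and the pattern generalizes to any completion signalled from an IRQ handler. A minimal sketch with hypothetical foo_* names (not code from the patch):

#include <linux/completion.h>

struct foo_dev {
	struct completion xfer_done;	/* completed by the IRQ handler */
};

void foo_hw_start(struct foo_dev *dev);	/* hypothetical: arms the hardware */

static void foo_do_transfer(struct foo_dev *dev)
{
	/*
	 * Reinitialize *before* starting the hardware: a spurious IRQ from
	 * the previous transfer may already have called complete(), and
	 * clearing the completion only after wait_for_completion() returns
	 * would leave that stale "done" count behind, making the next
	 * transfer appear finished before the hardware is actually done.
	 */
	reinit_completion(&dev->xfer_done);
	foo_hw_start(dev);
	wait_for_completion(&dev->xfer_done);
}

This is exactly why the hunk moves reinit_completion() ahead of dspi_fifo_write() instead of leaving it after the wait.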
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ed39d9cb4432..62049ceb34de 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4650,6 +4650,7 @@ void do_unblank_screen(int leaving_gfx) set_palette(vc); set_cursor(vc); vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num); + notify_update(vc); } EXPORT_SYMBOL(do_unblank_screen); diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index de8b6acd4058..fcb4b14a710f 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1); UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8); UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4); UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1); -UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8); +UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8); UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2); UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1); UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); @@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_logical_block_count.attr, &dev_attr_erase_block_size.attr, &dev_attr_provisioning_type.attr, - &dev_attr_physical_memory_resourse_count.attr, + &dev_attr_physical_memory_resource_count.attr, &dev_attr_context_capabilities.attr, &dev_attr_large_unit_granularity.attr, &dev_attr_wb_buf_alloc_units.attr, diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h index cd138acdcce1..86860686d836 100644 --- a/drivers/usb/cdns3/cdnsp-debug.h +++ b/drivers/usb/cdns3/cdnsp-debug.h @@ -327,12 +327,13 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0, case TRB_RESET_EP: case TRB_HALT_ENDPOINT: ret = scnprintf(str, size, - "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c", + "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c %c", cdnsp_trb_type_string(type), ep_num, ep_id % 2 ? "out" : "in", TRB_TO_EP_INDEX(field3), field1, field0, TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + field3 & TRB_CYCLE ? 'C' : 'c', + field3 & TRB_ESP ? 'P' : 'p'); break; case TRB_STOP_RING: ret = scnprintf(str, size, diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index f317d3c84781..5cd9b898ce97 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -414,6 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, void cdnsp_setup_analyze(struct cdnsp_device *pdev) { struct usb_ctrlrequest *ctrl = &pdev->setup; + struct cdnsp_ep *pep; int ret = -EINVAL; u16 len; @@ -427,10 +428,21 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) goto out; } + pep = &pdev->eps[0]; + /* Restore the ep0 to Stopped/Running state. */ - if (pdev->eps[0].ep_state & EP_HALTED) { - trace_cdnsp_ep0_halted("Restore to normal state"); - cdnsp_halt_endpoint(pdev, &pdev->eps[0], 0); + if (pep->ep_state & EP_HALTED) { + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_HALTED) + cdnsp_halt_endpoint(pdev, pep, 0); + + /* + * Halt Endpoint Command for SSP2 for ep0 preserve current + * endpoint state and driver has to synchronize the + * software endpoint state with endpoint output context + * state. 
+ */ + pep->ep_state &= ~EP_HALTED; + pep->ep_state |= EP_STOPPED; } /* diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 2afa3e558f85..a91cca509db0 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -987,6 +987,12 @@ enum cdnsp_setup_dev { #define STREAM_ID_FOR_TRB(p) ((((p)) << 16) & GENMASK(31, 16)) #define SCT_FOR_TRB(p) (((p) << 1) & 0x7) +/* + * Halt Endpoint Command TRB field. + * The ESP bit only exists in the SSP2 controller. + */ +#define TRB_ESP BIT(9) + /* Link TRB specific fields. */ #define TRB_TC BIT(1) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index fd06cb85c4ea..0758f171f73e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -772,7 +772,9 @@ static int cdnsp_update_port_id(struct cdnsp_device *pdev, u32 port_id) } if (port_id != old_port) { - cdnsp_disable_slot(pdev); + if (pdev->slot_id) + cdnsp_disable_slot(pdev); + pdev->active_port = port; cdnsp_enable_slot(pdev); } @@ -2483,7 +2485,8 @@ void cdnsp_queue_halt_endpoint(struct cdnsp_device *pdev, unsigned int ep_index) { cdnsp_queue_command(pdev, 0, 0, 0, TRB_TYPE(TRB_HALT_ENDPOINT) | SLOT_ID_FOR_TRB(pdev->slot_id) | - EP_ID_FOR_TRB(ep_index)); + EP_ID_FOR_TRB(ep_index) | + (!ep_index ? TRB_ESP : 0)); } void cdnsp_force_header_wakeup(struct cdnsp_device *pdev, int intf_num) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8a9b31fd5c89..1a48e6440e6c 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -2374,6 +2374,10 @@ static void udc_suspend(struct ci_hdrc *ci) */ if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0) hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0); + + if (ci->gadget.connected && + (!ci->suspended || !device_may_wakeup(ci->dev))) + usb_gadget_disconnect(&ci->gadget); } static void udc_resume(struct ci_hdrc *ci, bool power_lost) @@ -2384,6 +2388,9 @@ static void udc_resume(struct ci_hdrc *ci, bool power_lost) OTGSC_BSVIS | OTGSC_BSVIE); if (ci->vbus_active) usb_gadget_vbus_disconnect(&ci->gadget); + } else if (ci->vbus_active && ci->driver && + !ci->gadget.connected) { + usb_gadget_connect(&ci->gadget); } /* Restore value 0 if it was set for power lost check */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 770d1e91183c..3e1215f7a9a0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -68,6 +68,12 @@ */ #define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ +/* + * Give SS hubs 200ms time after wake to train downstream links before + * assuming no port activity and allowing hub to runtime suspend back. + */ +#define USB_SS_PORT_U0_WAKE_TIME 200 /* ms */ + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ @@ -1095,6 +1101,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth @@ -1343,6 +1350,17 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } + if (type == HUB_RESUME && hub_is_superspeed(hub->hdev)) { + /* give usb3 downstream links training time after hub resume */ + usb_autopm_get_interface_no_resume( + to_usb_interface(hub->intfdev)); + + queue_delayed_work(system_power_efficient_wq, + &hub->post_resume_work, + msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME)); + return; + } + hub_put(hub); } @@ -1361,6 +1379,14 @@ static void hub_init_func3(struct work_struct *ws) hub_activate(hub, HUB_INIT3); } +static void hub_post_resume(struct work_struct *ws) +{ + struct usb_hub *hub = container_of(ws, struct usb_hub, post_resume_work.work); + + usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + hub_put(hub); +} + enum hub_quiescing_type { HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND }; @@ -1386,6 +1412,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) /* Stop hub_wq and related activity */ timer_delete_sync(&hub->irq_urb_retry); + flush_delayed_work(&hub->post_resume_work); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); @@ -1944,6 +1971,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) hub->hdev = hdev; INIT_DELAYED_WORK(&hub->leds, led_work); INIT_DELAYED_WORK(&hub->init_work, NULL); + INIT_DELAYED_WORK(&hub->post_resume_work, hub_post_resume); INIT_WORK(&hub->events, hub_event); INIT_LIST_HEAD(&hub->onboard_devs); spin_lock_init(&hub->irq_urb_lock); @@ -2337,6 +2365,9 @@ void usb_disconnect(struct usb_device **pdev) usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); + if (udev->usb4_link) + device_link_del(udev->usb4_link); + /* Unregister the device. The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). 
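The hub resume change above pairs two counts that are both dropped from the delayed work: a hub_get()/hub_put() reference and an autopm usage count taken with usb_autopm_get_interface_no_resume(). Stripped of the USB specifics, the hold-off-runtime-suspend idiom looks like the sketch below; the foo_* names are hypothetical and the 200 ms window mirrors USB_SS_PORT_U0_WAKE_TIME.

#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/workqueue.h>

#define FOO_POST_RESUME_MS	200	/* downstream link-training window */

struct foo {
	struct device *dev;
	struct delayed_work post_resume_work;	/* INIT_DELAYED_WORK() at probe */
};

static void foo_post_resume(struct work_struct *ws)
{
	struct foo *foo = container_of(ws, struct foo, post_resume_work.work);

	/* Window elapsed: drop the usage count so runtime suspend can happen. */
	pm_runtime_put(foo->dev);
}

static void foo_resume_done(struct foo *foo)
{
	/* Pin the device active without triggering a resume of our own... */
	pm_runtime_get_noresume(foo->dev);
	/* ...and schedule the matching put once the window has passed. */
	queue_delayed_work(system_power_efficient_wq, &foo->post_resume_work,
			   msecs_to_jiffies(FOO_POST_RESUME_MS));
}

The USB version uses the usb_autopm_*() wrappers around the same runtime-PM counters, and the flush_delayed_work() added to hub_quiesce() ensures a pending put cannot fire after teardown.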
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index e6ae73f8a95d..9ebc5ef54a32 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -70,6 +70,7 @@ struct usb_hub { u8 indicator[USB_MAXCHILDREN]; struct delayed_work leds; struct delayed_work init_work; + struct delayed_work post_resume_work; struct work_struct events; spinlock_t irq_urb_lock; struct timer_list irq_urb_retry; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 53d68d20fb62..0cf94c7a2c9c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -227,7 +227,8 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech HD Webcam C270 */ - { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME | + USB_QUIRK_NO_LPM}, /* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index ea1ce8beb0cb..489dbdc96f94 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); */ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) { - const struct device_link *link; + struct device_link *link; struct usb_port *port_dev; struct usb_hub *hub; @@ -188,6 +188,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) dev_dbg(&port_dev->dev, "Created device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); + udev->usb4_link = link; + return 0; } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2bc775a747f2..8002c23a5a02 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2422,6 +2422,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { u32 reg; int i; + int ret; if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) { dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & @@ -2440,7 +2441,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) case DWC3_GCTL_PRTCAP_DEVICE: if (pm_runtime_suspended(dwc->dev)) break; - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); dwc3_core_exit(dwc); break; @@ -2475,7 +2478,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 321361288935..74968f93d4a3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3516,7 +3516,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, * We're going to do that here to avoid problems of HW trying * to use bogus TRBs for transfers. */ - if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO)) + if (trb->ctrl & DWC3_TRB_CTRL_HWO) trb->ctrl &= ~DWC3_TRB_CTRL_HWO; /* @@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) int ret; ret = dwc3_gadget_soft_disconnect(dwc); - if (ret) - goto err; - - spin_lock_irqsave(&dwc->lock, flags); - if (dwc->gadget_driver) - dwc3_disconnect_gadget(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - - return 0; - -err: /* * Attempt to reset the controller's state. 
Likely no * communication can be established until the host * performs a port reset. */ - if (dwc->softconnect) + if (ret && dwc->softconnect) { dwc3_gadget_soft_connect(dwc); + return -EAGAIN; + } - return ret; + spin_lock_irqsave(&dwc->lock, flags); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; } int dwc3_gadget_resume(struct dwc3 *dwc) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index ab544f6824be..540dc5ab96fc 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -295,8 +295,8 @@ __acquires(&port->port_lock) break; } - if (do_tty_wake && port->port.tty) - tty_wakeup(port->port.tty); + if (do_tty_wake) + tty_port_tty_wakeup(&port->port); return status; } @@ -544,20 +544,16 @@ static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head, static int gs_start_io(struct gs_port *port) { struct list_head *head = &port->read_pool; - struct usb_ep *ep; + struct usb_ep *ep = port->port_usb->out; int status; unsigned started; - if (!port->port_usb || !port->port.tty) - return -EIO; - /* Allocate RX and TX I/O buffers. We can't easily do this much * earlier (with GFP_KERNEL) because the requests are coupled to * endpoints, as are the packet sizes we'll be using. Different * configurations may use different endpoints with a given port; * and high speed vs full speed changes packet sizes too. */ - ep = port->port_usb->out; status = gs_alloc_requests(ep, head, gs_read_complete, &port->read_allocated); if (status) @@ -578,7 +574,7 @@ static int gs_start_io(struct gs_port *port) gs_start_tx(port); /* Unblock any pending writes into our circular buffer, in case * we didn't in gs_start_tx() */ - tty_wakeup(port->port.tty); + tty_port_tty_wakeup(&port->port); } else { /* Free reqs only if we are still connected */ if (port->port_usb) { diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 0d4ce5734165..06a2edb9e86e 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -652,6 +652,10 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) case DS_DISABLED: return; case DS_CONFIGURED: + spin_lock(&dbc->lock); + xhci_dbc_flush_requests(dbc); + spin_unlock(&dbc->lock); + if (dbc->driver->disconnect) dbc->driver->disconnect(dbc); break; diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 60ed753c85bb..d894081d8d15 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -617,6 +617,7 @@ int dbc_tty_init(void) dbc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; dbc_tty_driver->subtype = SERIAL_TYPE_NORMAL; dbc_tty_driver->init_termios = tty_std_termios; + dbc_tty_driver->init_termios.c_lflag &= ~ECHO; dbc_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; dbc_tty_driver->init_termios.c_ispeed = 9600; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index bd745a0f2f78..6680afa4f596 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Periodic endpoint bInterval limit quirk */ if (usb_endpoint_xfer_int(&ep->desc) || usb_endpoint_xfer_isoc(&ep->desc)) { + if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) && + interval >= 9) { + interval = 8; + } if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) && udev->speed >= USB_SPEED_HIGH && interval >= 7) { diff --git a/drivers/usb/host/xhci-pci.c 
b/drivers/usb/host/xhci-pci.c index 0c481cbc8f08..00fac8b233d2 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,12 +71,22 @@ #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0 +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee +#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4 +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5 +#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0 +#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1 +#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316 + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 @@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI)) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9; + + if (pdev->vendor == PCI_VENDOR_ID_ATI && + pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9; + if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96) xhci->quirks |= XHCI_AMD_0x96_HOST; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6dab142e7278..c79d5ed48a08 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -328,7 +328,8 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s } usb3_hcd = xhci_get_usb3_hcd(xhci); - if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4) + if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 && + !(xhci->quirks & XHCI_BROKEN_STREAMS)) usb3_hcd->can_do_streams = 1; if (xhci->shared_hcd) { diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e038ad3375dc..94c9c9271658 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) * In the future we should distinguish between -ENODEV and -ETIMEDOUT * and try to recover a -ETIMEDOUT with a host controller reset. 
*/ - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring, - CMD_RING_RUNNING, 0, 5 * 1000 * 1000, - XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->cmd_ring, + CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret); xhci_halt(xhci); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 4e6dbd2375c3..8a819e853288 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -85,29 +85,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) } /* - * xhci_handshake_check_state - same as xhci_handshake but takes an additional - * exit_state parameter, and bails out with an error immediately when xhc_state - * has exit_state flag set. - */ -int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state) -{ - u32 result; - int ret; - - ret = readl_poll_timeout_atomic(ptr, result, - (result & mask) == done || - result == U32_MAX || - xhci->xhc_state & exit_state, - 1, usec); - - if (result == U32_MAX || xhci->xhc_state & exit_state) - return -ENODEV; - - return ret; -} - -/* * Disable interrupts and begin the xHCI halting process. */ void xhci_quiesce(struct xhci_hcd *xhci) @@ -227,8 +204,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command, - CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -1182,7 +1158,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); + if (xhci->xhc_state & XHCI_STATE_REMOVING) + retval = -ENODEV; + else + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 49887a303e43..a20f4e7cd43a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1643,6 +1643,7 @@ struct xhci_hcd { #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) #define XHCI_ETRON_HOST BIT_ULL(49) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1868,8 +1869,6 @@ void xhci_skip_sec_intr_events(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); -int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index b09b58d7311d..d8b906ec4d1c 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -394,8 +394,7 @@ static int dp_altmode_vdm(struct typec_altmode *alt, case CMDT_RSP_NAK: switch (cmd) { case DP_CMD_STATUS_UPDATE: - if (typec_altmode_exit(alt)) - dev_err(&dp->alt->dev, "Exit Mode Failed!\n"); + dp->state = DP_STATE_EXIT; break; case DP_CMD_CONFIGURE: dp->data.conf = 0; @@ -677,7 +676,7 @@ static ssize_t pin_assignment_show(struct device 
*dev, assignments = get_current_pin_assignments(dp); - for (i = 0; assignments; assignments >>= 1, i++) { + for (i = 0; assignments && i < DP_PIN_ASSIGN_MAX; assignments >>= 1, i++) { if (assignments & 1) { if (i == cur) len += sprintf(buf + len, "[%s] ", diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1a1f9e1f8e4e..1f6fdfaa34bf 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4410,17 +4410,6 @@ static int tcpm_src_attach(struct tcpm_port *port) tcpm_enable_auto_vbus_discharge(port, true); - ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, - TYPEC_SOURCE, tcpm_data_role_for_source(port)); - if (ret < 0) - return ret; - - if (port->pd_supported) { - ret = port->tcpc->set_pd_rx(port->tcpc, true); - if (ret < 0) - goto out_disable_mux; - } - /* * USB Type-C specification, version 1.2, * chapter 4.5.2.2.8.1 (Attached.SRC Requirements) @@ -4430,13 +4419,24 @@ static int tcpm_src_attach(struct tcpm_port *port) (polarity == TYPEC_POLARITY_CC2 && port->cc1 == TYPEC_CC_RA)) { ret = tcpm_set_vconn(port, true); if (ret < 0) - goto out_disable_pd; + return ret; } ret = tcpm_set_vbus(port, true); if (ret < 0) goto out_disable_vconn; + ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, TYPEC_SOURCE, + tcpm_data_role_for_source(port)); + if (ret < 0) + goto out_disable_vbus; + + if (port->pd_supported) { + ret = port->tcpc->set_pd_rx(port->tcpc, true); + if (ret < 0) + goto out_disable_mux; + } + port->pd_capable = false; port->partner = NULL; @@ -4447,14 +4447,14 @@ static int tcpm_src_attach(struct tcpm_port *port) return 0; -out_disable_vconn: - tcpm_set_vconn(port, false); -out_disable_pd: - if (port->pd_supported) - port->tcpc->set_pd_rx(port->tcpc, false); out_disable_mux: tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE, TYPEC_ORIENTATION_NONE); +out_disable_vbus: + tcpm_set_vbus(port, false); +out_disable_vconn: + tcpm_set_vconn(port, false); + return ret; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9dbd88eb9ff4..bfb774c273ea 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -69,12 +69,14 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" #define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) -enum { - VHOST_NET_FEATURES = VHOST_FEATURES | - (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_F_RING_RESET) +static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = { + VHOST_FEATURES | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | + (1ULL << VIRTIO_F_RING_RESET), + VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) | + VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO), }; enum { @@ -1606,16 +1608,23 @@ done: return err; } -static int vhost_net_set_features(struct vhost_net *n, u64 features) +static int vhost_net_set_features(struct vhost_net *n, const u64 *features) { size_t vhost_hlen, sock_hlen, hdr_len; int i; - hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_VERSION_1))) ? - sizeof(struct virtio_net_hdr_mrg_rxbuf) : - sizeof(struct virtio_net_hdr); - if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { + hdr_len = virtio_features_test_bit(features, VIRTIO_NET_F_MRG_RXBUF) || + virtio_features_test_bit(features, VIRTIO_F_VERSION_1) ? 
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) : + sizeof(struct virtio_net_hdr); + + if (virtio_features_test_bit(features, + VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) || + virtio_features_test_bit(features, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO)) + hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel); + + if (virtio_features_test_bit(features, VHOST_NET_F_VIRTIO_NET_HDR)) { /* vhost provides vnet_hdr */ vhost_hlen = hdr_len; sock_hlen = 0; @@ -1625,18 +1634,19 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) sock_hlen = hdr_len; } mutex_lock(&n->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && + if (virtio_features_test_bit(features, VHOST_F_LOG_ALL) && !vhost_log_access_ok(&n->dev)) goto out_unlock; - if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { + if (virtio_features_test_bit(features, VIRTIO_F_ACCESS_PLATFORM)) { if (vhost_init_device_iotlb(&n->dev)) goto out_unlock; } for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); - n->vqs[i].vq.acked_features = features; + virtio_features_copy(n->vqs[i].vq.acked_features_array, + features); n->vqs[i].vhost_hlen = vhost_hlen; n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); @@ -1673,12 +1683,13 @@ out: static long vhost_net_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { + u64 all_features[VIRTIO_FEATURES_DWORDS]; struct vhost_net *n = f->private_data; void __user *argp = (void __user *)arg; u64 __user *featurep = argp; struct vhost_vring_file backend; - u64 features; - int r; + u64 features, count, copied; + int r, i; switch (ioctl) { case VHOST_NET_SET_BACKEND: @@ -1686,16 +1697,60 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return vhost_net_set_backend(n, backend.index, backend.fd); case VHOST_GET_FEATURES: - features = VHOST_NET_FEATURES; + features = vhost_net_features[0]; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; - if (features & ~VHOST_NET_FEATURES) + if (features & ~vhost_net_features[0]) return -EOPNOTSUPP; - return vhost_net_set_features(n, features); + + virtio_features_from_u64(all_features, features); + return vhost_net_set_features(n, all_features); + case VHOST_GET_FEATURES_ARRAY: + if (copy_from_user(&count, featurep, sizeof(count))) + return -EFAULT; + + /* Copy the net features, up to the user-provided buffer size */ + argp += sizeof(u64); + copied = min(count, VIRTIO_FEATURES_DWORDS); + if (copy_to_user(argp, vhost_net_features, + copied * sizeof(u64))) + return -EFAULT; + + /* Zero the trailing space provided by user-space, if any */ + if (clear_user(argp, size_mul(count - copied, sizeof(u64)))) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES_ARRAY: + if (copy_from_user(&count, featurep, sizeof(count))) + return -EFAULT; + + virtio_features_zero(all_features); + argp += sizeof(u64); + copied = min(count, VIRTIO_FEATURES_DWORDS); + if (copy_from_user(all_features, argp, copied * sizeof(u64))) + return -EFAULT; + + /* + * Any feature specified by user-space above + * VIRTIO_FEATURES_MAX is not supported by definition. 
+ */ + for (i = copied; i < count; ++i) { + if (copy_from_user(&features, featurep + 1 + i, + sizeof(features))) + return -EFAULT; + if (features) + return -EOPNOTSUPP; + } + + for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++) + if (all_features[i] & ~vhost_net_features[i]) + return -EOPNOTSUPP; + + return vhost_net_set_features(n, all_features); case VHOST_GET_BACKEND_FEATURES: features = VHOST_NET_BACKEND_FEATURES; if (copy_to_user(featurep, &features, sizeof(features))) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3a5ebb973dba..1094256a943c 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -372,7 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_used = false; vq->log_addr = -1ull; vq->private_data = NULL; - vq->acked_features = 0; + virtio_features_zero(vq->acked_features_array); vq->acked_backend_features = 0; vq->log_base = NULL; vq->error_ctx = NULL; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb75a292d50c..d1aed35c4b07 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -133,7 +133,7 @@ struct vhost_virtqueue { struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; void *private_data; - u64 acked_features; + VIRTIO_DECLARE_FEATURES(acked_features); u64 acked_backend_features; /* Log write descriptors */ void __user *log_base; @@ -291,7 +291,7 @@ static inline void *vhost_vq_get_backend(struct vhost_virtqueue *vq) static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) { - return vq->acked_features & (1ULL << bit); + return virtio_features_test_bit(vq->acked_features_array, bit); } static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int bit) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 95d5d7993e5b..5c48788cdbec 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -53,7 +53,7 @@ static ssize_t features_show(struct device *_d, /* We actually represent this as a bitstring, as it could be * arbitrary length in future. */ - for (i = 0; i < sizeof(dev->features)*8; i++) + for (i = 0; i < VIRTIO_FEATURES_MAX; i++) len += sysfs_emit_at(buf, len, "%c", __virtio_test_bit(dev, i) ? '1' : '0'); len += sysfs_emit_at(buf, len, "\n"); @@ -272,22 +272,22 @@ static int virtio_dev_probe(struct device *_d) int err, i; struct virtio_device *dev = dev_to_virtio(_d); struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); - u64 device_features; - u64 driver_features; + u64 device_features[VIRTIO_FEATURES_DWORDS]; + u64 driver_features[VIRTIO_FEATURES_DWORDS]; u64 driver_features_legacy; /* We have a driver! */ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); /* Figure out what features the device supports. */ - device_features = dev->config->get_features(dev); + virtio_get_features(dev, device_features); /* Figure out what features the driver supports. 
*/ - driver_features = 0; + virtio_features_zero(driver_features); for (i = 0; i < drv->feature_table_size; i++) { unsigned int f = drv->feature_table[i]; - BUG_ON(f >= 64); - driver_features |= (1ULL << f); + if (!WARN_ON_ONCE(f >= VIRTIO_FEATURES_MAX)) + virtio_features_set_bit(driver_features, f); } /* Some drivers have a separate feature table for virtio v1.0 */ @@ -295,24 +295,29 @@ static int virtio_dev_probe(struct device *_d) driver_features_legacy = 0; for (i = 0; i < drv->feature_table_size_legacy; i++) { unsigned int f = drv->feature_table_legacy[i]; - BUG_ON(f >= 64); - driver_features_legacy |= (1ULL << f); + if (!WARN_ON_ONCE(f >= 64)) + driver_features_legacy |= (1ULL << f); } } else { - driver_features_legacy = driver_features; + driver_features_legacy = driver_features[0]; } - if (device_features & (1ULL << VIRTIO_F_VERSION_1)) - dev->features = driver_features & device_features; - else - dev->features = driver_features_legacy & device_features; + if (virtio_features_test_bit(device_features, VIRTIO_F_VERSION_1)) { + for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i) + dev->features_array[i] = driver_features[i] & + device_features[i]; + } else { + virtio_features_from_u64(dev->features_array, + driver_features_legacy & + device_features[0]); + } /* When debugging, user may filter some features by hand. */ virtio_debug_device_filter_features(dev); /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) - if (device_features & (1ULL << i)) + if (virtio_features_test_bit(device_features, i)) __virtio_set_bit(dev, i); err = dev->config->finalize_features(dev); @@ -320,14 +325,15 @@ static int virtio_dev_probe(struct device *_d) goto err; if (drv->validate) { - u64 features = dev->features; + u64 features[VIRTIO_FEATURES_DWORDS]; + virtio_features_copy(features, dev->features_array); err = drv->validate(dev); if (err) goto err; /* Did validation change any features? Then write them again. 
*/ - if (features != dev->features) { + if (!virtio_features_equal(features, dev->features_array)) { err = dev->config->finalize_features(dev); if (err) goto err; @@ -701,6 +707,9 @@ EXPORT_SYMBOL_GPL(virtio_device_reset_done); static int virtio_init(void) { + BUILD_BUG_ON(offsetof(struct virtio_device, features) != + offsetof(struct virtio_device, features_array[0])); + if (bus_register(&virtio_bus) != 0) panic("virtio bus registration failed"); virtio_debug_init(); diff --git a/drivers/virtio/virtio_debug.c b/drivers/virtio/virtio_debug.c index 95c8fc7705bb..d58713ddf2e5 100644 --- a/drivers/virtio/virtio_debug.c +++ b/drivers/virtio/virtio_debug.c @@ -8,13 +8,13 @@ static struct dentry *virtio_debugfs_dir; static int virtio_debug_device_features_show(struct seq_file *s, void *data) { + u64 device_features[VIRTIO_FEATURES_DWORDS]; struct virtio_device *dev = s->private; - u64 device_features; unsigned int i; - device_features = dev->config->get_features(dev); - for (i = 0; i < BITS_PER_LONG_LONG; i++) { - if (device_features & (1ULL << i)) + virtio_get_features(dev, device_features); + for (i = 0; i < VIRTIO_FEATURES_MAX; i++) { + if (virtio_features_test_bit(device_features, i)) seq_printf(s, "%u\n", i); } return 0; @@ -26,8 +26,8 @@ static int virtio_debug_filter_features_show(struct seq_file *s, void *data) struct virtio_device *dev = s->private; unsigned int i; - for (i = 0; i < BITS_PER_LONG_LONG; i++) { - if (dev->debugfs_filter_features & (1ULL << i)) + for (i = 0; i < VIRTIO_FEATURES_MAX; i++) { + if (virtio_features_test_bit(dev->debugfs_filter_features, i)) seq_printf(s, "%u\n", i); } return 0; @@ -39,7 +39,7 @@ static int virtio_debug_filter_features_clear(void *data, u64 val) struct virtio_device *dev = data; if (val == 1) - dev->debugfs_filter_features = 0; + virtio_features_zero(dev->debugfs_filter_features); return 0; } @@ -50,9 +50,10 @@ static int virtio_debug_filter_feature_add(void *data, u64 val) { struct virtio_device *dev = data; - if (val >= BITS_PER_LONG_LONG) + if (val >= VIRTIO_FEATURES_MAX) return -EINVAL; - dev->debugfs_filter_features |= BIT_ULL_MASK(val); + + virtio_features_set_bit(dev->debugfs_filter_features, val); return 0; } @@ -63,9 +64,10 @@ static int virtio_debug_filter_feature_del(void *data, u64 val) { struct virtio_device *dev = data; - if (val >= BITS_PER_LONG_LONG) + if (val >= VIRTIO_FEATURES_MAX) return -EINVAL; - dev->debugfs_filter_features &= ~BIT_ULL_MASK(val); + + virtio_features_clear_bit(dev->debugfs_filter_features, val); return 0; } @@ -91,7 +93,8 @@ EXPORT_SYMBOL_GPL(virtio_debug_device_init); void virtio_debug_device_filter_features(struct virtio_device *dev) { - dev->features &= ~dev->debugfs_filter_features; + virtio_features_andnot(dev->features_array, dev->features_array, + dev->debugfs_filter_features); } EXPORT_SYMBOL_GPL(virtio_debug_device_filter_features); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 7182f43ed055..dd0e65f71d41 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -22,11 +22,11 @@ #define VIRTIO_AVQ_SGS_MAX 4 -static u64 vp_get_features(struct virtio_device *vdev) +static void vp_get_features(struct virtio_device *vdev, u64 *features) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return vp_modern_get_features(&vp_dev->mdev); + vp_modern_get_extended_features(&vp_dev->mdev, features); } static int vp_avq_index(struct virtio_device *vdev, u16 *index, u16 *num) @@ -437,7 +437,7 @@ static int 
vp_finalize_features(struct virtio_device *vdev) if (vp_check_common_size(vdev)) return -EINVAL; - vp_modern_set_features(&vp_dev->mdev, vdev->features); + vp_modern_set_extended_features(&vp_dev->mdev, vdev->features_array); return 0; } @@ -1234,7 +1234,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .find_vqs = vp_modern_find_vqs, .del_vqs = vp_del_vqs, .synchronize_cbs = vp_synchronize_vectors, - .get_features = vp_get_features, + .get_extended_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, @@ -1254,7 +1254,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .find_vqs = vp_modern_find_vqs, .del_vqs = vp_del_vqs, .synchronize_cbs = vp_synchronize_vectors, - .get_features = vp_get_features, + .get_extended_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 0d3dbfaf4b23..d665f8f73ea8 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -388,63 +388,74 @@ void vp_modern_remove(struct virtio_pci_modern_device *mdev) EXPORT_SYMBOL_GPL(vp_modern_remove); /* - * vp_modern_get_features - get features from device + * vp_modern_get_extended_features - get features from device * @mdev: the modern virtio-pci device + * @features: the features array to be filled * - * Returns the features read from the device + * Fill the specified features array with the features read from the device */ -u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev) +void vp_modern_get_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features) { struct virtio_pci_common_cfg __iomem *cfg = mdev->common; + int i; - u64 features; + virtio_features_zero(features); + for (i = 0; i < VIRTIO_FEATURES_WORDS; i++) { + u64 cur; - vp_iowrite32(0, &cfg->device_feature_select); - features = vp_ioread32(&cfg->device_feature); - vp_iowrite32(1, &cfg->device_feature_select); - features |= ((u64)vp_ioread32(&cfg->device_feature) << 32); - - return features; + vp_iowrite32(i, &cfg->device_feature_select); + cur = vp_ioread32(&cfg->device_feature); + features[i >> 1] |= cur << (32 * (i & 1)); + } } -EXPORT_SYMBOL_GPL(vp_modern_get_features); +EXPORT_SYMBOL_GPL(vp_modern_get_extended_features); /* * vp_modern_get_driver_features - get driver features from device * @mdev: the modern virtio-pci device + * @features: the features array to be filled * - * Returns the driver features read from the device + * Fill the specified features array with the driver features read from the + * device */ -u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev) +void +vp_modern_get_driver_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features) { struct virtio_pci_common_cfg __iomem *cfg = mdev->common; + int i; - u64 features; - - vp_iowrite32(0, &cfg->guest_feature_select); - features = vp_ioread32(&cfg->guest_feature); - vp_iowrite32(1, &cfg->guest_feature_select); - features |= ((u64)vp_ioread32(&cfg->guest_feature) << 32); + virtio_features_zero(features); + for (i = 0; i < VIRTIO_FEATURES_WORDS; i++) { + u64 cur; - return features; + vp_iowrite32(i, &cfg->guest_feature_select); + cur = vp_ioread32(&cfg->guest_feature); + features[i >> 1] |= cur << (32 * (i & 1)); + } } -EXPORT_SYMBOL_GPL(vp_modern_get_driver_features); 
+EXPORT_SYMBOL_GPL(vp_modern_get_driver_extended_features); /* - * vp_modern_set_features - set features to device + * vp_modern_set_extended_features - set features to device * @mdev: the modern virtio-pci device * @features: the features set to device */ -void vp_modern_set_features(struct virtio_pci_modern_device *mdev, - u64 features) +void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev, + const u64 *features) { struct virtio_pci_common_cfg __iomem *cfg = mdev->common; + int i; + + for (i = 0; i < VIRTIO_FEATURES_WORDS; i++) { + u32 cur = features[i >> 1] >> (32 * (i & 1)); - vp_iowrite32(0, &cfg->guest_feature_select); - vp_iowrite32((u32)features, &cfg->guest_feature); - vp_iowrite32(1, &cfg->guest_feature_select); - vp_iowrite32(features >> 32, &cfg->guest_feature); + vp_iowrite32(i, &cfg->guest_feature_select); + vp_iowrite32(cur, &cfg->guest_feature); + } } -EXPORT_SYMBOL_GPL(vp_modern_set_features); +EXPORT_SYMBOL_GPL(vp_modern_set_extended_features); /* * vp_modern_generation - get the device genreation diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b784aab66867..4397392bfef0 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle_done)(struct virtqueue *vq)) { struct vring_virtqueue *vq = to_vvq(_vq); - int err; + int err, err_reset; if (num > vq->vq.num_max) return -E2BIG; @@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, else err = virtqueue_resize_split(_vq, num); - return virtqueue_enable_after_reset(_vq); + err_reset = virtqueue_enable_after_reset(_vq); + if (err_reset) + return err_reset; + + return err; } EXPORT_SYMBOL_GPL(virtqueue_resize); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e51e7d88980a..1d847a939f29 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = { .kill_sb = kill_anon_super, }; -static struct inode *anon_inode_make_secure_inode( - const char *name, - const struct inode *context_inode) +/** + * anon_inode_make_secure_inode - allocate an anonymous inode with security context + * @sb: [in] Superblock to allocate from + * @name: [in] Name of the class of the newfile (e.g., "secretmem") + * @context_inode: + * [in] Optional parent inode for security inheritance + * + * The function ensures proper security initialization through the LSM hook + * security_inode_init_security_anon(). + * + * Return: Pointer to new inode on success, ERR_PTR on failure. 
+ */ +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode) { struct inode *inode; int error; - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode( } return inode; } +EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, @@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); if (make_inode) { - inode = anon_inode_make_secure_inode(name, context_inode); + inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, + name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b228a5a64479..66de46318620 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1406,6 +1406,9 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite : BCH_DATA_free; struct printbuf buf = PRINTBUF; + unsigned fsck_flags = (async_repair ? FSCK_ERR_NO_LOG : 0)| + FSCK_CAN_FIX|FSCK_CAN_IGNORE; + struct bpos bucket = iter->pos; bucket.offset &= ~(~0ULL << 56); u64 genbits = iter->pos.offset & (~0ULL << 56); @@ -1419,9 +1422,10 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite return ret; if (!bch2_dev_bucket_exists(c, bucket)) { - if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, - "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_to_invalid_dev_bucket, + "entry in %s btree for nonexistant dev:bucket %llu:%llu", + bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) goto delete; ret = 1; goto out; @@ -1433,7 +1437,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite if (a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a))) { - if (fsck_err(trans, need_discard_freespace_key_bad, + if (__fsck_err(trans, fsck_flags, + need_discard_freespace_key_bad, "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_id_str(iter->btree_id), diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e76809e71858..77d93beb3c8f 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -353,7 +353,7 @@ static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, return ret ? 
bkey_s_c_err(ret) : bkey_s_c_null; } else { struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit); - if (b == ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node))) + if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) return bkey_s_c_null; if (IS_ERR_OR_NULL(b)) return ((struct bkey_s_c) { .k = ERR_CAST(b) }); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5a1cede2febf..ddfacad0f70c 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -767,7 +767,8 @@ struct btree_trans_buf { x(sysfs) \ x(btree_write_buffer) \ x(btree_node_scrub) \ - x(async_recovery_passes) + x(async_recovery_passes) \ + x(ioctl_data) enum bch_write_ref { #define x(n) BCH_WRITE_REF_##n, @@ -862,9 +863,7 @@ struct bch_fs { DARRAY(enum bcachefs_metadata_version) incompat_versions_requested; -#ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; -#endif struct bch_sb_handle disk_sb; @@ -1284,4 +1283,13 @@ static inline bool bch2_discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca : ca->mi.discard; } +static inline bool bch2_fs_casefold_enabled(struct bch_fs *c) +{ +#ifdef CONFIG_UNICODE + return !c->opts.casefold_disabled; +#else + return false; +#endif +} + #endif /* _BCACHEFS_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index e92cf3928c63..bac108e93823 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -503,8 +503,14 @@ again: prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + /* + * XXX: we're not passing the trans object here because we're not set up + * to handle a transaction restart - this code needs to be rewritten + * when we start doing online topology repair + */ + bch2_trans_unlock_long(trans); if (mustfix_fsck_err_on(!have_child, - trans, btree_node_topology_interior_node_empty, + c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf)) ret = DROP_THIS_NODE; err: @@ -528,32 +534,39 @@ fsck_err: return ret; } -static int bch2_check_root(struct btree_trans *trans, enum btree_id i, +static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, bool *reconstructed_root) { struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, i); + struct btree_root *r = bch2_btree_id_root(c, btree); struct printbuf buf = PRINTBUF; int ret = 0; - bch2_btree_id_to_text(&buf, i); + bch2_btree_id_to_text(&buf, btree); if (r->error) { bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - r->alive = false; - r->error = 0; + ret = bch2_btree_has_scanned_nodes(c, btree); + if (ret < 0) + goto err; - if (!bch2_btree_has_scanned_nodes(c, i)) { + if (!ret) { __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + FSCK_CAN_FIX|(!btree_id_important(btree) ? 
FSCK_AUTOFIX : 0), btree_root_unreadable_and_scan_found_nothing, "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); + + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 0); } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + r->alive = false; + r->error = 0; + bch2_btree_root_alloc_fake_trans(trans, btree, 1); + + bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); if (ret) goto err; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d8f3c4c65e90..e874a4357f64 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -557,7 +557,9 @@ static int __btree_err(int ret, const char *fmt, ...) { if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) - return bch_err_throw(c, fsck_fix); + return ret == -BCH_ERR_btree_node_read_err_fixable + ? bch_err_throw(c, fsck_fix) + : ret; bool have_retry = false; int ret2; @@ -723,12 +725,11 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, - unsigned offset, unsigned sectors, int write, + unsigned offset, int write, struct bch_io_failures *failed, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); - unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; int ret = 0; @@ -778,15 +779,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); - if (!write && - btree_err_on(offset + sectors > (ptr_written ?: btree_sectors(c)), - -BCH_ERR_btree_node_read_err_fixable, - c, ca, b, i, NULL, - bset_past_end_of_btree_node, - "bset past end of btree node (offset %u len %u but written %zu)", - offset, sectors, ptr_written ?: btree_sectors(c))) - i->u64s = 0; - btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, @@ -1151,6 +1143,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); if (first) { + sectors = vstruct_sectors(b->data, c->block_bits); + if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, NULL, + bset_past_end_of_btree_node, + "bset past end of btree node (offset %u len %u but written %zu)", + b->written, sectors, ptr_written ?: btree_sectors(c))) + i->u64s = 0; if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); bool csum_bad = bch2_crc_cmp(b->data->csum, csum); @@ -1178,9 +1178,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, c, NULL, b, NULL, NULL, btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); - - sectors = vstruct_sectors(b->data, c->block_bits); } else { + sectors = vstruct_sectors(bne, c->block_bits); + if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, NULL, + bset_past_end_of_btree_node, + "bset past end of btree node (offset %u len %u but written %zu)", + b->written, sectors, ptr_written ?: btree_sectors(c))) + i->u64s = 0; if 
(good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); bool csum_bad = bch2_crc_cmp(bne->csum, csum); @@ -1201,14 +1207,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } - - sectors = vstruct_sectors(bne, c->block_bits); } b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); - ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg); + ret = validate_bset(c, ca, b, i, b->written, READ, failed, err_msg); if (ret) goto fsck_err; @@ -1333,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); - scoped_guard(rcu) - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { - set_btree_node_need_rewrite(b); - set_btree_node_need_rewrite_degraded(b); + /* + * XXX: + * + * We deadlock if too many btree updates require node rewrites while + * we're still in journal replay. + * + * This is because btree node rewrites generate more updates for the + * interior updates (alloc, backpointers), and if those updates touch + * new nodes and generate more rewrites - well, you see the problem. + * + * The biggest cause is that we don't use the btree write buffer (for + * the backpointer updates) - this needs some real thought on locking in + * order to fix. + * + * The problem with this workaround (not doing the rewrite for degraded + * nodes in journal replay) is that those degraded nodes persist, and we + * don't want that (this is a real bug when a btree node write completes + * with fewer replicas than we wanted and leaves a degraded node due to + * device _removal_, i.e. the device went away mid write). + * + * It's less of a bug here, but still a problem because we don't yet + * have a way of tracking degraded data - we need another index (all + * extents/btree nodes, by replicas entry) in order to fix properly + * (re-replicate degraded data at the earliest possible time).
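The gate that the next hunk adds around the rewrite loop is a plain bitmask test: degraded-node rewrites are deferred until the journal-replay recovery pass has finished. A standalone sketch of that pattern follows; the enum value and struct layout are stand-ins for the real bch_fs recovery state, not the kernel definitions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(nr) (1ULL << (nr))

/* pass numbering is illustrative; the real enum lives in bcachefs */
enum recovery_pass {
	RECOVERY_PASS_journal_replay = 3,
};

struct recovery_state {
	uint64_t passes_complete;	/* bitmask of finished passes */
};

static bool may_rewrite_degraded_nodes(const struct recovery_state *r)
{
	return r->passes_complete & BIT_ULL(RECOVERY_PASS_journal_replay);
}

int main(void)
{
	struct recovery_state r = { .passes_complete = 0 };

	printf("during replay: %d\n", may_rewrite_degraded_nodes(&r));	/* 0 */
	r.passes_complete |= BIT_ULL(RECOVERY_PASS_journal_replay);
	printf("after replay:  %d\n", may_rewrite_degraded_nodes(&r));	/* 1 */
	return 0;
}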
+ */ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { + scoped_guard(rcu) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); + + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { + set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - } + } if (!ptr_written) { set_btree_node_need_rewrite(b); @@ -1982,28 +2013,12 @@ static void btree_node_scrub_work(struct work_struct *work) prt_newline(&err); if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) { - struct btree_trans *trans = bch2_trans_get(c); - - struct btree_iter iter; - bch2_trans_node_iter_init(trans, &iter, scrub->btree, - scrub->key.k->k.p, 0, scrub->level - 1, 0); - - struct btree *b; - int ret = lockrestart_do(trans, - PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter))); - if (ret) - goto err; - - if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) { - bch_err(c, "error validating btree node during scrub on %s at btree %s", - scrub->ca->name, err.buf); - - ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0); - } -err: - bch2_trans_iter_exit(trans, &iter); - bch2_trans_begin(trans); - bch2_trans_put(trans); + int ret = bch2_trans_do(c, + bch2_btree_node_rewrite_key(trans, scrub->btree, scrub->level - 1, + scrub->key.k, 0)); + if (!bch2_err_matches(ret, ENOENT) && + !bch2_err_matches(ret, EROFS)) + bch_err_fn_ratelimited(c, ret); } printbuf_exit(&err); @@ -2267,7 +2282,7 @@ static void btree_node_write_endio(struct bio *bio) } static int validate_bset_for_write(struct bch_fs *c, struct btree *b, - struct bset *i, unsigned sectors) + struct bset *i) { int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { @@ -2282,7 +2297,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, } ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL); + validate_bset(c, NULL, b, i, b->written, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); @@ -2475,7 +2490,7 @@ do_write: /* if we're going to be encrypting, check metadata validity first: */ if (validate_before_checksum && - validate_bset_for_write(c, b, i, sectors_to_write)) + validate_bset_for_write(c, b, i)) goto err; ret = bset_encrypt(c, i, b->written << 9); @@ -2492,7 +2507,7 @@ do_write: /* if we're not encrypting, check metadata after checksumming: */ if (!validate_before_checksum && - validate_bset_for_write(c, b, i, sectors_to_write)) + validate_bset_for_write(c, b, i)) goto err; /* diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b78403376c07..f8829b667ad3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2076,14 +2076,14 @@ inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter static noinline void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c *k) + struct bpos search_key, struct bkey_s_c *k) { struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key; trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_le(i->k->k.p, iter->pos) && + bpos_le(i->k->k.p, search_key) && bpos_ge(i->k->k.p, k->k ? 
k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2092,6 +2092,7 @@ void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_ static noinline void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); @@ -2100,7 +2101,7 @@ void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter trans_for_each_update(trans, i) if (!i->key_cache_already_flushed && i->btree_id == iter->btree_id && - bpos_ge(i->k->k.p, path->pos) && + bpos_ge(i->k->k.p, search_key) && bpos_le(i->k->k.p, k->k ? k->k->p : end)) { iter->k = i->k->k; *k = bkey_i_to_s_c(i->k); @@ -2122,13 +2123,14 @@ void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_ static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_pos, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_max(trans->c, iter->btree_id, path->level, - path->pos, + search_pos, end_pos, &iter->journal_idx); } @@ -2138,7 +2140,7 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, struct btree_iter *iter) { struct btree_path *path = btree_iter_path(trans, iter); - struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos); + struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos, path->pos); if (k) { iter->k = k->k; @@ -2151,11 +2153,12 @@ struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, static noinline void btree_trans_peek_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek(trans, iter, + bch2_btree_journal_peek(trans, iter, search_key, k->k ? k->k->p : path_l(path)->b->key.k.p); if (next_journal) { iter->k = next_journal->k; @@ -2165,13 +2168,14 @@ void btree_trans_peek_journal(struct btree_trans *trans, static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bpos end_pos) { struct btree_path *path = btree_iter_path(trans, iter); return bch2_journal_keys_peek_prev_min(trans->c, iter->btree_id, path->level, - path->pos, + search_key, end_pos, &iter->journal_idx); } @@ -2179,12 +2183,13 @@ static struct bkey_i *bch2_btree_journal_peek_prev(struct btree_trans *trans, static noinline void btree_trans_peek_prev_journal(struct btree_trans *trans, struct btree_iter *iter, + struct bpos search_key, struct bkey_s_c *k) { struct btree_path *path = btree_iter_path(trans, iter); struct bkey_i *next_journal = - bch2_btree_journal_peek_prev(trans, iter, - k->k ? k->k->p : path_l(path)->b->key.k.p); + bch2_btree_journal_peek_prev(trans, iter, search_key, + k->k ? 
k->k->p : path_l(path)->b->data->min_key); if (next_journal) { iter->k = next_journal->k; @@ -2292,11 +2297,11 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_journal(trans, iter, &k); + btree_trans_peek_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_updates(trans, iter, &k); + bch2_btree_trans_peek_updates(trans, iter, search_key, &k); if (k.k && bkey_deleted(k.k)) { /* @@ -2326,6 +2331,20 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct } bch2_btree_iter_verify(trans, iter); + + if (trace___btree_iter_peek_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace___btree_iter_peek(trans->c, buf.buf); + } + return k; } @@ -2484,6 +2503,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); + if (trace_btree_iter_peek_max_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_max(trans->c, buf.buf); + } + return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2557,11 +2589,11 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, st } if (unlikely(iter->flags & BTREE_ITER_with_journal)) - btree_trans_peek_prev_journal(trans, iter, &k); + btree_trans_peek_prev_journal(trans, iter, search_key, &k); if (unlikely((iter->flags & BTREE_ITER_with_updates) && trans->nr_updates)) - bch2_btree_trans_peek_prev_updates(trans, iter, &k); + bch2_btree_trans_peek_prev_updates(trans, iter, search_key, &k); if (likely(k.k && !bkey_deleted(k.k))) { break; @@ -2724,6 +2756,19 @@ out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(trans, iter); + + if (trace_btree_iter_peek_prev_min_enabled()) { + CLASS(printbuf, buf)(); + + int ret = bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_prev_min(trans->c, buf.buf); + } return k; end: bch2_btree_iter_set_pos(trans, iter, end); @@ -2767,8 +2812,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { - if (iter->pos.inode == KEY_INODE_MAX) - return bkey_s_c_null; + if (iter->pos.inode == KEY_INODE_MAX) { + k = bkey_s_c_null; + goto out2; + } bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos)); } @@ -2785,8 +2832,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } struct btree_path *path = btree_iter_path(trans, iter); - if (unlikely(!btree_path_node(path, path->level))) - return bkey_s_c_null; + if (unlikely(!btree_path_node(path, path->level))) { + k = bkey_s_c_null; + goto out2; + } btree_path_set_should_be_locked(trans, path); @@ -2879,7 +2928,20 @@ out: bch2_btree_iter_verify(trans, iter); ret = bch2_btree_iter_verify_ret(trans, iter, k); if (unlikely(ret)) - return bkey_s_c_err(ret); + k = bkey_s_c_err(ret); +out2: + if (trace_btree_iter_peek_slot_enabled()) { + CLASS(printbuf, buf)(); + + int ret 
= bkey_err(k); + if (ret) + prt_str(&buf, bch2_err_str(ret)); + else if (k.k) + bch2_bkey_val_to_text(&buf, trans->c, k); + else + prt_str(&buf, "(null)"); + trace_btree_iter_peek_slot(trans->c, buf.buf); + } return k; } @@ -3132,6 +3194,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", + BTREE_TRANS_MEM_MAX); + bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); @@ -3159,46 +3225,32 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long mutex_unlock(&s->lock); } - if (trans->used_mempool) { - if (trans->mem_bytes >= new_bytes) - goto out_change_top; - - /* No more space from mempool item, need malloc new one */ - new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); - if (unlikely(!new_mem)) { - bch2_trans_unlock(trans); - - new_mem = kmalloc(new_bytes, GFP_KERNEL); - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { + EBUG_ON(trans->mem_bytes >= new_bytes); + return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + } - ret = bch2_trans_relock(trans); - if (ret) { - kfree(new_mem); - return ERR_PTR(ret); - } - } - memcpy(new_mem, trans->mem, trans->mem_top); - trans->used_mempool = false; - mempool_free(trans->mem, &c->btree_trans_mem_pool); - goto out_new_mem; + if (old_bytes) { + trans->realloc_bytes_required = new_bytes; + trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } - new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + EBUG_ON(trans->mem); + + new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN); if (unlikely(!new_mem)) { bch2_trans_unlock(trans); - new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL); + new_mem = kmalloc(new_bytes, GFP_KERNEL); if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); new_bytes = BTREE_TRANS_MEM_MAX; - memcpy(new_mem, trans->mem, trans->mem_top); trans->used_mempool = true; - kfree(trans->mem); } - if (!new_mem) - return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc); + EBUG_ON(!new_mem); trans->mem = new_mem; trans->mem_bytes = new_bytes; @@ -3207,18 +3259,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (ret) return ERR_PTR(ret); } -out_new_mem: + trans->mem = new_mem; trans->mem_bytes = new_bytes; - if (old_bytes) { - trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart_ip(trans, - BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); - } -out_change_top: - bch2_trans_kmalloc_trace(trans, size, ip); - p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3279,6 +3323,27 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->restart_count++; trans->mem_top = 0; + if (trans->restarted == BCH_ERR_transaction_restart_mem_realloced) { + EBUG_ON(!trans->mem || !trans->mem_bytes); + unsigned new_bytes = trans->realloc_bytes_required; + void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN); + if (unlikely(!new_mem)) { + bch2_trans_unlock(trans); + new_mem = 
krealloc(trans->mem, new_bytes, GFP_KERNEL); + + EBUG_ON(new_bytes > BTREE_TRANS_MEM_MAX); + + if (!new_mem) { + new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); + new_bytes = BTREE_TRANS_MEM_MAX; + trans->used_mempool = true; + kfree(trans->mem); + } + } + trans->mem = new_mem; + trans->mem_bytes = new_bytes; + } + trans_for_each_path(trans, path, i) { path->should_be_locked = false; diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index cf7398751644..ea839560a136 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -137,12 +137,15 @@ struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id b struct journal_key *k; BUG_ON(*idx > keys->nr); + + if (!keys->nr) + return NULL; search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); - while (*idx && - __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) { + while (*idx < keys->nr && + __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) { (*idx)++; iters++; if (iters == 10) { @@ -151,18 +154,23 @@ search: } } + if (*idx == keys->nr) + --(*idx); + struct bkey_i *ret = NULL; rcu_read_lock(); /* for overwritten_ranges */ - while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { + while (true) { + k = idx_to_key(keys, *idx); if (__journal_key_cmp(btree_id, level, end_pos, k) > 0) break; if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->start - 1; - else - *idx -= 1; + *idx = rcu_dereference(k->overwritten_range)->start; + if (!*idx) + break; + --(*idx); continue; } @@ -171,6 +179,8 @@ search: break; } + if (!*idx) + break; --(*idx); iters++; if (iters == 10) { @@ -641,10 +651,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) { const struct journal_key *l = _l; const struct journal_key *r = _r; + int rewind = l->rewind && r->rewind ? -1 : 1; return journal_key_cmp(l, r) ?: - cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset); + ((cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -713,6 +724,8 @@ int bch2_journal_keys_sort(struct bch_fs *c) struct journal_keys *keys = &c->journal_keys; size_t nr_read = 0; + u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX; + genradix_for_each(&c->journal_entries, iter, _i) { i = *_i; @@ -721,28 +734,43 @@ int bch2_journal_keys_sort(struct bch_fs *c) cond_resched(); - for_each_jset_key(k, entry, &i->j) { - struct journal_key n = (struct journal_key) { - .btree_id = entry->btree_id, - .level = entry->level, - .k = k, - .journal_seq = le64_to_cpu(i->j.seq), - .journal_offset = k->_data - i->j._data, - }; - - if (darray_push(keys, n)) { - __journal_keys_sort(keys); - - if (keys->nr * 8 > keys->size * 7) { - bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", - keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); - return bch_err_throw(c, ENOMEM_journal_keys_sort); + vstruct_for_each(&i->j, entry) { + bool rewind = !entry->level && + !btree_id_is_alloc(entry->btree_id) && + le64_to_cpu(i->j.seq) >= rewind_seq; + + if (entry->type != (rewind + ? 
BCH_JSET_ENTRY_overwrite + : BCH_JSET_ENTRY_btree_keys)) + continue; + + if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start) + continue; + + jset_entry_for_each_key(entry, k) { + struct journal_key n = (struct journal_key) { + .btree_id = entry->btree_id, + .level = entry->level, + .rewind = rewind, + .k = k, + .journal_seq = le64_to_cpu(i->j.seq), + .journal_offset = k->_data - i->j._data, + }; + + if (darray_push(keys, n)) { + __journal_keys_sort(keys); + + if (keys->nr * 8 > keys->size * 7) { + bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", + keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); + return bch_err_throw(c, ENOMEM_journal_keys_sort); + } + + BUG_ON(darray_push(keys, n)); } - BUG_ON(darray_push(keys, n)); + nr_read++; } - - nr_read++; } } diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h index 8b773823704f..86aacb254fb2 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -11,8 +11,9 @@ struct journal_key { u32 journal_offset; enum btree_id btree_id:8; unsigned level:8; - bool allocated; - bool overwritten; + bool allocated:1; + bool overwritten:1; + bool rewind:1; struct journal_key_range_overwritten __rcu * overwritten_range; struct bkey_i *k; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 91a51aef82f1..bed2b4b6ffb9 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -771,7 +771,7 @@ static inline void __bch2_trans_unlock(struct btree_trans *trans) } static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path, - struct get_locks_fail *f, bool trace) + struct get_locks_fail *f, bool trace, ulong ip) { if (!trace) goto out; @@ -796,7 +796,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); } - trace_trans_restart_relock(trans, _RET_IP_, buf.buf); + trace_trans_restart_relock(trans, ip, buf.buf); printbuf_exit(&buf); } @@ -806,7 +806,7 @@ out: bch2_trans_verify_locks(trans); } -static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) +static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace, ulong ip) { bch2_trans_verify_locks(trans); @@ -825,7 +825,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) if (path->should_be_locked && (ret = btree_path_get_locks(trans, path, false, &f, BCH_ERR_transaction_restart_relock))) { - bch2_trans_relock_fail(trans, path, &f, trace); + bch2_trans_relock_fail(trans, path, &f, trace, ip); return ret; } } @@ -838,12 +838,12 @@ out: int bch2_trans_relock(struct btree_trans *trans) { - return __bch2_trans_relock(trans, true); + return __bch2_trans_relock(trans, true, _RET_IP_); } int bch2_trans_relock_notrace(struct btree_trans *trans) { - return __bch2_trans_relock(trans, false); + return __bch2_trans_relock(trans, false, _RET_IP_); } void bch2_trans_unlock(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index a35847734a60..23d8c62ea4b6 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -521,8 +521,12 @@ bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b) return false; } -bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree) +int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id 
btree) { + int ret = bch2_run_print_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); + if (ret) + return ret; + struct found_btree_node search = { .btree_id = btree, .level = 0, diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h index 08687b209787..66e6f9ed19d0 100644 --- a/fs/bcachefs/btree_node_scan.h +++ b/fs/bcachefs/btree_node_scan.h @@ -4,7 +4,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *); bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *); -bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); +int bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id); int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos); void bch2_find_btree_nodes_exit(struct find_btree_nodes *); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index d9710801e3ee..639ef75b3dbd 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -595,12 +595,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); - +#if 0 + /* todo: bring back dynamic fault injection */ if (race_fault()) { trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); } - +#endif /* * Check if the insert will fit in the leaf node with the write lock * held, otherwise another thread could write the node changing the @@ -757,6 +758,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, btree_trans_journal_entries_start(trans), trans->journal_entries.u64s); + EBUG_ON(trans->journal_res.u64s < trans->journal_entries.u64s); + trans->journal_res.offset += trans->journal_entries.u64s; trans->journal_res.u64s -= trans->journal_entries.u64s; @@ -1003,6 +1006,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; + unsigned journal_u64s = 0; int ret = 0; bch2_trans_verify_not_unlocked_or_in_restart(trans); @@ -1031,10 +1035,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s); + journal_u64s = jset_u64s(trans->accounting.u64s); trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; @@ -1054,11 +1058,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) continue; /* we're going to journal the key being updated: */ - trans->journal_u64s += jset_u64s(i->k->k.u64s); + journal_u64s += jset_u64s(i->k->k.u64s); /* and we're also going to log the overwrite: */ if (trans->journal_transaction_names) - trans->journal_u64s += jset_u64s(i->old_k.u64s); + journal_u64s += jset_u64s(i->old_k.u64s); } if (trans->extra_disk_res) { @@ -1076,6 +1080,8 @@ retry: memset(&trans->journal_res, 0, sizeof(trans->journal_res)); memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); + trans->journal_u64s = journal_u64s + trans->journal_entries.u64s; + ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_); /* make sure we didn't drop or screw up locks: */ diff --git 
a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 3aa4a602bd02..112170fd9c8f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -497,6 +497,7 @@ struct btree_trans { void *mem; unsigned mem_top; unsigned mem_bytes; + unsigned realloc_bytes_required; #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE darray_trans_kmalloc_trace trans_kmalloc_trace; #endif diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index e97e78c10f49..ee657b9f4b96 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -549,20 +549,26 @@ void *__bch2_trans_subbuf_alloc(struct btree_trans *trans, unsigned u64s) { unsigned new_top = buf->u64s + u64s; - unsigned old_size = buf->size; + unsigned new_size = buf->size; - if (new_top > buf->size) - buf->size = roundup_pow_of_two(new_top); + BUG_ON(roundup_pow_of_two(new_top) > U16_MAX); - void *n = bch2_trans_kmalloc_nomemzero(trans, buf->size * sizeof(u64)); + if (new_top > new_size) + new_size = roundup_pow_of_two(new_top); + + void *n = bch2_trans_kmalloc_nomemzero(trans, new_size * sizeof(u64)); if (IS_ERR(n)) return n; + unsigned offset = (u64 *) n - (u64 *) trans->mem; + BUG_ON(offset > U16_MAX); + if (buf->u64s) memcpy(n, btree_trans_subbuf_base(trans, buf), - old_size * sizeof(u64)); + buf->size * sizeof(u64)); buf->base = (u64 *) n - (u64 *) trans->mem; + buf->size = new_size; void *p = btree_trans_subbuf_top(trans, buf); buf->u64s = new_top; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 9feef1dc4de5..0b98ab959719 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -170,8 +170,7 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); -int bch2_btree_write_buffer_insert_err(struct btree_trans *, - enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, @@ -182,7 +181,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); + int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); dump_stack(); return ret; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e77584607f0d..553059b33bfd 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1287,10 +1287,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, do { ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl); - + if (!bch2_err_matches(ret, BCH_ERR_operation_blocked)) + break; bch2_trans_unlock(trans); bch2_wait_on_allocator(c, &cl); - } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); + } while (1); } if (ret) { @@ -2293,9 +2294,9 @@ err: goto out; } -static int bch2_btree_node_rewrite_key(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_i *k, unsigned flags) +int bch2_btree_node_rewrite_key(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_i *k, unsigned flags) { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, @@ -2367,9 +2368,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret = 
bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, a->btree_id, a->level, a->key.k, 0)); - if (ret != -ENOENT && - !bch2_err_matches(ret, EROFS) && - ret != -BCH_ERR_journal_shutdown) + if (!bch2_err_matches(ret, ENOENT) && + !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); spin_lock(&c->btree_node_rewrites_lock); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index b649c36c3fbb..ac04e45a8515 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -176,6 +176,9 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, struct btree *, unsigned, unsigned); +int bch2_btree_node_rewrite_key(struct btree_trans *, + enum btree_id, unsigned, + struct bkey_i *, unsigned); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, struct bpos, unsigned, unsigned); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 90b21e61d2b6..4b095235a0d2 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -267,10 +267,9 @@ out: BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } -int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, +int bch2_btree_write_buffer_insert_err(struct bch_fs *c, enum btree_id btree, struct bkey_i *k) { - struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; prt_printf(&buf, "attempting to do write buffer update on non wb btree="); @@ -332,7 +331,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; if (unlikely(!btree_type_uses_write_buffer(k->btree))) { - ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); + ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k); goto err; } @@ -676,6 +675,9 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, goto err; bch2_bkey_buf_copy(last_flushed, c, tmp.k); + + /* can we avoid the unconditional restart? 
*/ + trace_and_count(c, trans_restart_write_buffer_flush, trans, _RET_IP_); ret = bch_err_throw(c, transaction_restart_write_buffer_flush); } err: diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 05f56fd1eed0..c351d21aca0b 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -89,6 +89,12 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { + if (unlikely(!btree_type_uses_write_buffer(btree))) { + int ret = bch2_btree_write_buffer_insert_err(c, btree, k); + dump_stack(); + return ret; + } + EBUG_ON(!dst->seq); return k->k.type == KEY_TYPE_accounting diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index fde3c2380e28..5ea89aa2b0c4 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -319,6 +319,7 @@ static int bch2_data_thread(void *arg) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done; ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done; } + enumerated_ref_put(&ctx->c->writes, BCH_WRITE_REF_ioctl_data); return 0; } @@ -378,15 +379,24 @@ static long bch2_ioctl_data(struct bch_fs *c, struct bch_data_ctx *ctx; int ret; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_ioctl_data)) + return -EROFS; - if (arg.op >= BCH_DATA_OP_NR || arg.flags) - return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto put_ref; + } + + if (arg.op >= BCH_DATA_OP_NR || arg.flags) { + ret = -EINVAL; + goto put_ref; + } ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) { + ret = -ENOMEM; + goto put_ref; + } ctx->c = c; ctx->arg = arg; @@ -395,7 +405,12 @@ static long bch2_ioctl_data(struct bch_fs *c, &bcachefs_data_ops, bch2_data_thread); if (ret < 0) - kfree(ctx); + goto cleanup; + return ret; +cleanup: + kfree(ctx); +put_ref: + enumerated_ref_put(&c->writes, BCH_WRITE_REF_ioctl_data); return ret; } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5f1174348974..e848e210a9bf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -249,6 +249,7 @@ static int data_update_invalid_bkey(struct data_update *m, bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, "\nnew: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + prt_newline(&buf); bch2_fs_emergency_read_only2(c, &buf); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 300f7cc8abdf..a18d0f78704d 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -18,7 +18,9 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); -#ifdef CONFIG_UNICODE + if (!bch2_fs_casefold_enabled(trans->c)) + return -EOPNOTSUPP; + unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); int ret = PTR_ERR_OR_ZERO(buf); if (ret) @@ -30,9 +32,6 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; -#else - return -EOPNOTSUPP; -#endif } static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) @@ -231,7 +230,8 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type)); } -int bch2_dirent_init_name(struct bkey_i_dirent *dirent, +int bch2_dirent_init_name(struct bch_fs *c, + struct bkey_i_dirent *dirent, const struct bch_hash_info *hash_info, const struct qstr *name, const struct qstr *cf_name) @@ 
-251,7 +251,9 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent, offsetof(struct bch_dirent, d_name) - name->len); } else { -#ifdef CONFIG_UNICODE + if (!bch2_fs_casefold_enabled(c)) + return -EOPNOTSUPP; + memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; @@ -277,9 +279,6 @@ int bch2_dirent_init_name(struct bkey_i_dirent *dirent, dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len); -#else - return -EOPNOTSUPP; -#endif } unsigned u64s = dirent_val_u64s(name->len, cf_len); @@ -313,7 +312,7 @@ struct bkey_i_dirent *bch2_dirent_create_key(struct btree_trans *trans, dirent->v.d_type = type; dirent->v.d_unused = 0; - int ret = bch2_dirent_init_name(dirent, hash_info, name, cf_name); + int ret = bch2_dirent_init_name(trans->c, dirent, hash_info, name, cf_name); if (ret) return ERR_PTR(ret); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 70fb0b581221..1e17199cc5c7 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -59,7 +59,8 @@ static inline void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -int bch2_dirent_init_name(struct bkey_i_dirent *, +int bch2_dirent_init_name(struct bch_fs *, + struct bkey_i_dirent *, const struct bch_hash_info *, const struct qstr *, const struct qstr *); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ac3264134a15..86a842f1e88e 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -137,7 +137,6 @@ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ - x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ @@ -148,11 +147,8 @@ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \ - x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\ - x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ x(BCH_ERR_transaction_restart, transaction_restart_nested) \ @@ -241,7 +237,6 @@ x(BCH_ERR_journal_res_blocked, journal_buf_enomem) \ x(BCH_ERR_journal_res_blocked, journal_stuck) \ x(BCH_ERR_journal_res_blocked, journal_retry_open) \ - x(BCH_ERR_journal_res_blocked, journal_preres_get_blocked) \ x(BCH_ERR_journal_res_blocked, bucket_alloc_blocked) \ x(BCH_ERR_journal_res_blocked, stripe_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a8ec6aae5738..b2a6c041e165 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -621,7 +621,9 @@ print: if (s) s->ret = ret; - if (trans) + if (trans && + !(flags & FSCK_ERR_NO_LOG) && + ret == -BCH_ERR_fsck_fix) ret 
= bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; err_unlock: mutex_unlock(&c->fsck_error_msgs_lock); diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index b899ee75f5b9..e76e58a568bf 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -139,6 +139,17 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, if (ret) return ret; - bch2_cut_back(end, k); + /* tracepoint */ + + if (bpos_lt(end, k->k.p)) { + if (trace_extent_trim_atomic_enabled()) { + CLASS(printbuf, buf)(); + bch2_bpos_to_text(&buf, end); + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k)); + trace_extent_trim_atomic(trans->c, buf.buf); + } + bch2_cut_back(end, k); + } return 0; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3063a8ddc2df..e54e4f255b22 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -722,7 +722,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (IS_ERR(inode)) inode = NULL; -#ifdef CONFIG_UNICODE if (!inode && IS_CASEFOLDED(vdir)) { /* * Do not cache a negative dentry in casefolded directories @@ -737,7 +736,6 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, */ return NULL; } -#endif return d_splice_alias(&inode->v, dentry); } @@ -1732,7 +1730,8 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - return ret; + + return bch2_err_class(ret); } static const struct file_operations bch_file_operations = { @@ -2565,9 +2564,10 @@ got_sb: sb->s_shrink->seeks = 0; #ifdef CONFIG_UNICODE - sb->s_encoding = c->cf_encoding; -#endif + if (bch2_fs_casefold_enabled(c)) + sb->s_encoding = c->cf_encoding; generic_set_sb_d_ops(sb); +#endif vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 68ed69a255e1..dbf161e4311a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -327,7 +327,8 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) (inode->bi_flags & BCH_INODE_has_child_snapshot)) return false; - return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); + return !bch2_inode_has_backpointer(inode) && + !(inode->bi_flags & BCH_INODE_unlinked); } static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot) @@ -372,6 +373,18 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (inode->bi_subvol) { inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; + struct btree_iter subvol_iter; + struct bkey_i_subvolume *subvol = + bch2_bkey_get_mut_typed(trans, &subvol_iter, + BTREE_ID_subvolumes, POS(0, inode->bi_subvol), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(subvol); + if (ret) + return ret; + + subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL; + bch2_trans_iter_exit(trans, &subvol_iter); + u64 root_inum; ret = subvol_lookup(trans, inode->bi_parent_subvol, &dirent_snapshot, &root_inum); @@ -387,6 +400,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; + bch_verbose(c, "got lostfound inum %llu", lostfound.bi_inum); + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ @@ -423,6 +438,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * if (ret) return ret; + { + CLASS(printbuf, buf)(); + ret = 
bch2_inum_snapshot_to_path(trans, inode->bi_inum, + inode->bi_snapshot, NULL, &buf); + if (ret) + return ret; + + bch_info(c, "reattached at %s", buf.buf); + } + /* * Fix up inodes in child snapshots: if they should also be reattached * update the backpointer field, if they should not be we need to emit @@ -490,13 +515,21 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, static int remove_backpointer(struct btree_trans *trans, struct bch_inode_unpacked *inode) { - if (!inode->bi_dir) + if (!bch2_inode_has_backpointer(inode)) return 0; + u32 snapshot = inode->bi_snapshot; + + if (inode->bi_parent_subvol) { + int ret = bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot); + if (ret) + return ret; + } + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter, - SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); + SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); int ret = bkey_err(d) ?: dirent_points_to_inode(c, d, inode) ?: bch2_fsck_remove_dirent(trans, d.k->p); @@ -695,14 +728,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, u32 id, u32 ancestor) { - ssize_t i; - EBUG_ON(id > ancestor); - /* @ancestor should be the snapshot most recently added to @seen */ - EBUG_ON(ancestor != seen->pos.snapshot); - EBUG_ON(ancestor != darray_last(seen->ids)); - if (id == ancestor) return true; @@ -718,11 +745,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see * numerically, since snapshot ID lists are kept sorted, so if we find * an id that's an ancestor of @id we're done: */ - - for (i = seen->ids.nr - 2; - i >= 0 && seen->ids.data[i] >= id; - --i) - if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) + darray_for_each_reverse(seen->ids, i) + if (*i != ancestor && bch2_snapshot_is_ancestor(c, id, *i)) return false; return true; @@ -806,7 +830,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, if (!n->whiteout) { return bch2_inode_unpack(inode, &n->inode); } else { - n->inode.bi_inum = inode.k->p.inode; + n->inode.bi_inum = inode.k->p.offset; n->inode.bi_snapshot = inode.k->p.snapshot; return 0; } @@ -903,17 +927,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct bch_inode_unpacked new = i->inode; - struct bkey_i whiteout; - - new.bi_snapshot = k.k->p.snapshot; - if (!i->whiteout) { + struct bch_inode_unpacked new = i->inode; + new.bi_snapshot = k.k->p.snapshot; ret = __bch2_fsck_write_inode(trans, &new); } else { + struct bkey_i whiteout; bkey_init(&whiteout.k); whiteout.k.type = KEY_TYPE_whiteout; - whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot); + whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot); ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &whiteout, BTREE_UPDATE_internal_snapshot_node); @@ -1135,13 +1157,14 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - if (u.bi_dir || u.bi_dir_offset) { + if (bch2_inode_has_backpointer(&u)) { ret = check_inode_dirent_inode(trans, &u, &do_update); if (ret) goto err; } - if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + if (fsck_err_on(bch2_inode_has_backpointer(&u) && + (u.bi_flags & BCH_INODE_unlinked), trans, inode_unlinked_but_has_dirent, "inode 
unlinked but has dirent\n%s", (printbuf_reset(&buf), @@ -1438,6 +1461,7 @@ static int check_key_has_inode(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + struct btree_iter iter2 = {}; int ret = PTR_ERR_OR_ZERO(i); if (ret) return ret; @@ -1447,40 +1471,105 @@ static int check_key_has_inode(struct btree_trans *trans, bool have_inode = i && !i->whiteout; - if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { - ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); - if (ret) - goto err; + if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) + goto reconstruct; - inode->last_pos.inode--; - ret = bch_err_throw(c, transaction_restart_nested); - goto err; + if (have_inode && btree_matches_i_mode(iter->btree_id, i->inode.bi_mode)) + goto out; + + prt_printf(&buf, ", "); + + bool have_old_inode = false; + darray_for_each(inode->inodes, i2) + if (!i2->whiteout && + bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i2->inode.bi_snapshot) && + btree_matches_i_mode(iter->btree_id, i2->inode.bi_mode)) { + prt_printf(&buf, "but found good inode in older snapshot\n"); + bch2_inode_unpacked_to_text(&buf, &i2->inode); + prt_newline(&buf); + have_old_inode = true; + break; + } + + struct bkey_s_c k2; + unsigned nr_keys = 0; + + prt_printf(&buf, "found keys:\n"); + + for_each_btree_key_max_norestart(trans, iter2, iter->btree_id, + SPOS(k.k->p.inode, 0, k.k->p.snapshot), + POS(k.k->p.inode, U64_MAX), + 0, k2, ret) { + nr_keys++; + if (nr_keys <= 10) { + bch2_bkey_val_to_text(&buf, c, k2); + prt_newline(&buf); + } + if (nr_keys >= 100) + break; } - if (fsck_err_on(!have_inode, - trans, key_in_missing_inode, - "key in missing inode:\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - goto delete; + if (ret) + goto err; - if (fsck_err_on(have_inode && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), - trans, key_in_wrong_inode_type, - "key for wrong inode mode %o:\n%s", - i->inode.bi_mode, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - goto delete; + if (nr_keys > 100) + prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys); + else if (nr_keys > 10) + prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys); + + if (!have_inode) { + if (fsck_err_on(!have_inode, + trans, key_in_missing_inode, + "key in missing inode%s", buf.buf)) { + /* + * Maybe a deletion that raced with data move, or something + * weird like that? But if we know the inode was deleted, or + * it's just a few keys, we can safely delete them. + * + * If it's many keys, we should probably recreate the inode + */ + if (have_old_inode || nr_keys <= 2) + goto delete; + else + goto reconstruct; + } + } else { + /* + * not autofix, this one would be a giant wtf - bit error in the + * inode corrupting i_mode? 
+ * + * may want to try repairing inode instead of deleting + */ + if (fsck_err_on(!btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), + trans, key_in_wrong_inode_type, + "key for wrong inode mode %o%s", + i->inode.bi_mode, buf.buf)) + goto delete; + } out: err: fsck_err: + bch2_trans_iter_exit(trans, &iter2); printbuf_exit(&buf); bch_err_fn(c, ret); return ret; delete: + /* + * XXX: print out more info + * count up extents for this inode, check if we have different inode in + * an older snapshot version, perhaps decide if we want to reconstitute + */ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); goto out; +reconstruct: + ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret) + goto err; + + inode->last_pos.inode--; + ret = bch_err_throw(c, transaction_restart_nested); + goto out; } static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) @@ -1822,18 +1911,39 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot)) continue; - if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && + u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9; + + if (fsck_err_on(k.k->p.offset > last_block && !bkey_extent_is_reservation(k), trans, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct btree_iter iter2; + struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout)); + ret = PTR_ERR_OR_ZERO(whiteout); + if (ret) + goto err; + + bkey_init(&whiteout->k); + whiteout->k.p = SPOS(k.k->p.inode, + last_block, + i->inode.bi_snapshot); + bch2_key_resize(&whiteout->k, + min(KEY_SIZE_MAX & (~0 << c->block_bits), + U64_MAX - whiteout->k.p.offset)); + - bch2_trans_copy_iter(trans, &iter2, iter); - bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot); + /* + * Need a normal (not BTREE_ITER_all_snapshots) + * iterator, if we're deleting in a different + * snapshot and need to emit a whiteout + */ + struct btree_iter iter2; + bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents, + bkey_start_pos(&whiteout->k), + BTREE_ITER_intent); ret = bch2_btree_iter_traverse(trans, &iter2) ?: - bch2_btree_delete_at(trans, &iter2, + bch2_trans_update(trans, &iter2, whiteout, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter2); if (ret) @@ -1949,14 +2059,22 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ continue; } - if (fsck_err_on(i->inode.bi_nlink != i->count, - trans, inode_dir_wrong_nlink, - "directory %llu:%u with wrong i_nlink: got %u, should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) { - i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode); - if (ret) - break; + if (i->inode.bi_nlink != i->count) { + CLASS(printbuf, buf)(); + + lockrestart_do(trans, + bch2_inum_snapshot_to_path(trans, w->last_pos.inode, + i->inode.bi_snapshot, NULL, &buf)); + + if (fsck_err_on(i->inode.bi_nlink != i->count, + trans, inode_dir_wrong_nlink, + "directory with wrong i_nlink: got %u, should be %llu\n%s", + i->inode.bi_nlink, i->count, buf.buf)) { + i->inode.bi_nlink = i->count; + ret = bch2_fsck_write_inode(trans, &i->inode); + if (ret) + break; + } } } 
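The delete-versus-reconstruct choice in the check_key_has_inode() hunk above boils down to two inputs: whether a good inode exists in an older snapshot, and how many stray keys were found. A standalone sketch of just that heuristic - the nr_keys <= 2 threshold mirrors the diff, everything else here is a stand-in:

#include <stdbool.h>
#include <stdio.h>

enum repair_action {
	REPAIR_DELETE,		/* drop the stray keys */
	REPAIR_RECONSTRUCT,	/* recreate the missing inode */
};

static enum repair_action missing_inode_repair(bool have_old_inode,
					       unsigned int nr_keys)
{
	/* few keys, or a good inode exists in an older snapshot:
	 * safe to delete; otherwise rebuild the inode */
	return (have_old_inode || nr_keys <= 2)
		? REPAIR_DELETE
		: REPAIR_RECONSTRUCT;
}

int main(void)
{
	printf("2 stray keys, older inode exists: %s\n",
	       missing_inode_repair(true, 2) == REPAIR_DELETE
	       ? "delete" : "reconstruct");
	printf("50 keys, no older inode: %s\n",
	       missing_inode_repair(false, 50) == REPAIR_DELETE
	       ? "delete" : "reconstruct");
	return 0;
}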
fsck_err: @@ -2184,9 +2302,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &i->inode); dir->first_this_inode = false; -#ifdef CONFIG_UNICODE hash_info->cf_encoding = bch2_inode_casefold(c, &i->inode) ? c->cf_encoding : NULL; -#endif ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k, need_second_pass); @@ -2493,6 +2609,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_subvolume) return 0; + subvol_inum start = { + .subvol = k.k->p.offset, + .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode), + }; + while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { ret = darray_push(&subvol_path, k.k->p.offset); if (ret) @@ -2511,11 +2632,11 @@ if (darray_u32_has(&subvol_path, parent)) { printbuf_reset(&buf); - prt_printf(&buf, "subvolume loop:\n"); + prt_printf(&buf, "subvolume loop: "); - darray_for_each_reverse(subvol_path, i) - prt_printf(&buf, "%u ", *i); - prt_printf(&buf, "%u", parent); + ret = bch2_inum_to_path(trans, start, &buf); + if (ret) + goto err; if (fsck_err(trans, subvol_loop, "%s", buf.buf)) ret = reattach_subvol(trans, s); @@ -2559,19 +2680,13 @@ int bch2_check_subvolume_structure(struct bch_fs *c) return ret; } -struct pathbuf_entry { - u64 inum; - u32 snapshot; -}; - -typedef DARRAY(struct pathbuf_entry) pathbuf; - -static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, +static int bch2_bi_depth_renumber_one(struct btree_trans *trans, + u64 inum, u32 snapshot, u32 new_depth) { struct btree_iter iter; struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, p->inum, p->snapshot), 0); + SPOS(0, inum, snapshot), 0); struct bch_inode_unpacked inode; int ret = bkey_err(k) ?: @@ -2590,14 +2705,15 @@ err: return ret; } -static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) +static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path, + u32 snapshot, u32 new_bi_depth) { u32 restart_count = trans->restart_count; int ret = 0; darray_for_each_reverse(*path, i) { ret = nested_lockrestart_do(trans, - bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); + bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth)); bch_err_fn(trans->c, ret); if (ret) break; @@ -2608,37 +2724,36 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 return ret ?: trans_was_restarted(trans, restart_count); } -static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) -{ - darray_for_each(*p, i) - if (i->inum == inum && - i->snapshot == snapshot) - return true; - return false; -} - static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; struct btree_iter inode_iter = {}; - pathbuf path = {}; + darray_u64 path = {}; struct printbuf buf = PRINTBUF; u32 snapshot = inode_k.k->p.snapshot; bool redo_bi_depth = false; u32 min_bi_depth = U32_MAX; int ret = 0; + struct bpos start = inode_k.k->p; + struct bch_inode_unpacked inode; ret = bch2_inode_unpack(inode_k, &inode); if (ret) return ret; - while (!inode.bi_subvol) { + /* + * If we're running full fsck, check_dirents() will have already run, + * and we shouldn't see any missing backpointers here - otherwise that's + * handled separately by check_unreachable_inodes + */ + while (!inode.bi_subvol && +
bch2_inode_has_backpointer(&inode)) { struct btree_iter dirent_iter; struct bkey_s_c_dirent d; - u32 parent_snapshot = snapshot; - d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); + d = dirent_get_by_pos(trans, &dirent_iter, + SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot)); ret = bkey_err(d.s_c); if (ret && !bch2_err_matches(ret, ENOENT)) goto out; @@ -2656,15 +2771,10 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) bch2_trans_iter_exit(trans, &dirent_iter); - ret = darray_push(&path, ((struct pathbuf_entry) { - .inum = inode.bi_inum, - .snapshot = snapshot, - })); + ret = darray_push(&path, inode.bi_inum); if (ret) return ret; - snapshot = parent_snapshot; - bch2_trans_iter_exit(trans, &inode_iter); inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, SPOS(0, inode.bi_dir, snapshot), 0); @@ -2686,21 +2796,28 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) break; inode = parent_inode; - snapshot = inode_k.k->p.snapshot; redo_bi_depth = true; - if (path_is_dup(&path, inode.bi_inum, snapshot)) { + if (darray_find(path, inode.bi_inum)) { printbuf_reset(&buf); - prt_printf(&buf, "directory structure loop:\n"); - darray_for_each_reverse(path, i) - prt_printf(&buf, "%llu:%u ", i->inum, i->snapshot); - prt_printf(&buf, "%llu:%u", inode.bi_inum, snapshot); + prt_printf(&buf, "directory structure loop in snapshot %u: ", + snapshot); + + ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf); + if (ret) + goto out; + + if (c->opts.verbose) { + prt_newline(&buf); + darray_for_each(path, i) + prt_printf(&buf, "%llu ", *i); + } if (fsck_err(trans, dir_loop, "%s", buf.buf)) { ret = remove_backpointer(trans, &inode); bch_err_msg(c, ret, "removing dirent"); if (ret) - break; + goto out; ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); @@ -2714,7 +2831,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) min_bi_depth = 0; if (redo_bi_depth) - ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); + ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth); out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -2731,7 +2848,7 @@ fsck_err: int bch2_check_directory_structure(struct bch_fs *c) { int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 53e5dc1f6ac1..ef4cc7395b86 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1265,7 +1265,14 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, { struct bch_fs *c = trans->c; -#ifdef CONFIG_UNICODE +#ifndef CONFIG_UNICODE + bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); + return -EOPNOTSUPP; +#endif + + if (c->opts.casefold_disabled) + return -EOPNOTSUPP; + int ret = 0; /* Not supported on individual files. 
*/ if (!S_ISDIR(bi->bi_mode)) @@ -1289,10 +1296,6 @@ int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, bi->bi_fields_set |= BIT(Inode_opt_casefold); return bch2_maybe_propagate_has_case_insensitive(trans, inum, bi); -#else - bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); - return -EOPNOTSUPP; -#endif } static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 82cec2836cbd..b8ec3e628d90 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -254,6 +254,11 @@ static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_ : c->opts.casefold; } +static inline bool bch2_inode_has_backpointer(const struct bch_inode_unpacked *bi) +{ + return bi->bi_dir || bi->bi_dir_offset; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 04bbdcf58e40..cd184b219a65 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1491,7 +1491,12 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref); prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs); prt_printf(out, "context:\t%u\n", rbio->context); - prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret)); + + int ret = READ_ONCE(rbio->ret); + if (ret < 0) + prt_printf(out, "ret:\t%s\n", bch2_err_str(ret)); + else + prt_printf(out, "ret:\t%i\n", ret); prt_printf(out, "flags:\t"); bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index dda802a656cf..f22b05e02c1e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1283,7 +1283,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca ret = 0; /* wait and retry */ bch2_disk_reservation_put(c, &disk_res); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } return ret; @@ -1474,14 +1474,13 @@ void bch2_fs_journal_stop(struct journal *j) clear_bit(JOURNAL_running, &j->flags); } -int bch2_fs_journal_start(struct journal *j, u64 cur_seq) +int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_entry_pin_list *p; struct journal_replay *i, **_i; struct genradix_iter iter; bool had_entries = false; - u64 last_seq = cur_seq, nr, seq; /* * @@ -1495,17 +1494,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return -EINVAL; } - genradix_for_each_reverse(&c->journal_entries, iter, _i) { - i = *_i; - - if (journal_replay_ignore(i)) - continue; + /* Clean filesystem? 
*/ + if (!last_seq) + last_seq = cur_seq; - last_seq = le64_to_cpu(i->j.last_seq); - break; - } - - nr = cur_seq - last_seq; + u64 nr = cur_seq - last_seq; /* * Extra fudge factor, in case we crashed when the journal pin fifo was @@ -1532,6 +1525,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->pin.back = cur_seq; atomic64_set(&j->seq, cur_seq - 1); + u64 seq; fifo_for_each_entry_ptr(p, &j->pin, seq) journal_pin_list_init(p, 1); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 83734fe4331f..977907038d98 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -453,7 +453,7 @@ int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); -int bch2_fs_journal_start(struct journal *, u64); +int bch2_fs_journal_start(struct journal *, u64, u64); void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0b15d71a8d2d..dd3f3434c1b0 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct printbuf buf = PRINTBUF; int ret = JOURNAL_ENTRY_ADD_OK; + if (last_seq && c->opts.journal_rewind) + last_seq = min(last_seq, c->opts.journal_rewind); + if (!c->journal.oldest_seq_found_ondisk || le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); @@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c, printbuf_reset(&buf); prt_printf(&buf, "journal read done, replaying entries %llu-%llu", *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind): + */ + u64 drop_before = *last_seq; + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; if (*start_seq != *blacklist_seq) prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); bch_info(c, "%s", buf.buf); - - /* Drop blacklisted entries and entries older than last_seq: */ genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c, continue; seq = le64_to_cpu(i->j.seq); - if (seq < *last_seq) { + if (seq < drop_before) { journal_replay_free(c, i, false); continue; } @@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c, } } - ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1); + ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) goto err; @@ -1703,9 +1716,10 @@ static CLOSURE_CALLBACK(journal_write_done) bch2_log_msg_start(c, &buf); if (err == -BCH_ERR_journal_write_err) - prt_printf(&buf, "unable to write journal to sufficient devices"); + prt_printf(&buf, "unable to write journal to sufficient devices\n"); else - prt_printf(&buf, "journal write error marking replicas: %s", bch2_err_str(err)); + prt_printf(&buf, "journal write error marking replicas: %s\n", + bch2_err_str(err)); bch2_fs_emergency_read_only2(c, &buf); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 779c22eb3979..c3f87c59922d 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -625,14 +625,26 @@ static int __bch2_inum_to_path(struct btree_trans *trans, { unsigned orig_pos = path->pos; int ret = 0; +
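/*
 * Aside - an illustrative sketch, not part of the patch: the journal_rewind
 * cutoff computed inline as drop_before in bch2_journal_read() above,
 * reduced to one expression. The helper name is made up.
 */
static u64 journal_replay_cutoff(u64 last_seq, u64 journal_rewind)
{
	/* a rewind can only move the replay cutoff backwards, never forwards */
	return journal_rewind ? min(last_seq, journal_rewind) : last_seq;
}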
DARRAY(subvol_inum) inums = {}; + + if (!snapshot) { + ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot); + if (ret) + goto disconnected; + } while (true) { - if (!snapshot) { - ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot); - if (ret) - goto disconnected; + subvol_inum n = (subvol_inum) { subvol ?: snapshot, inum }; + + if (darray_find_p(inums, i, i->subvol == n.subvol && i->inum == n.inum)) { + prt_str_reversed(path, "(loop)"); + break; } + ret = darray_push(&inums, n); + if (ret) + goto err; + struct bch_inode_unpacked inode; ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0); if (ret) @@ -650,7 +662,9 @@ static int __bch2_inum_to_path(struct btree_trans *trans, inum = inode.bi_dir; if (inode.bi_parent_subvol) { subvol = inode.bi_parent_subvol; - snapshot = 0; + ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot); + if (ret) + goto disconnected; } struct btree_iter d_iter; @@ -662,6 +676,7 @@ static int __bch2_inum_to_path(struct btree_trans *trans, goto disconnected; struct qstr dirent_name = bch2_dirent_get_name(d); + prt_bytes_reversed(path, dirent_name.name, dirent_name.len); prt_char(path, '/'); @@ -677,8 +692,10 @@ out: goto err; reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); + darray_exit(&inums); return 0; err: + darray_exit(&inums); return ret; disconnected: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -717,8 +734,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; - if (!target->bi_dir && - !target->bi_dir_offset) { + if (!bch2_inode_has_backpointer(target)) { fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 2a02606254b3..63f8e254495c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -234,6 +234,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_CASEFOLD, false, \ NULL, "Dirent lookups are casefolded") \ + x(casefold_disabled, u8, \ + OPT_FS|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Disable casefolding filesystem wide") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ @@ -379,6 +384,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Exit recovery immediately prior to journal replay")\ + x(journal_rewind, u64, \ + OPT_FS|OPT_MOUNT, \ + OPT_UINT(0, U64_MAX), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Rewind journal") \ x(recovery_passes, u64, \ OPT_FS|OPT_MOUNT, \ OPT_BITFIELD(bch2_recovery_passes), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0b21fa6ff062..d0b7e3a36a54 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -607,6 +607,7 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; + ret = 0; } } @@ -692,7 +693,7 @@ static bool check_version_upgrade(struct bch_fs *c) ret = true; } - if (new_version > c->sb.version_incompat && + if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { struct printbuf buf = PRINTBUF; @@ -757,6 +758,21 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.nochanges) c->opts.read_only = true; + if (c->opts.journal_rewind) { + bch_info(c, "rewinding journal, fsck required"); + c->opts.fsck = true; + } + + if (go_rw_in_recovery(c)) { + /* + * start workqueues/kworkers early - kthread creation checks for + * 
pending signals, which is _very_ annoying + */ + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -965,7 +981,7 @@ use_clean: ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", journal_seq, last_seq, blacklist_seq - 1) ?: - bch2_fs_journal_start(&c->journal, journal_seq); + bch2_fs_journal_start(&c->journal, last_seq, journal_seq); if (ret) goto err; @@ -1126,7 +1142,7 @@ fsck_err: struct printbuf buf = PRINTBUF; bch2_log_msg_start(c, &buf); - prt_printf(&buf, "error in recovery: %s", bch2_err_str(ret)); + prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); @@ -1181,7 +1197,7 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - ret = bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1, 1); if (ret) goto err; diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 35ac0d64d73a..c09ed2dd4639 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -217,11 +217,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || - !c->opts.read_only || - !c->sb.clean || - c->opts.recovery_passes || - (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) { + if (go_rw_in_recovery(c)) { if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); bch2_reconstruct_alloc(c); @@ -317,6 +313,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c, */ bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); + bool rewind = in_recovery && + r->curr_pass > pass && + !(r->passes_complete & BIT_ULL(pass)); if (persistent ? 
!(c->sb.recovery_passes_required & BIT_ULL(pass)) @@ -327,6 +326,9 @@ static bool recovery_pass_needs_set(struct bch_fs *c, (r->passes_ratelimiting & BIT_ULL(pass))) return true; + if (rewind) + return true; + return false; } @@ -341,7 +343,6 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct bch_fs_recovery *r = &c->recovery; int ret = 0; - lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); @@ -412,10 +413,8 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, { int ret = 0; - scoped_guard(mutex, &c->sb_lock) { - if (!recovery_pass_needs_set(c, pass, &flags)) - return 0; - + if (recovery_pass_needs_set(c, pass, &flags)) { + guard(mutex)(&c->sb_lock); ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); bch2_write_super(c); } diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 260571c7105e..2117f0ce1922 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -17,6 +17,15 @@ enum bch_run_recovery_pass_flags { RUN_RECOVERY_PASS_ratelimit = BIT(1), }; +static inline bool go_rw_in_recovery(struct bch_fs *c) +{ + return (c->journal_keys.nr || + !c->opts.read_only || + !c->sb.clean || + c->opts.recovery_passes || + (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))); +} + int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index a535abd44df3..92b90cfe622b 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -64,6 +64,9 @@ void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, REFLINK_P_IDX(p.v), le32_to_cpu(p.v->front_pad), le32_to_cpu(p.v->back_pad)); + + if (REFLINK_P_ERROR(p.v)) + prt_str(out, " error"); } bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) @@ -269,13 +272,12 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { - unsigned size = min((u64) k.k->size, - REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - - reflink_offset); - bch2_key_resize(&iter->k, size); + u64 missing_end = min(k.k->p.offset, + REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad)); + BUG_ON(reflink_offset == missing_end); int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, - k.k->p.offset, should_commit); + missing_end, should_commit); if (ret) { bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d06e73884871..d154b7651d28 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -3,9 +3,10 @@ #define _BCACHEFS_SB_ERRORS_FORMAT_H enum bch_fsck_flags { - FSCK_CAN_FIX = 1 << 0, - FSCK_CAN_IGNORE = 1 << 1, - FSCK_AUTOFIX = 1 << 2, + FSCK_CAN_FIX = BIT(0), + FSCK_CAN_IGNORE = BIT(1), + FSCK_AUTOFIX = BIT(2), + FSCK_ERR_NO_LOG = BIT(3), }; #define BCH_SB_ERRS() \ @@ -217,7 +218,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ - x(snapshot_key_missing_inode_snapshot, 314, 0) \ + x(snapshot_key_missing_inode_snapshot, 314, FSCK_AUTOFIX) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ @@ -251,20 +252,20 @@ enum bch_fsck_flags { x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \ 
x(deleted_inode_has_child_snapshots, 288, FSCK_AUTOFIX) \ x(extent_overlapping, 215, 0) \ - x(key_in_missing_inode, 216, 0) \ + x(key_in_missing_inode, 216, FSCK_AUTOFIX) \ x(key_in_wrong_inode_type, 217, 0) \ - x(extent_past_end_of_inode, 218, 0) \ + x(extent_past_end_of_inode, 218, FSCK_AUTOFIX) \ x(dirent_empty_name, 219, 0) \ x(dirent_val_too_big, 220, 0) \ x(dirent_name_too_long, 221, 0) \ x(dirent_name_embedded_nul, 222, 0) \ x(dirent_name_dot_or_dotdot, 223, 0) \ x(dirent_name_has_slash, 224, 0) \ - x(dirent_d_type_wrong, 225, 0) \ + x(dirent_d_type_wrong, 225, FSCK_AUTOFIX) \ x(inode_bi_parent_wrong, 226, 0) \ x(dirent_in_missing_dir_inode, 227, 0) \ x(dirent_in_non_dir_inode, 228, 0) \ - x(dirent_to_missing_inode, 229, 0) \ + x(dirent_to_missing_inode, 229, FSCK_AUTOFIX) \ x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ @@ -300,7 +301,7 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ @@ -313,7 +314,7 @@ enum bch_fsck_flags { x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ x(accounting_to_invalid_device, 289, 0) \ - x(invalid_btree_id, 274, 0) \ + x(invalid_btree_id, 274, FSCK_AUTOFIX) \ x(alloc_key_io_time_bad, 275, 0) \ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 23a332d76b32..4c43d2a2c1f5 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -135,7 +135,9 @@ static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor) bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { - bool ret; +#ifdef CONFIG_BCACHEFS_DEBUG + u32 orig_id = id; +#endif guard(rcu)(); struct snapshot_table *t = rcu_dereference(c->snapshots); @@ -147,11 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) while (id && id < ancestor - IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); - ret = id && id < ancestor + bool ret = id && id < ancestor ? 
test_ancestor_bitmap(t, id, ancestor) : id == ancestor; - EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, orig_id, ancestor)); return ret; } @@ -869,7 +871,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, 0, k, ret) { - if (le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { + if (k.k->type == KEY_TYPE_snapshot_tree && + le32_to_cpu(bkey_s_c_to_snapshot_tree(k).v->root_snapshot) == id) { tree_id = k.k->p.offset; break; } @@ -897,7 +900,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { - if (le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { + if (k.k->type == KEY_TYPE_subvolume && + le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot) == id) { snapshot->v.subvol = cpu_to_le32(k.k->p.offset); SET_BCH_SNAPSHOT_SUBVOL(&snapshot->v, true); break; diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index 71b735a85026..3e9f59226bdf 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -38,6 +38,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, struct bkey_s_c_dirent old, bool *updated_before_k_pos) { + struct bch_fs *c = trans->c; struct qstr old_name = bch2_dirent_get_name(old); struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64)); int ret = PTR_ERR_OR_ZERO(new); @@ -60,7 +61,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, sprintf(renamed_buf, "%.*s.fsck_renamed-%u", old_name.len, old_name.name, i)); - ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL); + ret = bch2_dirent_init_name(c, new, hash_info, &renamed_name, NULL); if (ret) return ret; @@ -79,7 +80,7 @@ static int bch2_fsck_rename_dirent(struct btree_trans *trans, } ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); - bch_err_fn(trans->c, ret); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 79d51aef70aa..8979ac2d7a3b 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -48,9 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .inum_snapshot = bi->bi_snapshot, .type = INODE_STR_HASH(bi), -#ifdef CONFIG_UNICODE .cf_encoding = bch2_inode_casefold(c, bi) ? 
c->cf_encoding : NULL, -#endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a5b97c9c5163..c46b1053a02c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -210,7 +210,6 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); static void bch2_dev_io_ref_stop(struct bch_dev *, int); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -static int bch2_fs_init_rw(struct bch_fs *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { @@ -794,7 +793,7 @@ err: return ret; } -static int bch2_fs_init_rw(struct bch_fs *c) +int bch2_fs_init_rw(struct bch_fs *c) { if (test_bit(BCH_FS_rw_init_done, &c->flags)) return 0; @@ -1015,16 +1014,28 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; + if (go_rw_in_recovery(c)) { + /* + * start workqueues/kworkers early - kthread creation checks for + * pending signals, which is _very_ annoying + */ + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + } + #ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; + if (bch2_fs_casefold_enabled(c)) { + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. 
Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } } #else if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { @@ -1151,12 +1162,11 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); -#ifdef CONFIG_UNICODE - bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); -#endif + if (c->cf_encoding) + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index dc52f06cb2b9..e90bab9afe78 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -46,6 +46,7 @@ void __bch2_fs_stop(struct bch_fs *); void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); +int bch2_fs_init_rw(struct bch_fs *); int bch2_fs_start(struct bch_fs *); struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index dc09532796af..9c5a9c551f03 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1080,34 +1080,14 @@ TRACE_EVENT(trans_blocked_journal_reclaim, __entry->must_wait) ); -TRACE_EVENT(trans_restart_journal_preres_get, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - unsigned flags), - TP_ARGS(trans, caller_ip, flags), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(unsigned, flags ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - __entry->flags = flags; - ), - - TP_printk("%s %pS %x", __entry->trans_fn, - (void *) __entry->caller_ip, - __entry->flags) -); - +#if 0 +/* todo: bring back dynamic fault injection */ DEFINE_EVENT(transaction_event, trans_restart_fault_inject, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), TP_ARGS(trans, caller_ip) ); +#endif DEFINE_EVENT(transaction_event, trans_traverse_all, TP_PROTO(struct btree_trans *trans, @@ -1195,19 +1175,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - -DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1229,13 +1196,6 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path), - TP_ARGS(trans, caller_ip, path) -); - DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, @@ -1294,44 +1254,6 @@ 
TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); -TRACE_EVENT(trans_restart_key_cache_key_realloced, - TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, - struct btree_path *path, - unsigned old_u64s, - unsigned new_u64s), - TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), - - TP_STRUCT__entry( - __array(char, trans_fn, 32 ) - __field(unsigned long, caller_ip ) - __field(enum btree_id, btree_id ) - TRACE_BPOS_entries(pos) - __field(u32, old_u64s ) - __field(u32, new_u64s ) - ), - - TP_fast_assign( - strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); - __entry->caller_ip = caller_ip; - - __entry->btree_id = path->btree_id; - TRACE_BPOS_assign(pos, path->pos); - __entry->old_u64s = old_u64s; - __entry->new_u64s = new_u64s; - ), - - TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", - __entry->trans_fn, - (void *) __entry->caller_ip, - bch2_btree_id_str(__entry->btree_id), - __entry->pos_inode, - __entry->pos_offset, - __entry->pos_snapshot, - __entry->old_u64s, - __entry->new_u64s) -); - DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), @@ -1490,6 +1412,31 @@ DEFINE_EVENT(fs_str, io_move_evacuate_bucket, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, extent_trim_atomic, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_slot, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, __btree_iter_peek, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_max, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + +DEFINE_EVENT(fs_str, btree_iter_peek_prev_min, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + #ifdef CONFIG_BCACHEFS_PATH_TRACEPOINTS TRACE_EVENT(update_by_path, @@ -1902,21 +1849,6 @@ TRACE_EVENT(btree_path_free, __entry->dup_locked) ); -TRACE_EVENT(btree_path_free_trans_begin, - TP_PROTO(btree_path_idx_t path), - TP_ARGS(path), - - TP_STRUCT__entry( - __field(btree_path_idx_t, idx ) - ), - - TP_fast_assign( - __entry->idx = path; - ), - - TP_printk(" path %3u", __entry->idx) -); - #else /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ #ifndef _TRACE_BCACHEFS_H @@ -1934,7 +1866,6 @@ static inline void trace_btree_path_traverse_start(struct btree_trans *trans, st static inline void trace_btree_path_traverse_end(struct btree_trans *trans, struct btree_path *path) {} static inline void trace_btree_path_set_pos(struct btree_trans *trans, struct btree_path *path, struct bpos *new_pos) {} static inline void trace_btree_path_free(struct btree_trans *trans, btree_path_idx_t path, struct btree_path *dup) {} -static inline void trace_btree_path_free_trans_begin(btree_path_idx_t path) {} #endif #endif /* CONFIG_BCACHEFS_PATH_TRACEPOINTS */ diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 9de356bcb411..aa176cc9a324 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -83,6 +83,8 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + /* Set after we add a new block group to the free space tree. 
*/ + BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a3e2a2a81461..a83c268f7f87 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1241,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, { BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; + struct rb_node *node; int nr; int ret; @@ -1269,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, btrfs_release_path(path); } + node = rb_first_cached(&trans->fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *bg; + + bg = rb_entry(node, struct btrfs_block_group, cache_node); + clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); + node = rb_next(node); + cond_resched(); + } + return 0; } @@ -1358,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) block_group = rb_entry(node, struct btrfs_block_group, cache_node); + + if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, + &block_group->runtime_flags)) + goto next; + ret = populate_free_space_tree(trans, block_group); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } +next: if (btrfs_should_end_transaction(trans)) { btrfs_end_transaction(trans); trans = btrfs_start_transaction(free_space_root, 1); @@ -1390,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); + /* + * While rebuilding the free space tree we may allocate new metadata + * block groups while modifying the free space tree. + * + * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we + * can use multiple transactions, every time btrfs_end_transaction() is + * called at btrfs_rebuild_free_space_tree() we finish the creation of + * new block groups by calling btrfs_create_pending_block_groups(), and + * that in turn calls us, through add_block_group_free_space(), to add + * a free space info item and a free space extent item for the block + * group. + * + * Then later btrfs_rebuild_free_space_tree() may find such new block + * groups and processes them with populate_free_space_tree(), which can + * fail with EEXIST since there are already items for the block group in + * the free space tree. Notice that we say "may find" because a new + * block group may be added to the block groups rbtree in a node before + * or after the block group currently being processed by the rebuild + * process. So signal the rebuild process to skip such new block groups + * if it finds them. + */ + set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + ret = add_new_free_space_info(trans, block_group, path); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 26d6ed170a19..fc66872b4c74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value of the deleted dir to its + * parent directory. 
This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; @@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); - if (!ret) { + if (!ret) btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; - } out: btrfs_end_transaction(trans); out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4eda35bdba71..8a60983a697c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_record_new_subvolume(trans, BTRFS_I(dir)); - d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 858b609e292c..cea8a7e9d6d3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r unsigned int nofs_flag; struct btrfs_inode *inode; + /* Only meant to be called for subvolume roots and not for log roots. */ + ASSERT(is_fstree(btrfs_root_id(root))); + /* * We're holding a transaction handle whether we are logging or * replaying a log tree, so we must make sure NOFS semantics apply @@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct btrfs_inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - return NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. 
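/*
 * Aside - the error-handling pattern behind the read_one_inode() removal
 * repeated throughout tree-log.c below; illustrative, not part of the patch.
 * The old helper collapsed every failure to NULL, which callers guessed
 * back into -EIO or -ENOENT; calling btrfs_iget_logging() directly keeps
 * the real errno:
 */
	inode = btrfs_iget_logging(objectid, root);
	if (IS_ERR(inode))
		return PTR_ERR(inode);	/* was: if (!inode) return -EIO; */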
@@ -674,9 +662,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the @@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1073,7 +1062,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1146,13 +1137,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; @@ -1167,10 +1158,10 @@ again: kfree(victim_name.name); return ret; } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + ret = PTR_ERR(victim_parent); + } else { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); @@ -1315,9 +1306,9 @@ again: struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } @@ -1389,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1409,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * parent object can change from one array * item to another. 
*/ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); @@ -1682,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } @@ -1720,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *inode; struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; @@ -1761,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { + dir = btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { iput(&inode->vfs_inode); - return -EIO; + return PTR_ERR(dir); } ret = btrfs_add_link(trans, dir, inode, name, 1, index); @@ -1845,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -2147,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -2301,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. */ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2467,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } from = ALIGN(i_size_read(&inode->vfs_inode), @@ -7448,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. 
* Also we don't need to worry about renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d4dbffdedd08..895256cd2786 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -137,13 +137,7 @@ struct epitem { }; /* List header used to link this structure to the eventpoll ready list */ - struct list_head rdllink; - - /* - * Works together "struct eventpoll"->ovflist in keeping the - * single linked chain of items. - */ - struct epitem *next; + struct llist_node rdllink; /* The file descriptor information this item refers to */ struct epoll_filefd ffd; @@ -191,22 +185,15 @@ struct eventpoll { /* Wait queue used by file->poll() */ wait_queue_head_t poll_wait; - /* List of ready file descriptors */ - struct list_head rdllist; - - /* Lock which protects rdllist and ovflist */ - rwlock_t lock; + /* + * List of ready file descriptors. Adding to this list is lockless. Items can be removed + * only with eventpoll::mtx + */ + struct llist_head rdllist; /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; - /* - * This is a single linked list that chains all the "struct epitem" that - * happened while transferring ready events to userspace w/out - * holding ->lock. - */ - struct epitem *ovflist; - /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; @@ -361,10 +348,14 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Tells us if the item is currently linked */ -static inline int ep_is_linked(struct epitem *epi) +/* + * Add the item to its container eventpoll's rdllist; do nothing if the item is already on rdllist. + */ +static void epitem_ready(struct epitem *epi) { - return !list_empty(&epi->rdllink); + if (&epi->rdllink == cmpxchg(&epi->rdllink.next, &epi->rdllink, NULL)) + llist_add(&epi->rdllink, &epi->ep->rdllist); + } static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) @@ -383,13 +374,26 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) * * @ep: Pointer to the eventpoll context. * - * Return: a value different than %zero if ready events are available, - * or %zero otherwise. + * Return: true if ready events might be available, false otherwise. */ -static inline int ep_events_available(struct eventpoll *ep) +static inline bool ep_events_available(struct eventpoll *ep) { - return !list_empty_careful(&ep->rdllist) || - READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; + bool available; + int locked; + + locked = mutex_trylock(&ep->mtx); + if (!locked) { + /* + * The lock is held and someone might have removed all items while inspecting it. The + * llist_empty() check in this case is futile. Assume that something is enqueued and + * let ep_try_send_events() figure it out. + */ + return true; + } + + available = !llist_empty(&ep->rdllist); + mutex_unlock(&ep->mtx); + return available; } #ifdef CONFIG_NET_RX_BUSY_POLL @@ -724,77 +728,6 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) rcu_read_unlock(); } - -/* - * ep->mutex needs to be held because we could be hit by - * eventpoll_release_file() and epoll_ctl(). 
- */ -static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) -{ - /* - * Steal the ready list, and re-init the original one to the - * empty list. Also, set ep->ovflist to NULL so that events - * happening while looping w/out locks, are not lost. We cannot - * have the poll callback to queue directly on ep->rdllist, - * because we want the "sproc" callback to be able to do it - * in a lockless way. - */ - lockdep_assert_irqs_enabled(); - write_lock_irq(&ep->lock); - list_splice_init(&ep->rdllist, txlist); - WRITE_ONCE(ep->ovflist, NULL); - write_unlock_irq(&ep->lock); -} - -static void ep_done_scan(struct eventpoll *ep, - struct list_head *txlist) -{ - struct epitem *epi, *nepi; - - write_lock_irq(&ep->lock); - /* - * During the time we spent inside the "sproc" callback, some - * other events might have been queued by the poll callback. - * We re-insert them inside the main ready-list here. - */ - for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; - nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { - /* - * We need to check if the item is already in the list. - * During the "sproc" callback execution time, items are - * queued into ->ovflist but the "txlist" might already - * contain them, and the list_splice() below takes care of them. - */ - if (!ep_is_linked(epi)) { - /* - * ->ovflist is LIFO, so we have to reverse it in order - * to keep in FIFO. - */ - list_add(&epi->rdllink, &ep->rdllist); - ep_pm_stay_awake(epi); - } - } - /* - * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after - * releasing the lock, events will be queued in the normal way inside - * ep->rdllist. - */ - WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); - - /* - * Quickly re-inject items left on "txlist". - */ - list_splice(txlist, &ep->rdllist); - __pm_relax(ep->ws); - - if (!list_empty(&ep->rdllist)) { - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - } - - write_unlock_irq(&ep->lock); -} - static void ep_get(struct eventpoll *ep) { refcount_inc(&ep->refcount); @@ -832,10 +765,12 @@ static void ep_free(struct eventpoll *ep) static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; + struct llist_node *put_back_last; struct epitems_head *to_free; struct hlist_head *head; + LLIST_HEAD(put_back); - lockdep_assert_irqs_enabled(); + lockdep_assert_held(&ep->mtx); /* * Removes poll wait queue hooks. 
@@ -867,10 +802,20 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) rb_erase_cached(&epi->rbn, &ep->rbr); - write_lock_irq(&ep->lock); - if (ep_is_linked(epi)) - list_del_init(&epi->rdllink); - write_unlock_irq(&ep->lock); + if (llist_on_list(&epi->rdllink)) { + put_back_last = NULL; + while (true) { + struct llist_node *n = llist_del_first(&ep->rdllist); + + if (&epi->rdllink == n || WARN_ON(!n)) + break; + if (!put_back_last) + put_back_last = n; + __llist_add(n, &put_back); + } + if (put_back_last) + llist_add_batch(put_back.first, put_back_last, &ep->rdllist); + } wakeup_source_unregister(ep_wakeup_source(epi)); /* @@ -883,7 +828,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return ep_refcount_dec_and_test(ep); + return true; } /* @@ -891,14 +836,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - WARN_ON_ONCE(__ep_remove(ep, epi, false)); + if (__ep_remove(ep, epi, false)) + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; - bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) @@ -931,10 +876,8 @@ static void ep_clear_and_put(struct eventpoll *ep) cond_resched(); } - dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - - if (dispose) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); } @@ -974,8 +917,9 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; - LIST_HEAD(txlist); - struct epitem *epi, *tmp; + struct wakeup_source *ws; + struct llist_node *n; + struct epitem *epi; poll_table pt; __poll_t res = 0; @@ -989,22 +933,39 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep * the ready list. */ mutex_lock_nested(&ep->mtx, depth); - ep_start_scan(ep, &txlist); - list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { + while (true) { + n = llist_del_first_init(&ep->rdllist); + if (!n) + break; + + epi = llist_entry(n, struct epitem, rdllink); + if (ep_item_poll(epi, &pt, depth + 1)) { res = EPOLLIN | EPOLLRDNORM; + epitem_ready(epi); break; } else { /* - * Item has been dropped into the ready list by the poll - * callback, but it's not actually ready, as far as - * caller requested events goes. We can remove it here. + * We need to activate ep before deactivating epi, to prevent autosuspend + * just in case epi becomes active after ep_item_poll() above. + * + * This is similar to ep_send_events(). 
*/ + ws = ep_wakeup_source(epi); + if (ws) { + if (ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(ws); + } __pm_relax(ep_wakeup_source(epi)); - list_del_init(&epi->rdllink); + + /* Just in case epi becomes active right before __pm_relax() */ + if (unlikely(ep_item_poll(epi, &pt, depth + 1))) + ep_pm_stay_awake(epi); + + __pm_relax(ep->ws); } } - ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } @@ -1137,7 +1098,7 @@ again: dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); - if (dispose) + if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } @@ -1153,12 +1114,10 @@ static int ep_alloc(struct eventpoll **pep) return -ENOMEM; mutex_init(&ep->mtx); - rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); - INIT_LIST_HEAD(&ep->rdllist); + init_llist_head(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; - ep->ovflist = EP_UNACTIVE_PTR; ep->user = get_current_user(); refcount_set(&ep->refcount, 1); @@ -1241,93 +1200,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, #endif /* CONFIG_KCMP */ /* - * Adds a new entry to the tail of the list in a lockless way, i.e. - * multiple CPUs are allowed to call this function concurrently. - * - * Beware: it is necessary to prevent any other modifications of the - * existing list until all changes are completed, in other words - * concurrent list_add_tail_lockless() calls should be protected - * with a read lock, where write lock acts as a barrier which - * makes sure all list_add_tail_lockless() calls are fully - * completed. - * - * Also an element can be locklessly added to the list only in one - * direction i.e. either to the tail or to the head, otherwise - * concurrent access will corrupt the list. - * - * Return: %false if element has been already added to the list, %true - * otherwise. - */ -static inline bool list_add_tail_lockless(struct list_head *new, - struct list_head *head) -{ - struct list_head *prev; - - /* - * This is simple 'new->next = head' operation, but cmpxchg() - * is used in order to detect that same element has been just - * added to the list from another CPU: the winner observes - * new->next == new. - */ - if (!try_cmpxchg(&new->next, &new, head)) - return false; - - /* - * Initially ->next of a new element must be updated with the head - * (we are inserting to the tail) and only then pointers are atomically - * exchanged. XCHG guarantees memory ordering, thus ->next should be - * updated before pointers are actually swapped and pointers are - * swapped before prev->next is updated. - */ - - prev = xchg(&head->prev, new); - - /* - * It is safe to modify prev->next and new->prev, because a new element - * is added only to the tail and new->next is updated before XCHG. - */ - - prev->next = new; - new->prev = prev; - - return true; -} - -/* - * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, - * i.e. multiple CPUs are allowed to call this function concurrently. - * - * Return: %false if epi element has been already chained, %true otherwise. 
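Both the removed list_add_tail_lockless() and its llist replacement lean on the same sentinel: a node that is on no list points at itself (init_llist_node() stores node->next = node), and a compare-exchange on that self-pointer lets exactly one of several racing CPUs claim the insertion while the losers learn the node is already queued. A small C11 sketch of just that claim step, under the stated assumption that the sentinel is the whole story being illustrated:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct node { _Atomic(struct node *) next; };

    /* Off-list nodes point at themselves, as with init_llist_node(). */
    static void node_init(struct node *n)
    {
        atomic_store(&n->next, n);
    }

    /* Exactly one racing caller wins: the CAS swings ->next away from
     * the self-pointer; everyone else sees the node already claimed. */
    static bool node_try_claim(struct node *n)
    {
        struct node *expected = n;

        return atomic_compare_exchange_strong(&n->next, &expected, NULL);
    }

    int main(void)
    {
        struct node a;

        node_init(&a);
        printf("first claim:  %s\n", node_try_claim(&a) ? "won" : "lost");
        printf("second claim: %s\n", node_try_claim(&a) ? "won" : "lost");
        return 0;
    }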
- */ -static inline bool chain_epi_lockless(struct epitem *epi) -{ - struct eventpoll *ep = epi->ep; - - /* Fast preliminary check */ - if (epi->next != EP_UNACTIVE_PTR) - return false; - - /* Check that the same epi has not been just chained from another CPU */ - if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) - return false; - - /* Atomically exchange tail */ - epi->next = xchg(&ep->ovflist, epi); - - return true; -} - -/* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. * - * This callback takes a read lock in order not to contend with concurrent - * events from another file descriptor, thus all modifications to ->rdllist - * or ->ovflist are lockless. Read lock is paired with the write lock from - * ep_start/done_scan(), which stops all list modifications and guarantees - * that lists state is seen correctly. - * * Another thing worth to mention is that ep_poll_callback() can be called * concurrently for the same @epi from different CPUs if poll table was inited * with several wait queues entries. Plural wakeup from different CPUs of a @@ -1337,15 +1213,11 @@ static inline bool chain_epi_lockless(struct epitem *epi) */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { - int pwake = 0; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); - unsigned long flags; int ewake = 0; - read_lock_irqsave(&ep->lock, flags); - ep_set_busy_poll_napi_id(epi); /* @@ -1355,7 +1227,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) - goto out_unlock; + goto out; /* * Check the events coming with the callback. At this stage, not @@ -1364,22 +1236,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * test for "key" != NULL before the event match test. */ if (pollflags && !(pollflags & epi->event.events)) - goto out_unlock; + goto out; - /* - * If we are transferring events to userspace, we can hold no locks - * (because we're accessing user memory, and because of linux f_op->poll() - * semantics). All the events that happen during that period of time are - * chained in ep->ovflist and requeued later on. - */ - if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (chain_epi_lockless(epi)) - ep_pm_stay_awake_rcu(epi); - } else if (!ep_is_linked(epi)) { - /* In the usual case, add event to ready list. 
*/ - if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) - ep_pm_stay_awake_rcu(epi); - } + ep_pm_stay_awake_rcu(epi); + epitem_ready(epi); /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1408,15 +1268,9 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) - pwake++; - -out_unlock: - read_unlock_irqrestore(&ep->lock, flags); - - /* We have to call this outside the lock */ - if (pwake) ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); +out: if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; @@ -1661,8 +1515,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (is_file_epoll(tfile)) tep = tfile->private_data; - lockdep_assert_irqs_enabled(); - if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, max_user_watches) >= 0)) return -ENOSPC; @@ -1674,11 +1526,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, } /* Item initialization follow here ... */ - INIT_LIST_HEAD(&epi->rdllink); + init_llist_node(&epi->rdllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; - epi->next = EP_UNACTIVE_PTR; if (tep) mutex_lock_nested(&tep->mtx, 1); @@ -1745,16 +1596,13 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, return -ENOMEM; } - /* We have to drop the new item inside our item list to keep track of it */ - write_lock_irq(&ep->lock); - /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); /* If the file is already "ready" we drop it inside the ready list */ - if (revents && !ep_is_linked(epi)) { - list_add_tail(&epi->rdllink, &ep->rdllist); + if (revents) { ep_pm_stay_awake(epi); + epitem_ready(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1763,8 +1611,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, pwake++; } - write_unlock_irq(&ep->lock); - /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); @@ -1779,11 +1625,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, static int ep_modify(struct eventpoll *ep, struct epitem *epi, const struct epoll_event *event) { - int pwake = 0; poll_table pt; - lockdep_assert_irqs_enabled(); - init_poll_funcptr(&pt, NULL); /* @@ -1827,24 +1670,16 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * list, push it inside. 
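The ewake logic kept at the end of ep_poll_callback() is what backs EPOLLEXCLUSIVE: when several epoll instances watch the same file exclusively, a wakeup stops at the first waiter that accepts the event. A runnable userspace demonstration (build with gcc -pthread); ordinarily only one of the two waiters reports an event, though the API only promises that at least one is woken:

    #include <sys/epoll.h>
    #include <sys/eventfd.h>
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    static void *waiter(void *arg)
    {
        int ep = *(int *)arg;
        struct epoll_event ev;
        int n = epoll_wait(ep, &ev, 1, 1000);   /* 1s timeout */

        printf("epoll fd %d: got %d event(s)\n", ep, n);
        return NULL;
    }

    int main(void)
    {
        int efd = eventfd(0, 0);
        int ep[2] = { epoll_create1(0), epoll_create1(0) };
        struct epoll_event ev = { .events = EPOLLIN | EPOLLEXCLUSIVE };
        pthread_t t[2];
        uint64_t one = 1;

        for (int i = 0; i < 2; i++) {
            epoll_ctl(ep[i], EPOLL_CTL_ADD, efd, &ev);
            pthread_create(&t[i], NULL, waiter, &ep[i]);
        }
        usleep(100 * 1000);             /* let both threads block first */

        write(efd, &one, sizeof(one));  /* wakes one exclusive waiter */

        for (int i = 0; i < 2; i++)
            pthread_join(t[i], NULL);
        return 0;
    }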
*/ if (ep_item_poll(epi, &pt, 1)) { - write_lock_irq(&ep->lock); - if (!ep_is_linked(epi)) { - list_add_tail(&epi->rdllink, &ep->rdllist); - ep_pm_stay_awake(epi); + ep_pm_stay_awake(epi); + epitem_ready(epi); - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } - write_unlock_irq(&ep->lock); + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + wake_up(&ep->wq); + if (waitqueue_active(&ep->poll_wait)) + ep_poll_safewake(ep, NULL, 0); } - /* We have to call this outside the lock */ - if (pwake) - ep_poll_safewake(ep, NULL, 0); - return 0; } @@ -1852,7 +1687,7 @@ static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct epitem *epi, *tmp; - LIST_HEAD(txlist); + LLIST_HEAD(txlist); poll_table pt; int res = 0; @@ -1867,19 +1702,18 @@ static int ep_send_events(struct eventpoll *ep, init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); - ep_start_scan(ep, &txlist); - /* - * We can loop without lock because we are passed a task private list. - * Items cannot vanish during the loop we are holding ep->mtx. - */ - list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { + while (res < maxevents) { struct wakeup_source *ws; + struct llist_node *n; __poll_t revents; - if (res >= maxevents) + n = llist_del_first(&ep->rdllist); + if (!n) break; + epi = llist_entry(n, struct epitem, rdllink); + /* * Activate ep->ws before deactivating epi->ws to prevent * triggering auto-suspend here (in case we reactive epi->ws @@ -1896,21 +1730,30 @@ static int ep_send_events(struct eventpoll *ep, __pm_relax(ws); } - list_del_init(&epi->rdllink); - /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding ep->mtx, * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); - if (!revents) + if (!revents) { + init_llist_node(n); + + /* + * Just in case epi becomes ready after ep_item_poll() above, but before + * init_llist_node(). Make sure to add it to the ready list, otherwise an + * event may be lost. + */ + if (unlikely(ep_item_poll(epi, &pt, 1))) { + ep_pm_stay_awake(epi); + epitem_ready(epi); + } continue; + } events = epoll_put_uevent(revents, epi->event.data, events); if (!events) { - list_add(&epi->rdllink, &txlist); - ep_pm_stay_awake(epi); + llist_add(&epi->rdllink, &ep->rdllist); if (!res) res = -EFAULT; break; @@ -1918,25 +1761,31 @@ static int ep_send_events(struct eventpoll *ep, res++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; - else if (!(epi->event.events & EPOLLET)) { + __llist_add(n, &txlist); + } + + llist_for_each_entry_safe(epi, tmp, txlist.first, rdllink) { + init_llist_node(&epi->rdllink); + + if (!(epi->event.events & EPOLLET)) { /* - * If this file has been added with Level - * Trigger mode, we need to insert back inside - * the ready list, so that the next call to - * epoll_wait() will check again the events - * availability. At this point, no one can insert - * into ep->rdllist besides us. The epoll_ctl() - * callers are locked out by - * ep_send_events() holding "mtx" and the - * poll callback will queue them in ep->ovflist. + * If this file has been added with Level Trigger mode, we need to insert + * back inside the ready list, so that the next call to epoll_wait() will + * check again the events availability. 
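The double ep_item_poll() in the new ep_send_events() deserves a note: once an item polls idle, the code first marks the node off-list (init_llist_node()) and only then re-polls, so an event firing in the window is either seen by the producer (which can queue the now off-list item) or caught by the re-check. A compressed C11 sketch of that lost-wakeup guard; "queued" and "ready" are stand-ins for the llist node state and the file's readiness, and the actual list push is elided:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool queued;   /* item is on the ready list        */
    static atomic_bool ready;    /* the underlying event is pending  */

    /* Producer (ep_poll_callback): mark ready, queue if not queued. */
    static void producer(void)
    {
        bool expected = false;

        atomic_store(&ready, true);
        if (atomic_compare_exchange_strong(&queued, &expected, true)) {
            /* push the item onto the ready list here */
        }
    }

    /* Consumer (ep_send_events) on an item that polled idle: clear
     * the queued marker FIRST, then re-check readiness. An event that
     * slipped in between is re-queued here; testing in the other
     * order would let the producer see "already queued" and skip the
     * push, losing the event. */
    static void consumer(void)
    {
        bool expected = false;

        if (atomic_load(&ready))
            return;                      /* deliver instead           */
        atomic_store(&queued, false);    /* init_llist_node()         */
        if (atomic_load(&ready) &&
            atomic_compare_exchange_strong(&queued, &expected, true)) {
            /* re-add to the ready list (epitem_ready()) */
        }
    }

    int main(void)
    {
        producer();
        consumer();
        return 0;
    }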
*/ - list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); + epitem_ready(epi); } } - ep_done_scan(ep, &txlist); + + __pm_relax(ep->ws); mutex_unlock(&ep->mtx); + if (!llist_empty(&ep->rdllist)) { + if (waitqueue_active(&ep->wq)) + wake_up(&ep->wq); + } + return res; } @@ -2029,8 +1878,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, wait_queue_entry_t wait; ktime_t expires, *to = NULL; - lockdep_assert_irqs_enabled(); - if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { slack = select_estimate_accuracy(timeout); to = &expires; @@ -2090,54 +1937,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, init_wait(&wait); wait.func = ep_autoremove_wake_function; - write_lock_irq(&ep->lock); - /* - * Barrierless variant, waitqueue_active() is called under - * the same lock on wakeup ep_poll_callback() side, so it - * is safe to avoid an explicit barrier. - */ - __set_current_state(TASK_INTERRUPTIBLE); - - /* - * Do the final check under the lock. ep_start/done_scan() - * plays with two lists (->rdllist and ->ovflist) and there - * is always a race when both lists are empty for short - * period of time although events are pending, so lock is - * important. - */ - eavail = ep_events_available(ep); - if (!eavail) - __add_wait_queue_exclusive(&ep->wq, &wait); - - write_unlock_irq(&ep->lock); + prepare_to_wait_exclusive(&ep->wq, &wait, TASK_INTERRUPTIBLE); - if (!eavail) + if (!ep_events_available(ep)) timed_out = !ep_schedule_timeout(to) || !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); - __set_current_state(TASK_RUNNING); - - /* - * We were woken up, thus go and try to harvest some events. - * If timed out and still on the wait queue, recheck eavail - * carefully under lock, below. - */ - eavail = 1; - if (!list_empty_careful(&wait.entry)) { - write_lock_irq(&ep->lock); - /* - * If the thread timed out and is not on the wait queue, - * it means that the thread was woken up after its - * timeout expired before it could reacquire the lock. - * Thus, when wait.entry is empty, it needs to harvest - * events. - */ - if (timed_out) - eavail = list_empty(&wait.entry); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } + finish_wait(&ep->wq, &wait); + eavail = ep_events_available(ep); } } diff --git a/fs/exec.c b/fs/exec.c index 1f5fdd2e096e..ba400aafd640 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt) bool path_noexec(const struct path *path) { + /* If it's an anonymous inode make sure that we catch any shenanigans. */ + VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC)); return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } @@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) return file; + if (path_noexec(&file->f_path)) + return ERR_PTR(-EACCES); + /* * In the past the regular type check was here. It moved to may_open() in * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is * an invariant that all non-regular files error out before we get here. 
*/ - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || - path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) return ERR_PTR(-EACCES); err = exe_file_deny_write_access(file); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f102afc03359..47006d0753f1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, struct address_space *mapping, struct iov_iter *ii, loff_t pos, - unsigned int max_pages) + unsigned int max_folios) { struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); @@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, int err = 0; num = min(iov_iter_count(ii), fc->max_write); - num = min(num, max_pages << PAGE_SHIFT); ap->args.in_pages = true; ap->descs[0].offset = offset; - while (num) { + while (num && ap->num_folios < max_folios) { size_t tmp; struct folio *folio; pgoff_t index = pos >> PAGE_SHIFT; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bfe8d8af46f3..9572bdef49ee 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -9,6 +9,7 @@ #include "fuse_i.h" #include "dev_uring_i.h" +#include <linux/dax.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> @@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode) /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); + if (FUSE_IS_DAX(inode)) + dax_break_layout_final(inode); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a8..6f487fc6be34 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s) */ inode->i_state = I_DIRTY; /* - * Historically anonymous inodes didn't have a type at all and - * userspace has come to rely on this. Internally they're just - * regular files but S_IFREG is masked off when reporting - * information to userspace. + * Historically anonymous inodes don't have a type at all and + * userspace has come to rely on this. */ - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE | S_ANON_INODE; diff --git a/fs/namei.c b/fs/namei.c index f761cafaeaad..c26a7ee42184 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3480,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, return -EACCES; break; default: - VFS_BUG_ON_INODE(1, inode); + VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 72a3e6db2524..f27ea5099a68 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, * data written into the pagecache until we can find out from the server what * the values actually are. 
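The alloc_anon_inode() change above makes the stored i_mode match what userspace has always observed for anonymous inodes: permission bits only, no file-type bits (per the old comment, S_IFREG used to be kept internally and masked off on reporting). This is observable directly; a small check, assuming a Linux host:

    #include <stdio.h>
    #include <sys/eventfd.h>
    #include <sys/stat.h>

    int main(void)
    {
        struct stat st;
        int fd = eventfd(0, 0);

        if (fd < 0 || fstat(fd, &st) < 0)
            return 1;
        /* Typically prints "fmt=0 perms=600": no S_IFMT bits set. */
        printf("fmt=%o perms=%o\n",
               (unsigned)(st.st_mode & S_IFMT),
               (unsigned)(st.st_mode & 07777));
        return 0;
    }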
*/ -static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, - loff_t i_size, loff_t pos, size_t copied) +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied) { + loff_t i_size, end = pos + copied; blkcnt_t add; size_t gap; + if (end <= i_size_read(inode)) + return; + if (ctx->ops->update_i_size) { - ctx->ops->update_i_size(inode, pos); + ctx->ops->update_i_size(inode, end); return; } - i_size_write(inode, pos); + spin_lock(&inode->i_lock); + + i_size = i_size_read(inode); + if (end > i_size) { + i_size_write(inode, end); #if IS_ENABLED(CONFIG_FSCACHE) - fscache_update_cookie(ctx->cache, NULL, &pos); + fscache_update_cookie(ctx->cache, NULL, &end); #endif - gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); - if (copied > gap) { - add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); + if (copied > gap) { + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); - inode->i_blocks = min_t(blkcnt_t, - DIV_ROUND_UP(pos, SECTOR_SIZE), - inode->i_blocks + add); + inode->i_blocks = min_t(blkcnt_t, + DIV_ROUND_UP(end, SECTOR_SIZE), + inode->i_blocks + add); + } } + spin_unlock(&inode->i_lock); } /** @@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; - loff_t i_size, pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; @@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ + netfs_update_i_size(ctx, inode, pos, copied); pos += copied; - i_size = i_size_read(inode); - if (pos > i_size) - netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index fa9a5bf3c6d5..a16660ab7f83 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,20 +9,6 @@ #include <linux/uio.h> #include "internal.h" -static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) -{ - struct inode *inode = wreq->inode; - unsigned long long end = wreq->start + wreq->transferred; - - if (!wreq->error && - i_size_read(inode) < end) { - if (wreq->netfs_ops->update_i_size) - wreq->netfs_ops->update_i_size(inode, end); - else - i_size_write(inode, end); - } -} - /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. 
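The i_blocks arithmetic consolidated into netfs_update_i_size() above is easy to check by hand: only bytes written past the 512-byte sector holding the old EOF can add sectors, and the running total is clamped to what the new size implies. A standalone rendering of the same computation with one worked case:

    #include <stdio.h>

    #define SECTOR_SIZE 512ULL
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    static unsigned long long
    update_blocks(unsigned long long blocks, unsigned long long i_size,
                  unsigned long long end, unsigned long long copied)
    {
        /* Bytes that still fit in the sector containing the old EOF. */
        unsigned long long gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));

        if (copied > gap) {
            unsigned long long add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
            unsigned long long cap = DIV_ROUND_UP(end, SECTOR_SIZE);

            blocks = blocks + add < cap ? blocks + add : cap;
        }
        return blocks;
    }

    int main(void)
    {
        /* Old EOF at 1000, 300 bytes appended: 24 bytes fit in the
         * sector holding the old EOF, the remaining 276 need one more
         * sector, and ceil(1300/512) = 3 caps the total. */
        printf("%llu\n", update_blocks(2, 1000, 1300, 300));   /* 3 */
        return 0;
    }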
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (async) wreq->iocb = iocb; wreq->len = iov_iter_count(&wreq->buffer.iter); - wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { _debug("begin = %zd", ret); @@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } if (!async) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); ret = netfs_wait_for_write(wreq); if (ret > 0) iocb->ki_pos += ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index e2ee9183392b..d4f16fefd965 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); /* + * buffered_write.c + */ +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied); + +/* * main.c */ extern unsigned int netfs_debug; @@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, enum netfs_rreq_trace trace) { if (test_bit(rreq_flag, &rreq->flags)) { - trace_netfs_rreq(rreq, trace); clear_bit_unlock(rreq_flag, &rreq->flags); smp_mb__after_atomic(); /* Set flag before task state */ + trace_netfs_rreq(rreq, trace); wake_up(&rreq->waitq); } } /* + * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq) +{ + /* Order read of flags before read of anything else, such as error. */ + return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags); +} + +/* + * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq) +{ + /* Order read of flags before read of anything else, such as error. 
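The two helpers above make an ordering explicit: the completing side publishes error/transferred and then clears IN_PROGRESS, so the waiting side must read the flag with acquire semantics before trusting those fields. A C11 analogue of the pairing (test_bit_acquire() corresponds to the acquire load; the release half is done by the completion path's unlock-style clear):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct request {
        int error;               /* written before completion */
        long transferred;        /* written before completion */
        atomic_bool in_progress;
    };

    /* Completion side: result fields first, then a release store. */
    static void complete_request(struct request *r, int error, long n)
    {
        r->error = error;
        r->transferred = n;
        atomic_store_explicit(&r->in_progress, false, memory_order_release);
    }

    /* Waiter side: the acquire load orders the flag check before any
     * subsequent read of ->error or ->transferred. */
    static bool check_in_progress(struct request *r)
    {
        return atomic_load_explicit(&r->in_progress, memory_order_acquire);
    }

    int main(void)
    {
        struct request r = { .in_progress = true };

        complete_request(&r, 0, 42);
        return check_in_progress(&r) ? 1 : r.error;
    }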
*/ + return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags); +} + +/* * fscache-cache.c */ #ifdef CONFIG_PROC_FS diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 3db401d269e7..73da6c9f5777 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) if (v == &netfs_io_requests) { seq_puts(m, - "REQUEST OR REF FL ERR OPS COVERAGE\n" - "======== == === == ==== === =========\n" + "REQUEST OR REF FLAG ERR OPS COVERAGE\n" + "======== == === ==== ==== === =========\n" ); return 0; } rreq = list_entry(v, struct netfs_io_request, proc_link); seq_printf(m, - "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", + "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx", rreq->debug_id, netfs_origins[rreq->origin], refcount_read(&rreq->ref), diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 43b67a28a8fa..20748bcfbf59 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, DEFINE_WAIT(myself); list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) continue; - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); for (;;) { prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) break; trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } } + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); finish_wait(&rreq->waitq, &myself); } @@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, static int netfs_collect_in_app(struct netfs_io_request *rreq, bool (*collector)(struct netfs_io_request *rreq)) { - bool need_collect = false, inactive = true; + bool need_collect = false, inactive = true, done = true; + + if (!netfs_check_rreq_in_progress(rreq)) { + trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); + return 1; /* Done */ + } for (int i = 0; i < NR_IO_STREAMS; i++) { struct netfs_io_subrequest *subreq; @@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, struct netfs_io_subrequest, rreq_link); if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + (!netfs_check_subreq_in_progress(subreq) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { need_collect = true; break; } + if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) + done = false; } - if (!need_collect && !inactive) + if (!need_collect && !inactive && !done) return 0; /* Sleep */ __set_current_state(TASK_RUNNING); @@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, /* * Wait for a request to complete, successfully or otherwise. 
*/ -static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, - bool (*collector)(struct netfs_io_request *rreq)) +static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) { DEFINE_WAIT(myself); ssize_t ret; for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + if (!netfs_check_rreq_in_progress(rreq)) break; + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); finish_wait(&rreq->waitq, &myself); ret = rreq->error; @@ -478,12 +488,12 @@ all_collected: ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_read_collection); + return netfs_wait_for_in_progress(rreq, netfs_read_collection); } ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_write_collection); + return netfs_wait_for_in_progress(rreq, netfs_write_collection); } /* @@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, { DEFINE_WAIT(myself); - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + if (!netfs_check_rreq_in_progress(rreq) || !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) break; schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); finish_wait(&rreq->waitq, &myself); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 96ee18af28ef..3e804da1e1eb 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -218,7 +218,7 @@ reassess: stream->collected_to = front->start; } - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) + if (netfs_check_subreq_in_progress(front)) notes |= HIT_PENDING; smp_rmb(); /* Read counters after IN_PROGRESS flag. */ transferred = READ_ONCE(front->transferred); @@ -293,7 +293,9 @@ reassess: spin_lock(&rreq->lock); remove = front; - trace_netfs_sreq(front, netfs_sreq_trace_discard); + trace_netfs_sreq(front, + notes & ABANDON_SREQ ? 
+ netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); @@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_read_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. */ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index e2b102ffb768..0f3a36852a4d 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -240,7 +240,7 @@ reassess_streams: } /* Stall if the front is still undergoing I/O. */ - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { + if (netfs_check_subreq_in_progress(front)) { notes |= HIT_PENDING; break; } @@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if (wreq->cleanup) - wreq->cleanup(wreq); + if ((wreq->origin == NETFS_UNBUFFERED_WRITE || + wreq->origin == NETFS_DIO_WRITE) && + !wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { @@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq) if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; - if (wreq->iocb->ki_complete) + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); + } wreq->iocb = VFS_PTR_POISON; } @@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_write_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. 
*/ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9d1d8a8bab72..fc9c3e0d34d8 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; - subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + trace_netfs_sreq(subreq, netfs_sreq_trace_split); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df4807460596..4bea008dbebd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. 
Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: + goto out_retry; + case NFS4ERR_DELAY: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); fallthrough; - case -NFS4ERR_GRACE: + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC connection errors */ case -ENETDOWN: case -ENETUNREACH: @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. 
*/ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1228,6 +1272,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, clp, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data, task->tk_status); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ab7868807a7..a2fa6bc4d74e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3adb7d0dbec7..1a7ec68bde15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a3eb3b740f76..3604b616311c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode) head = ei->sysctl; if (head) { - 
RCU_INIT_POINTER(ei->sysctl, NULL); + WRITE_ONCE(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index cc9d74a06ff0..08b78150cdde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry, struct ctl_table_header *head; struct inode *inode; - /* Although proc doesn't have negative dentries, rcu-walk means - * that inode here can be NULL */ - /* AV: can it, indeed? */ - inode = d_inode_rcu(dentry); - if (!inode) - return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; - head = rcu_dereference(PROC_I(inode)->sysctl); + + // false positive is fine here - we'll recheck anyway + if (d_in_lookup(dentry)) + return 0; + + inode = d_inode_rcu(dentry); + // we just might have run into dentry in the middle of __dentry_kill() + if (!inode) + return 1; + + head = READ_ONCE(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 27972c0749e7..4be91eb6ea5c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_FILE; } - if (is_zero_pfn(pmd_pfn(pmd))) + if (is_huge_zero_pmd(pmd)) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 45e94e18f4d5..89160bc34d35 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count) struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; + struct list_head rlist; /* reconnect list */ spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ @@ -776,6 +777,7 @@ struct TCP_Server_Info { __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ + unsigned long neg_start; /* when negotiate started (jiffies) */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ @@ -1302,6 +1304,7 @@ struct cifs_tcon { bool use_persistent:1; /* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ + bool dummy:1; /* dummy tcon used for reconnecting channels */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 66093fa78aed..045227ed4efc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); +void smb2_query_server_interfaces(struct work_struct *work); void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 7216fcec79e8..75142f49d65d 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, 
rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: + __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) /* reset bytes number since we can not check a sign */ @@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } if (rdata->result == -ENODATA) { @@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c48869c29e15..205f547ca49e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) return rc; } -static void smb2_query_server_interfaces(struct work_struct *work) +void smb2_query_server_interfaces(struct work_struct *work) { int rc; int xid; @@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work) (SMB_INTERFACE_POLL_INTERVAL * HZ)); } +#define set_need_reco(server) \ +do { \ + spin_lock(&server->srv_lock); \ + if (server->tcpStatus != CifsExiting) \ + server->tcpStatus = CifsNeedReconnect; \ + spin_unlock(&server->srv_lock); \ +} while (0) + /* * Update the tcpStatus for the server. * This is used to signal the cifsd thread to call cifs_reconnect @@ -137,39 +145,45 @@ void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels) { - struct TCP_Server_Info *pserver; + struct TCP_Server_Info *nserver; struct cifs_ses *ses; + LIST_HEAD(reco); int i; - /* If server is a channel, select the primary channel */ - pserver = SERVER_IS_CHAN(server) ? 
server->primary_server : server; - /* if we need to signal just this channel */ if (!all_channels) { - spin_lock(&server->srv_lock); - if (server->tcpStatus != CifsExiting) - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&server->srv_lock); + set_need_reco(server); return; } - spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { - if (cifs_ses_exiting(ses)) - continue; - spin_lock(&ses->chan_lock); - for (i = 0; i < ses->chan_count; i++) { - if (!ses->chans[i].server) + if (SERVER_IS_CHAN(server)) + server = server->primary_server; + scoped_guard(spinlock, &cifs_tcp_ses_lock) { + set_need_reco(server); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); continue; - - spin_lock(&ses->chans[i].server->srv_lock); - if (ses->chans[i].server->tcpStatus != CifsExiting) - ses->chans[i].server->tcpStatus = CifsNeedReconnect; - spin_unlock(&ses->chans[i].server->srv_lock); + } + spin_lock(&ses->chan_lock); + for (i = 1; i < ses->chan_count; i++) { + nserver = ses->chans[i].server; + if (!nserver) + continue; + nserver->srv_count++; + list_add(&nserver->rlist, &reco); + } + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); } - spin_unlock(&ses->chan_lock); } - spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(server, nserver, &reco, rlist) { + list_del_init(&server->rlist); + set_need_reco(server); + cifs_put_tcp_session(server, 0); + } } /* @@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server) /* * If we're in the process of mounting a share or reconnecting a session * and the server abruptly shut down (e.g. socket wasn't closed, packet - * had been ACK'ed but no SMB response), don't wait longer than 20s to - * negotiate protocol. + * had been ACK'ed but no SMB response), don't wait longer than 20s from + * when negotiate actually started. 
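The reconnect rework above follows a standard shape: while holding cifs_tcp_ses_lock, only pin each secondary channel's server (srv_count++) and collect it onto a private rlist; the status flips and reference drops happen after the lock is released, where sleeping is allowed. A pthread sketch of the same collect-then-act split; mark_need_reconnect() and put_server() are stand-ins for the per-server work and cifs_put_tcp_session():

    #include <pthread.h>

    struct server {
        int refcount;            /* srv_count      */
        int need_reco;           /* tcpStatus flag */
        struct server *rnext;    /* rlist linkage  */
    };

    static pthread_mutex_t ses_lock = PTHREAD_MUTEX_INITIALIZER;

    static void mark_need_reconnect(struct server *s) { s->need_reco = 1; }
    static void put_server(struct server *s) { s->refcount--; }

    static void signal_reconnect(struct server **all, int n)
    {
        struct server *reco = NULL;

        pthread_mutex_lock(&ses_lock);
        for (int i = 0; i < n; i++) {
            all[i]->refcount++;          /* pin: survives the unlock */
            all[i]->rnext = reco;
            reco = all[i];
        }
        pthread_mutex_unlock(&ses_lock);

        /* Outside the lock: heavyweight per-server work is now safe. */
        while (reco) {
            struct server *s = reco;

            reco = s->rnext;
            mark_need_reconnect(s);
            put_server(s);
        }
    }

    int main(void)
    {
        struct server a = { 1, 0, 0 }, b = { 1, 0, 0 };
        struct server *chans[] = { &a, &b };

        signal_reconnect(chans, 2);
        return !(a.need_reco && b.need_reco);
    }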
*/ spin_lock(&server->srv_lock); if (server->tcpStatus == CifsInNegotiate && - time_after(jiffies, server->lstrp + 20 * HZ)) { + time_after(jiffies, server->neg_start + 20 * HZ)) { spin_unlock(&server->srv_lock); cifs_reconnect(server, false); return true; @@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->max_cached_dirs = ctx->max_cached_dirs; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; - INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - INIT_DELAYED_WORK(&tcon->query_interfaces, - smb2_query_server_interfaces); if (ses->server->dialect >= SMB30_PROT_ID && (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { /* schedule query interfaces poll */ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); } -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh); -#endif spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -4201,6 +4209,7 @@ retry: server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; + server->neg_start = jiffies; spin_unlock(&server->srv_lock); rc = server->ops->negotiate(xid, ses, server); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index a634a34d4086..59ccc2229ab3 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "symlinkroot mount options must be absolute path\n"); goto cifs_parse_mount_err; } - kfree(ctx->symlinkroot); - ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL); - if (!ctx->symlinkroot) + if (strnlen(param->string, PATH_MAX) == PATH_MAX) { + cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n", + PATH_MAX - 1); goto cifs_parse_mount_err; + } + kfree(ctx->symlinkroot); + ctx->symlinkroot = param->string; + param->string = NULL; break; } /* case Opt_ignore: - is ignored as expected ... */ @@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } - /* - * By default resolve all native absolute symlinks relative to "/mnt/". - * Same default has drvfs driver running in WSL for resolving SMB shares. 
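The symlinkroot parsing fix above swaps a kstrdup() for an ownership transfer: validate the length first, free the old value, steal param->string, and NULL the source so the fs_context teardown will not free it a second time. The same shape in plain C (a sketch; PATH_MAX stands in for the kernel's limit):

    #include <limits.h>
    #include <stdlib.h>
    #include <string.h>

    struct ctx { char *symlinkroot; };
    struct param { char *string; };

    static int take_symlinkroot(struct ctx *ctx, struct param *param)
    {
        /* Reject overlong input before taking ownership. */
        if (strnlen(param->string, PATH_MAX) == PATH_MAX)
            return -1;
        free(ctx->symlinkroot);          /* kfree() the old value    */
        ctx->symlinkroot = param->string;
        param->string = NULL;            /* caller must not free it  */
        return 0;
    }

    int main(void)
    {
        struct ctx c = { 0 };
        struct param p = { strdup("/mnt/") };
        int rc = take_symlinkroot(&c, &p);

        free(c.symlinkroot);
        return rc;
    }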
- */ - if (!ctx->symlinkroot) - ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL); - return 0; cifs_parse_mount_err: diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index e77017f47084..da23cc12a52c 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) #ifdef CONFIG_CIFS_DFS_UPCALL INIT_LIST_HEAD(&ret_buf->dfs_ses_list); #endif + INIT_LIST_HEAD(&ret_buf->pending_opens); + INIT_DELAYED_WORK(&ret_buf->query_interfaces, + smb2_query_server_interfaces); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh); +#endif return ret_buf; } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index ba0193cf9033..4e5460206397 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, /* The Mode field in the response can now include the file type as well */ fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), fattr->cf_cifsattrs & ATTR_DIRECTORY); - fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); + fattr->cf_dtype = S_DT(fattr->cf_mode); switch (fattr->cf_mode & S_IFMT) { case S_IFLNK: diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 511611206dab..5fa29a97ac15 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, struct reparse_symlink_data_buffer *buf = NULL; struct cifs_open_info_data data = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + const char *symroot = cifs_sb->ctx->symlinkroot; struct inode *new; struct kvec iov; __le16 *path = NULL; @@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * ensure compatibility of this symlink stored in absolute form * on the SMB server. 
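The one-line readdir.c change above fixes a type confusion: S_DT() merely shifts the S_IFMT bits of a POSIX mode down into d_type space, so it must be fed the converted cf_mode, not the raw SMB wire Mode. The shift is easy to verify:

    #include <dirent.h>
    #include <stdio.h>
    #include <sys/stat.h>

    /* Same definition as the kernel's S_DT() (S_DT_SHIFT == 12). */
    #define S_DT(mode) (((mode) & S_IFMT) >> 12)

    int main(void)
    {
        mode_t mode = S_IFLNK | 0777;

        /* Both print 10: DT_* values are the S_IF* bits shifted down. */
        printf("S_DT(mode) = %u, DT_LNK = %u\n",
               (unsigned)S_DT(mode), (unsigned)DT_LNK);
        return 0;
    }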
*/ - if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) { + if (!strstarts(symname, symroot)) { /* * If the absolute Linux symlink target path is not * inside "symlinkroot" location then there is no way @@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, cifs_dbg(VFS, "absolute symlink '%s' cannot be converted to NT format " "because it is outside of symlinkroot='%s'\n", - symname, cifs_sb->ctx->symlinkroot); + symname, symroot); rc = -EINVAL; goto out; } - len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[len-1] != '/') + len = strlen(symroot); + if (symroot[len - 1] != '/') len++; if (symname[len] >= 'a' && symname[len] <= 'z' && (symname[len+1] == '/' || symname[len+1] == '\0')) { @@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, const char *full_path, struct cifs_sb_info *cifs_sb) { + const char *symroot = cifs_sb->ctx->symlinkroot; char sep = CIFS_DIR_SEP(cifs_sb); char *linux_target = NULL; char *smb_target = NULL; @@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && !relative) { /* * This is an absolute symlink from the server which does not * support POSIX paths, so the symlink is in NT-style path. @@ -875,15 +879,8 @@ globalroot: abs_path += sizeof("\\DosDevices\\")-1; else if (strstarts(abs_path, "\\GLOBAL??\\")) abs_path += sizeof("\\GLOBAL??\\")-1; - else { - /* Unhandled absolute symlink, points outside of DOS/Win32 */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; - } + else + goto out_unhandled_target; /* Sometimes path separator after \?? is double backslash */ if (abs_path[0] == '\\') @@ -910,25 +907,19 @@ globalroot: abs_path++; abs_path[0] = drive_letter; } else { - /* Unhandled absolute symlink. Report an error. */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; + goto out_unhandled_target; } abs_path_len = strlen(abs_path)+1; - symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/') + symlinkroot_len = strlen(symroot); + if (symroot[symlinkroot_len - 1] == '/') symlinkroot_len--; linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL); if (!linux_target) { rc = -ENOMEM; goto out; } - memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len); + memcpy(linux_target, symroot, symlinkroot_len); linux_target[symlinkroot_len] = '/'; memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len); } else if (smb_target[0] == sep && relative) { @@ -966,6 +957,7 @@ globalroot: * These paths have same format as Linux symlinks, so no * conversion is needed. 
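The prefix join in the hunk above (trim a trailing '/' from symlinkroot, then emit exactly one separator before the converted NT path) can be isolated into a few lines. A userspace sketch mirroring that kmalloc/memcpy sequence:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Trim a trailing '/' from the root so exactly one separator lands
     * between root and path, never "root//path". */
    static char *join_symroot(const char *symroot, const char *abs_path)
    {
        size_t rlen = strlen(symroot);
        size_t plen = strlen(abs_path) + 1;   /* include the NUL */
        char *out;

        if (rlen && symroot[rlen - 1] == '/')
            rlen--;
        out = malloc(rlen + 1 + plen);
        if (!out)
            return NULL;
        memcpy(out, symroot, rlen);
        out[rlen] = '/';
        memcpy(out + rlen + 1, abs_path, plen);
        return out;
    }

    int main(void)
    {
        char *p = join_symroot("/mnt/", "c/Users/me");

        if (!p)
            return 1;
        puts(p);                              /* /mnt/c/Users/me */
        free(p);
        return 0;
    }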
*/ +out_unhandled_target: linux_target = smb_target; smb_target = NULL; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a717be1626a3..2df93a75e3b8 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -424,9 +424,9 @@ skip_sess_setup: free_xid(xid); ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - /* regardless of rc value, setup polling */ - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (!tcon->ipc && !tcon->dummy) + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); mutex_unlock(&ses->session_mutex); @@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work) } goto done; } - tcon->status = TID_GOOD; - tcon->retry = false; - tcon->need_reconnect = false; + tcon->dummy = true; /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } #ifdef CONFIG_CIFS_SMB_DIRECT /* @@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; result = smb2_check_receive(mid, server, 0); - if (result != 0) + if (result != 0) { + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad); break; + } written = le32_to_cpu(rsp->DataLength); /* @@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } @@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, 
subreq_debug_index, 0, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index cbc85bca006f..754e94a0e07f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -907,8 +907,10 @@ wait_send_queue: .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(*_remaining_data_length, + sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, + rc = smb_extract_iter_to_rdma(iter, payload_len, &extract); if (rc < 0) goto err_dma; @@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_iter(info, NULL, &remaining_data_length); } +static int smbd_post_send_full_iter(struct smbd_connection *info, + struct iov_iter *iter, + int *_remaining_data_length) +{ + int rc = 0; + + /* + * smbd_post_send_iter() respects the + * negotiated max_send_size, so we need to + * loop until the full iter is posted + */ + + while (iov_iter_count(iter) > 0) { + rc = smbd_post_send_iter(info, iter, _remaining_data_length); + if (rc < 0) + break; + } + + return rc; +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) char name[MAX_NAME_LEN]; int rc; + if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) + return -ENOMEM; + scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); info->request_cache = kmem_cache_create( @@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) goto out1; scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + + struct kmem_cache_args response_args = { + .align = __alignof__(struct smbd_response), + .useroffset = (offsetof(struct smbd_response, packet) + + sizeof(struct smbdirect_data_transfer)), + .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), + }; info->response_cache = - kmem_cache_create( - name, - sizeof(struct smbd_response) + - sp->max_recv_size, - 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create(name, + sizeof(struct smbd_response) + sp->max_recv_size, + &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1747,35 +1778,39 @@ try_again: } /* - * Receive data from receive reassembly queue + * Receive data from the transport's receive reassembly queue * All the incoming data packets are placed in reassembly queue - * buf: the buffer to read data into + * iter: the buffer to read data into * size: the length of data to read * return value: actual data read - * Note: this implementation copies the data from reassebmly queue to receive + * + * Note: this implementation copies the data from reassembly queue to receive * buffers used by upper layer. This is not the optimal code path. A better way * to do it is to not have upper layer allocate its receive buffers but rather * borrow the buffer from reassembly queue, and return it after data is * consumed. But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. 
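 * Until such a rework lands, each receive is a copy: smbd_recv() fills * the caller's iov_iter directly from the queued packets via copy_to_iter().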
*/ -static int smbd_recv_buf(struct smbd_connection *info, char *buf, - unsigned int size) +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + again: /* * No need to hold the reassembly queue lock all the time as we are * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, info->reassembly_data_length); if (info->reassembly_data_length >= size) { int queue_length; @@ -1813,7 +1848,10 @@ again: if (response->first_segment && size == 4) { unsigned int rfc1002_len = data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; data_read = 4; response->first_segment = false; log_read(INFO, "returning rfc1002 length %d\n", @@ -1822,10 +1860,9 @@ again: } to_copy = min_t(int, data_length - offset, to_read); - memcpy( - buf + data_read, - (char *)data_transfer + data_offset + offset, - to_copy); + if (copy_to_iter((char *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; /* move on to the next buffer? */ if (to_copy == data_length - offset) { @@ -1891,90 +1928,6 @@ read_rfc1002_done: } /* - * Receive a page from receive reassembly queue - * page: the page to read data into - * to_read: the length of data to read - * return value: actual data read - */ -static int smbd_recv_page(struct smbd_connection *info, - struct page *page, unsigned int page_offset, - unsigned int to_read) -{ - struct smbdirect_socket *sc = &info->socket; - int ret; - char *to_address; - void *page_address; - - /* make sure we have the page ready for read */ - ret = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= to_read || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (ret) - return ret; - - /* now we can read from reassembly queue and not sleep */ - page_address = kmap_atomic(page); - to_address = (char *) page_address + page_offset; - - log_read(INFO, "reading from page=%p address=%p to_read=%d\n", - page, to_address, to_read); - - ret = smbd_recv_buf(info, to_address, to_read); - kunmap_atomic(page_address); - - return ret; -} - -/* - * Receive data from transport - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC - * return: total bytes read, or 0. SMB Direct will not do partial read. 
- */ -int smbd_recv(struct smbd_connection *info, struct msghdr *msg) -{ - char *buf; - struct page *page; - unsigned int to_read, page_offset; - int rc; - - if (iov_iter_rw(&msg->msg_iter) == WRITE) { - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg iter dir %u\n", - iov_iter_rw(&msg->msg_iter)); - rc = -EINVAL; - goto out; - } - - switch (iov_iter_type(&msg->msg_iter)) { - case ITER_KVEC: - buf = msg->msg_iter.kvec->iov_base; - to_read = msg->msg_iter.kvec->iov_len; - rc = smbd_recv_buf(info, buf, to_read); - break; - - case ITER_BVEC: - page = msg->msg_iter.bvec->bv_page; - page_offset = msg->msg_iter.bvec->bv_offset; - to_read = msg->msg_iter.bvec->bv_len; - rc = smbd_recv_page(info, page, page_offset, to_read); - break; - - default: - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg type %d\n", - iov_iter_type(&msg->msg_iter)); - rc = -EINVAL; - } - -out: - /* SMBDirect will read it all or nothing */ - if (rc > 0) - msg->msg_iter.count = 0; - return rc; -} - -/* * Send data to transport * Each rqst is transported as a SMBDirect payload * rqst: the data to write @@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_iter(info, &rqst->rq_iter, - &remaining_data_length); + rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + &remaining_data_length); if (rc < 0) break; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 52bcb55d9952..93e5b2bb9f28 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->rreq_debug_id, __entry->rreq_debug_index, __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) @@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) ) @@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) ) @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class, __entry->target_offset = target_offset; __entry->len = len; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + TP_printk("xid=%u sid=0x%llx tid=0x%x source 
fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) ) @@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class, __entry->tid = tid; __entry->sesid = sesid; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx", __entry->xid, __entry->sesid, __entry->tid, __entry->fid) ) @@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class, __entry->sesid = sesid; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->rc) ) @@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class, __entry->status = status; __entry->rc = rc; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid, __entry->status, __entry->rc) ) @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class, __entry->cmd = cmd; __entry->mid = mid; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid) ) @@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class, __entry->when_sent = when_sent; __entry->when_received = when_received; ), - TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", + TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent, __entry->when_received) ) @@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class, __assign_str(func_name); __entry->rc = rc; ), - TP_printk("\t%s: xid=%u rc=%d", + TP_printk("%s: xid=%u rc=%d", __get_str(func_name), __entry->xid, __entry->rc) ) @@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class, __entry->ino = ino; __entry->rc = rc; ), - TP_printk("\tino=%lu rc=%d", + TP_printk("ino=%lu rc=%d", __entry->ino, __entry->rc) ) @@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class, __entry->xid = xid; __assign_str(func_name); ), - TP_printk("\t%s: xid=%u", + TP_printk("%s: xid=%u", __get_str(func_name), __entry->xid) ) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 7839efe050bf..000cc7f4a3ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3444,16 +3444,41 @@ xfs_alloc_read_agf( set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } + #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); - ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); + /* + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. 
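+ * + * The checks below compare the in-memory perag counters against the AGF + * fields just re-read from disk; any mismatch means one of the two is stale.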
+ * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. + */ + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); + xfs_trans_brelse(tp, agfbp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } } -#endif +#endif /* DEBUG */ + if (agfbpp) *agfbpp = agfbp; else diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0c47b5c6ca7d..750111634d9f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi( set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } +#ifdef DEBUG /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. + * It's possible for the AGI to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGI from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag_mount(pag))); + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + xfs_trans_brelse(tp, agibp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } + } +#endif /* DEBUG */ + if (agibpp) *agibpp = agibp; else diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8af83bd161f9..ba5bd6031ece 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit( return error; } -/* - * Push a single buffer on a delwri queue. - * - * The purpose of this function is to submit a single buffer of a delwri queue - * and return with the buffer still on the original queue. - * - * The buffer locking and queue management logic between _delwri_pushbuf() and - * _delwri_queue() guarantee that the buffer cannot be queued to another list - * before returning. - */ -int -xfs_buf_delwri_pushbuf( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - int error; - - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); - - xfs_buf_lock(bp); - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); - bp->b_flags |= XBF_WRITE; - xfs_buf_submit(bp); - - /* - * The buffer is now locked, under I/O but still on the original delwri - * queue. Wait for I/O completion, restore the DELWRI_Q flag and - * return with the buffer unlocked and still on the original queue.
- */ - error = xfs_buf_iowait(bp); - bp->b_flags |= _XBF_DELWRI_Q; - xfs_buf_unlock(bp); - - return error; -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9d2ab567cf81..15fc56948346 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); -extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 90139e0f3271..7fc54725c5f6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } +static void +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->__bli_format; + return; + } + + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), + GFP_KERNEL | __GFP_NOFAIL); +} + +static void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->__bli_format) { + kfree(bip->bli_formats); + bip->bli_formats = NULL; + } +} + +static void +xfs_buf_item_free( + struct xfs_buf_log_item *bip) +{ + xfs_buf_item_free_format(bip); + kvfree(bip->bli_item.li_lv_shadow); + kmem_cache_free(xfs_buf_item_cache, bip); +} + +/* + * xfs_buf_item_relse() is called when the buf log item is no longer needed. + */ +static void +xfs_buf_item_relse( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + + trace_xfs_buf_item_relse(bp, _RET_IP_); + + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ASSERT(atomic_read(&bip->bli_refcount) == 0); + + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +} + /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -390,6 +445,42 @@ xfs_buf_item_pin( } /* + * For a stale BLI, process all the necessary completions that must be + * performed when the final BLI reference goes away. The buffer will be + * referenced and locked here - we return to the caller with the buffer still + * referenced and locked for them to finalise processing of the buffer. + */ +static void +xfs_buf_item_finish_stale( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_item *lip = &bip->bli_item; + + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); + + if (bip->bli_flags & XFS_BLI_STALE_INODE) { + xfs_buf_item_done(bp); + xfs_buf_inode_iodone(bp); + ASSERT(list_empty(&bp->b_li_list)); + return; + } + + /* + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do + * the right thing regardless of the situation in which we are called. 
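+ * + * xfs_buf_item_relse() below also drops the buffer reference that the + * BLI itself held on the buffer.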
+ */ + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip); + ASSERT(bp->b_log_item == NULL); +} + +/* * This is called to unpin the buffer associated with the buf log item which was * previously pinned with a call to xfs_buf_item_pin(). We enter this function * with a buffer pin count, a buffer reference and a BLI reference. @@ -438,13 +529,6 @@ xfs_buf_item_unpin( } if (stale) { - ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_flags & XBF_STALE); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(list_empty(&lip->li_trans)); - ASSERT(!bp->b_transp); - trace_xfs_buf_item_unpin_stale(bip); /* @@ -455,22 +539,7 @@ xfs_buf_item_unpin( * processing is complete. */ xfs_buf_rele(bp); - - /* - * If we get called here because of an IO error, we may or may - * not have the item on the AIL. xfs_trans_ail_delete() will - * take care of that situation. xfs_trans_ail_delete() drops - * the AIL lock. - */ - if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_item_done(bp); - xfs_buf_inode_iodone(bp); - ASSERT(list_empty(&bp->b_li_list)); - } else { - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - } + xfs_buf_item_finish_stale(bip); xfs_buf_relse(bp); return; } @@ -543,43 +612,42 @@ xfs_buf_item_push( * Drop the buffer log item refcount and take appropriate action. This helper * determines whether the bli must be freed or not, since a decrement to zero * does not necessarily mean the bli is unused. - * - * Return true if the bli is freed, false otherwise. */ -bool +void xfs_buf_item_put( struct xfs_buf_log_item *bip) { - struct xfs_log_item *lip = &bip->bli_item; - bool aborted; - bool dirty; + + ASSERT(xfs_buf_islocked(bip->bli_buf)); /* drop the bli ref and return if it wasn't the last one */ if (!atomic_dec_and_test(&bip->bli_refcount)) - return false; + return; - /* - * We dropped the last ref and must free the item if clean or aborted. - * If the bli is dirty and non-aborted, the buffer was clean in the - * transaction but still awaiting writeback from previous changes. In - * that case, the bli is freed on buffer writeback completion. - */ - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xlog_is_shutdown(lip->li_log); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - if (dirty && !aborted) - return false; + /* If the BLI is in the AIL, then it is still dirty and in use */ + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); + return; + } /* - * The bli is aborted or clean. An aborted item may be in the AIL - * regardless of dirty state. For example, consider an aborted - * transaction that invalidated a dirty bli and cleared the dirty - * state. + * In shutdown conditions, we can be asked to free a dirty BLI that + * isn't in the AIL. This can occur due to a checkpoint aborting a BLI + * instead of inserting it into the AIL at checkpoint IO completion. If + * there's another bli reference (e.g. a btree cursor holds a clean + * reference) and it is released via xfs_trans_brelse(), we can get here + * with that aborted, dirty BLI. In this case, it is safe to free the + * dirty BLI immediately, as it is not in the AIL and there are no + * other references to it. + * + * We should never get here with a stale BLI via that path as + * xfs_trans_brelse() specifically holds onto stale buffers rather than + * releasing them. 
*/ - if (aborted) - xfs_trans_ail_delete(lip, 0); - xfs_buf_item_relse(bip->bli_buf); - return true; + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) || + test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags)); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + xfs_buf_item_relse(bip); } /* @@ -600,6 +668,15 @@ xfs_buf_item_put( * if necessary but do not unlock the buffer. This is for support of * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't * free the item. + * + * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must* + * perform a completion abort of any objects attached to the buffer for IO + * tracking purposes. This generally only happens in shutdown situations, + * normally xfs_buf_item_unpin() will drop the last BLI reference and perform + * completion processing. However, because transaction completion can race with + * checkpoint completion during a shutdown, this release context may end up + * being the last active reference to the BLI and so needs to perform this + * cleanup. */ STATIC void xfs_buf_item_release( @@ -607,18 +684,19 @@ xfs_buf_item_release( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool released; bool hold = bip->bli_flags & XFS_BLI_HOLD; bool stale = bip->bli_flags & XFS_BLI_STALE; -#if defined(DEBUG) || defined(XFS_WARN) - bool ordered = bip->bli_flags & XFS_BLI_ORDERED; - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; bool aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; +#if defined(DEBUG) || defined(XFS_WARN) + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; #endif trace_xfs_buf_item_release(bip); + ASSERT(xfs_buf_islocked(bp)); + /* * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. @@ -634,16 +712,56 @@ xfs_buf_item_release( bp->b_transp = NULL; bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); + /* If there are other references, then we have nothing to do. */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + goto out_release; + + /* + * Stale buffer completion frees the BLI, unlocks and releases the + * buffer. Neither the BLI nor the buffer is safe to reference after this + * call, so there's nothing more we need to do here. + * + * If we get here with a stale buffer and references to the BLI remain, + * we must not unlock the buffer as the last BLI reference owns lock + * context, not us. + */ + if (stale) { + xfs_buf_item_finish_stale(bip); + xfs_buf_relse(bp); + ASSERT(!hold); + return; + } + /* - * Unref the item and unlock the buffer unless held or stale. Stale - * buffers remain locked until final unpin unless the bli is freed by - * the unref call. The latter implies shutdown because buffer - * invalidation dirties the bli and transaction. + * Dirty or clean, aborted items are done and need to be removed from + * the AIL and released. This frees the BLI, but leaves the buffer + * locked and referenced. */ - released = xfs_buf_item_put(bip); - if (hold || (stale && !released)) + if (aborted || xlog_is_shutdown(lip->li_log)) { + ASSERT(list_empty(&bip->bli_buf->b_li_list)); + xfs_buf_item_done(bp); + goto out_release; + } + + /* + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer + * locked and referenced. + * + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
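+ * The ASSERT in the else branch below checks that invariant in debug builds.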
+ */ + if (!dirty) + xfs_buf_item_relse(bip); + else + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); + + /* Not safe to reference the BLI from here */ +out_release: + /* + * If we get here with a stale buffer, we must not unlock the + * buffer as the last BLI reference owns lock context, not us. + */ + if (stale || hold) return; - ASSERT(!stale || aborted); xfs_buf_relse(bp); } @@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC void -xfs_buf_item_get_format( - struct xfs_buf_log_item *bip, - int count) -{ - ASSERT(bip->bli_formats == NULL); - bip->bli_format_count = count; - - if (count == 1) { - bip->bli_formats = &bip->__bli_format; - return; - } - - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), - GFP_KERNEL | __GFP_NOFAIL); -} - -STATIC void -xfs_buf_item_free_format( - struct xfs_buf_log_item *bip) -{ - if (bip->bli_formats != &bip->__bli_format) { - kfree(bip->bli_formats); - bip->bli_formats = NULL; - } -} - /* * Allocate a new buf log item to go with the given buffer. * Set the buffer's b_log_item field to point to the new @@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format( return false; } -STATIC void -xfs_buf_item_free( - struct xfs_buf_log_item *bip) -{ - xfs_buf_item_free_format(bip); - kvfree(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_cache, bip); -} - -/* - * xfs_buf_item_relse() is called when the buf log item is no longer needed. - */ -void -xfs_buf_item_relse( - struct xfs_buf *bp) -{ - struct xfs_buf_log_item *bip = bp->b_log_item; - - trace_xfs_buf_item_relse(bp, _RET_IP_); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - - if (atomic_read(&bip->bli_refcount)) - return; - bp->b_log_item = NULL; - xfs_buf_rele(bp); - xfs_buf_item_free(bip); -} - void xfs_buf_item_done( struct xfs_buf *bp) @@ -1023,5 +1086,5 @@ xfs_buf_item_done( xfs_trans_ail_delete(&bp->b_log_item->bli_item, (bp->b_flags & _XBF_LOGRECOVERY) ? 
0 : SHUTDOWN_CORRUPT_INCORE); - xfs_buf_item_relse(bp); + xfs_buf_item_relse(bp->b_log_item); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index e10e324cd245..416890b84f8c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -49,8 +49,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_done(struct xfs_buf *bp); -void xfs_buf_item_relse(struct xfs_buf *); -bool xfs_buf_item_put(struct xfs_buf_log_item *); +void xfs_buf_item_put(struct xfs_buf_log_item *bip); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_inode_iodone(struct xfs_buf *); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4e32f0860b7..0bd8022e47b4 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1398,11 +1398,9 @@ xfs_qm_dqflush( ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(atomic_read(&dqp->q_pincount) == 0); trace_xfs_dqflush(dqp); - - xfs_qm_dqunpin_wait(dqp); - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 48254a72071b..0b41b18debf3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range( } #define XFS_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) STATIC long __xfs_file_fallocate( diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 726e29b837e6..bbc2f2973dcc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -979,7 +979,15 @@ xfs_reclaim_inode( */ if (xlog_is_shutdown(ip->i_mount->m_log)) { xfs_iunpin_wait(ip); + /* + * Avoid an ABBA deadlock on the inode cluster buffer vs + * concurrent xfs_ifree_cluster() trying to mark the inode + * stale. We don't need the inode locked to run the flush abort + * code, but the flush abort needs to lock the cluster buffer. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iflush_shutdown_abort(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); goto reclaim; } if (xfs_ipincount(ip)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ee3e0f284287..761a996a857c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1635,7 +1635,7 @@ retry: iip = ip->i_itemp; if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { ASSERT(!list_empty(&iip->ili_item.li_bio_list)); - ASSERT(iip->ili_last_fields); + ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log)); goto out_iunlock; } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c6cb0b6b9e46..285e27ff89e2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -758,11 +758,14 @@ xfs_inode_item_push( * completed and items removed from the AIL before the next push * attempt.
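 * (xfsaild responds to XFS_ITEM_PINNED by forcing the log once the * current push cycle completes, which unpins the buffer.)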
*/ + trace_xfs_inode_push_stale(ip, _RET_IP_); return XFS_ITEM_PINNED; } - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) { + trace_xfs_inode_push_pinned(ip, _RET_IP_); return XFS_ITEM_PINNED; + } if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f66d2d430e4f..a80cb6b9969a 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -793,8 +793,10 @@ xlog_cil_ail_insert( struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; - if (aborted) + if (aborted) { + trace_xlog_ail_insert_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { lip->li_ops->iop_release(lip); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 08443ceec329..866c71d9fbae 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -320,7 +320,7 @@ xfs_mru_cache_create( xfs_mru_cache_free_func_t free_func) { struct xfs_mru_cache *mru = NULL; - int err = 0, grp; + int grp; unsigned int grp_time; if (mrup) @@ -341,8 +341,8 @@ xfs_mru_cache_create( mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists), GFP_KERNEL | __GFP_NOFAIL); if (!mru->lists) { - err = -ENOMEM; - goto exit; + kfree(mru); + return -ENOMEM; } for (grp = 0; grp < mru->grp_count; grp++) @@ -361,14 +361,7 @@ xfs_mru_cache_create( mru->free_func = free_func; mru->data = data; *mrup = mru; - -exit: - if (err && mru && mru->lists) - kfree(mru->lists); - if (err && mru) - kfree(mru); - - return err; + return 0; } /* @@ -425,10 +418,6 @@ xfs_mru_cache_insert( { int error = -EINVAL; - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - goto out_free; - error = -ENOMEM; if (radix_tree_preload(GFP_KERNEL)) goto out_free; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 417439b58785..fa135ac26471 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,6 +134,7 @@ xfs_qm_dqpurge( dqp->q_flags |= XFS_DQFLAG_FREEING; + xfs_qm_dqunpin_wait(dqp); xfs_dqflock(dqp); /* @@ -465,6 +466,7 @@ xfs_qm_dquot_isolate( struct xfs_dquot *dqp = container_of(item, struct xfs_dquot, q_lru); struct xfs_qm_isolate *isol = arg; + enum lru_status ret = LRU_SKIP; if (!xfs_dqlock_nowait(dqp)) goto out_miss_busy; @@ -478,6 +480,16 @@ xfs_qm_dquot_isolate( goto out_miss_unlock; /* + * If the dquot is pinned or dirty, rotate it to the end of the LRU to + * give some time for it to be cleaned before we try to isolate it + * again. + */ + ret = LRU_ROTATE; + if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) { + goto out_miss_unlock; + } + + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ @@ -492,41 +504,14 @@ xfs_qm_dquot_isolate( } /* - * If the dquot is dirty, flush it. If it's already being flushed, just - * skip it so there is time for the IO to complete before we try to - * reclaim it again on the next LRU pass. + * The dquot may still be under IO, in which case the flush lock will be + * held. If we can't get the flush lock now, just skip over the dquot as + * if it was dirty. 
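+ * The LRU_ROTATE return value set up above applies to this path too, so + * such dquots are revisited on a later scan rather than dropped.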
*/ if (!xfs_dqflock_nowait(dqp)) goto out_miss_unlock; - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* we have to drop the LRU lock to flush the dquot */ - spin_unlock(&lru->lock); - - error = xfs_dquot_use_attached_buf(dqp, &bp); - if (!bp || error == -EAGAIN) { - xfs_dqfunlock(dqp); - goto out_unlock_dirty; - } - - /* - * dqflush completes dqflock on error, and the delwri ioend - * does it on success. - */ - error = xfs_qm_dqflush(dqp, bp); - if (error) - goto out_unlock_dirty; - - xfs_buf_delwri_queue(bp, &isol->buffers); - xfs_buf_relse(bp); - goto out_unlock_dirty; - } - + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); @@ -548,13 +533,7 @@ out_miss_unlock: out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - return LRU_SKIP; - -out_unlock_dirty: - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - xfs_dqunlock(dqp); - return LRU_RETRY; + return ret; } static unsigned long @@ -1486,7 +1465,6 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1497,34 +1475,8 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - /* - * The only way the dquot is already flush locked by the time quotacheck - * gets here is if reclaim flushed it before the dqadjust walk dirtied - * it for the final time. Quotacheck collects all dquot bufs in the - * local delwri queue before dquots are dirtied, so reclaim can't have - * possibly queued it for I/O. The only way out is to push the buffer to - * cycle the flush lock. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* buf is pinned in-core by delwri list */ - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - goto out_unlock; - - if (!(bp->b_flags & _XBF_DELWRI_Q)) { - error = -EAGAIN; - xfs_buf_relse(bp); - goto out_unlock; - } - xfs_buf_unlock(bp); - - xfs_buf_delwri_pushbuf(bp, buffer_list); - xfs_buf_rele(bp); - - error = -EAGAIN; - goto out_unlock; - } + xfs_qm_dqunpin_wait(dqp); + xfs_dqflock(dqp); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6484c596ecea..736eb0924573 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom( kfree(nmp); + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); + if (min_logfsbs > mp->m_sb.sb_logblocks) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0bc4b5489078..bb0a82635a77 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2020,14 +2020,13 @@ xfs_remount_rw( int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { + xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } - if (mp->m_rtdev_targp && - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 01d284a1c759..ba45d801df1c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 
DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_iodone_async); @@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) + __field(unsigned long, iflags) __field(unsigned long, caller_ip) ), TP_fast_assign( @@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); + __entry->iflags = ip->i_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, + __entry->iflags, (char *)__entry->caller_ip) ) @@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_inode_push_pinned); +DEFINE_IREF_EVENT(xfs_inode_push_stale); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), @@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); +DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); +DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c6657072361a..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -742,8 +742,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 80add26c0111..01315ed75502 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -727,7 +727,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -777,26 +777,6 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -static void -xfs_submit_zoned_bio( - struct iomap_ioend *ioend, - struct xfs_open_zone *oz, - bool is_seq) -{ - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; - ioend->io_private = oz; - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ - - if (is_seq) { - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; - } else { - xfs_mark_rtg_boundary(ioend); - } - - submit_bio(&ioend->io_bio); -} - /* * Cache the last zone written to for an inode so that it is considered first * for subsequent writes. 
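The xfs_select_zone() hunk above makes the open-coded waitqueue loop bail out on filesystem shutdown as well as on getting a zone, so writers blocked waiting for a free zone are not stranded when the fs goes down. A minimal standalone sketch of that pattern, with try_get_zone() and fs_is_shut_down() as hypothetical stand-ins for xfs_select_zone_nowait() and xfs_is_shutdown():

#include <linux/wait.h>
#include <linux/sched.h>

static void *try_get_zone(void);	/* hypothetical, non-blocking */
static bool fs_is_shut_down(void);	/* hypothetical shutdown test */

static void *wait_for_zone_or_shutdown(struct wait_queue_head *wq)
{
	DEFINE_WAIT(wait);
	void *oz;

	for (;;) {
		/* register on the waitqueue before re-testing the condition */
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		oz = try_get_zone();
		if (oz || fs_is_shut_down())
			break;
		schedule();
	}
	finish_wait(wq, &wait);
	return oz;	/* NULL on shutdown, as in the patched loop */
}

Whoever triggers the shutdown is expected to wake the waitqueue so the sleeper re-runs the check and exits.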
@@ -891,6 +871,26 @@ xfs_zone_cache_create_association( xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); } +static void +xfs_submit_zoned_bio( + struct iomap_ioend *ioend, + struct xfs_open_zone *oz, + bool is_seq) +{ + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; + ioend->io_private = oz; + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ + + if (is_seq) { + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; + } else { + xfs_mark_rtg_boundary(ioend); + } + + submit_bio(&ioend->io_bio); +} + void xfs_zone_alloc_and_submit( struct iomap_ioend *ioend, diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer diff --git a/include/dt-bindings/clock/ast2600-clock.h b/include/dt-bindings/clock/ast2600-clock.h index 7ae96c7bd72f..f60fff261130 100644 --- a/include/dt-bindings/clock/ast2600-clock.h +++ b/include/dt-bindings/clock/ast2600-clock.h @@ -122,6 +122,8 @@ #define ASPEED_RESET_PCIE_DEV_OEN 20 #define ASPEED_RESET_PCIE_RC_O 19 #define ASPEED_RESET_PCIE_RC_OEN 18 +#define ASPEED_RESET_MAC2 12 +#define ASPEED_RESET_MAC1 11 #define ASPEED_RESET_PCI_DP 5 #define ASPEED_RESET_HACE 4 #define ASPEED_RESET_AHB 1 diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 028b3e00378e..15c35655f482 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -183,6 +183,12 @@ #define BCM_LED_MULTICOLOR_PROGRAM 0xa /* + * Broadcom Synchronous Ethernet Controls (expansion register 0x0E) + */ +#define BCM_EXP_SYNC_ETHERNET (MII_BCM54XX_EXP_SEL_ER + 0x0E) +#define BCM_EXP_SYNC_ETHERNET_MII_LITE BIT(11) + +/* * BCM5482: Shadow registers * Shadow values go into bits [14:10] of 
register 0x1c to select a shadow * register to access. diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 59877fd2a1d3..de5bd76a400c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -865,9 +865,6 @@ struct kernel_ethtool_ts_info { * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. - * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts via legacy API, drivers implementing @create_rxfh_context - * do not have to set this bit. * @rxfh_per_ctx_fields: device supports selecting different header fields * for Rx hash calculation and RSS for each additional context. * @rxfh_per_ctx_key: device supports setting different RSS key for each @@ -1100,7 +1097,6 @@ struct kernel_ethtool_ts_info { struct ethtool_ops { u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; - u32 cap_rss_ctx_supported:1; u32 rxfh_per_ctx_fields:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; diff --git a/include/linux/fs.h b/include/linux/fs.h index b085f161ed22..040c0036320f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3608,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 120de474a8bf..ea95c4a60fa6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2024 Intel Corporation + * Copyright (c) 2018 - 2025 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -2837,11 +2837,12 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 -#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 -#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 -#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 -#define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 
-#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 4 +#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 +#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 +#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD 4 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 8 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field @@ -2859,13 +2860,31 @@ struct ieee80211_he_6ghz_oper { #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3 #define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4 -#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x38 +#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x78 u8 control; u8 ccfs0; u8 ccfs1; u8 minrate; } __packed; +/** + * enum ieee80211_reg_conn_bits - represents Regulatory connectivity field bits. + * + * This enumeration defines bit flags used to represent regulatory connectivity + * field bits. + * + * @IEEE80211_REG_CONN_LPI_VALID: Indicates whether the LPI bit is valid. + * @IEEE80211_REG_CONN_LPI_VALUE: Represents the value of the LPI bit. + * @IEEE80211_REG_CONN_SP_VALID: Indicates whether the SP bit is valid. + * @IEEE80211_REG_CONN_SP_VALUE: Represents the value of the SP bit. + */ +enum ieee80211_reg_conn_bits { + IEEE80211_REG_CONN_LPI_VALID = BIT(0), + IEEE80211_REG_CONN_LPI_VALUE = BIT(1), + IEEE80211_REG_CONN_SP_VALID = BIT(2), + IEEE80211_REG_CONN_SP_VALUE = BIT(3), +}; + /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -3847,6 +3866,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, WLAN_EID_EXT_FILS_NONCE = 13, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14, + WLAN_EID_EXT_DH_PARAMETER = 32, WLAN_EID_EXT_HE_CAPABILITY = 35, WLAN_EID_EXT_HE_OPERATION = 36, WLAN_EID_EXT_UORA = 37, @@ -3870,6 +3890,8 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_CAPABILITY = 108, WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, + WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION = 136, + WLAN_EID_EXT_NON_AP_STA_REG_CON = 137, }; /* Action category code */ @@ -5333,6 +5355,13 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) return get_unaligned_le16(common); } +/* Defined in Figure 9-1074t in P802.11be_D7.0 */ +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE 0x0001 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_RECO_MAX_LINKS_MASK 0x001e +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE 0x0020 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK 0x0040 +#define IEEE80211_EHT_ML_EXT_MLD_CAPA_BTM_MLD_RECO_MULTI_AP 0x0080 + /** * ieee80211_mle_get_ext_mld_capa_op - returns the extended MLD capabilities * and operations. 
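The IEEE80211_EHT_ML_EXT_MLD_CAPA_* values added above are plain bit masks over the 16-bit extended MLD capabilities and operations field returned by ieee80211_mle_get_ext_mld_capa_op(). A hedged sketch of decoding them; the accessor's u16 return type is assumed from the analogous ieee80211_mle_get_mld_capa_op() shown earlier:

#include <linux/bitfield.h>
#include <linux/printk.h>
#include <linux/ieee80211.h>

static void mle_decode_ext_mld_capa(const u8 *mle_data)
{
	u16 ext = ieee80211_mle_get_ext_mld_capa_op(mle_data);

	if (ext & IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE)
		pr_debug("MLD supports capa/op parameter update\n");

	/* recommended max simultaneous links lives in bits 1..4 */
	pr_debug("recommended max links: %u\n",
		 u16_get_bits(ext, IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_RECO_MAX_LINKS_MASK));
}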
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 939e58c2f386..86055d55836d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,7 +40,7 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) -#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0 +#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 16 #define MLX5_FS_MAX_POOL_SIZE BIT(30) enum mlx5_flow_destination_type { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2c09df4ee574..0e93f342be09 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -12502,17 +12502,6 @@ struct mlx5_ifc_affiliated_event_header_bits { }; enum { - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24), -}; - -enum { - MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13), -}; - -enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, @@ -12520,10 +12509,29 @@ enum { MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL = 0x53, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT = 0x58, MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_IPSEC), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_SAMPLER), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO), +}; + +enum { + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL - 0x40), + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT = + BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT - 0x40), +}; + +enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, }; @@ -13279,4 +13287,41 @@ struct mlx5_ifc_mrtcq_reg_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_pcie_cong_event_obj_bits { + u8 modify_select_field[0x40]; + + u8 inbound_event_en[0x1]; + u8 outbound_event_en[0x1]; + u8 reserved_at_42[0x1e]; + + u8 reserved_at_60[0x1]; + u8 inbound_cong_state[0x3]; + u8 reserved_at_64[0x1]; + u8 outbound_cong_state[0x3]; + u8 reserved_at_68[0x18]; + + u8 inbound_cong_low_threshold[0x10]; + u8 inbound_cong_high_threshold[0x10]; + + u8 
outbound_cong_low_threshold[0x10]; + u8 outbound_cong_high_threshold[0x10]; + + u8 reserved_at_e0[0x340]; +}; + +struct mlx5_ifc_pcie_cong_event_cmd_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_pcie_cong_event_obj_bits cong_obj; +}; + +struct mlx5_ifc_pcie_cong_event_cmd_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits hdr; + struct mlx5_ifc_pcie_cong_event_obj_bits cong_obj; +}; + +enum mlx5e_pcie_cong_event_mod_field { + MLX5_PCIE_CONG_EVENT_MOD_EVENT_EN = BIT(0), + MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2), +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5847c20994d3..a80d21a14612 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3332,8 +3332,8 @@ int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); -struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,21 +265,20 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define 
NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* diff --git a/include/linux/phy.h b/include/linux/phy.h index 74c1bcf64b3c..4c2b8b6e7187 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -106,6 +106,7 @@ extern const int phy_basic_ports_array[3]; * @PHY_INTERFACE_MODE_50GBASER: 50GBase-R - with Clause 134 FEC * @PHY_INTERFACE_MODE_LAUI: 50 Gigabit Attachment Unit Interface * @PHY_INTERFACE_MODE_100GBASEP: 100GBase-P - with Clause 134 FEC + * @PHY_INTERFACE_MODE_MIILITE: MII-Lite - MII without RXER TXER CRS COL * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -150,6 +151,7 @@ typedef enum { PHY_INTERFACE_MODE_50GBASER, PHY_INTERFACE_MODE_LAUI, PHY_INTERFACE_MODE_100GBASEP, + PHY_INTERFACE_MODE_MIILITE, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -272,6 +274,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "laui"; case PHY_INTERFACE_MODE_100GBASEP: return "100gbase-p"; + case PHY_INTERFACE_MODE_MIILITE: + return "mii-lite"; default: return "unknown"; } @@ -409,8 +413,10 @@ struct mii_bus { /** @shared_lock: protect access to the shared element */ struct mutex shared_lock; +#if IS_ENABLED(CONFIG_PHY_PACKAGE) /** @shared: shared state across different PHYs */ struct phy_package_shared *shared[PHY_MAX_ADDR]; +#endif }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) @@ -718,9 +724,11 @@ struct phy_device { /* For use by PHYs to maintain extra state */ void *priv; +#if IS_ENABLED(CONFIG_PHY_PACKAGE) /* shared data pointer */ /* For use by PHYs inside the same package that need a shared state. */ struct phy_package_shared *shared; +#endif /* Reporting cable test results */ struct sk_buff *skb; diff --git a/arch/x86/include/asm/amd/fch.h b/include/linux/platform_data/x86/amd-fch.h index 2cf5153edbc2..2cf5153edbc2 100644 --- a/arch/x86/include/asm/amd/fch.h +++ b/include/linux/platform_data/x86/amd-fch.h diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. 
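 * A minimal sketch of issuing the command once the structure below is
 * filled in; gctx_page, policy, gosvw and the 2 GHz value (2000000 kHz)
 * are illustrative assumptions, gctx_paddr/policy are the structure's
 * existing leading fields, and __psp_pa()/sev_do_cmd() are the existing
 * psp-sev command interface:
 *
 *	struct sev_data_snp_launch_start start = {};
 *	int rc, error;
 *
 *	start.gctx_paddr = __psp_pa(gctx_page);
 *	start.policy = policy;
 *	start.desired_tsc_khz = 2000000;
 *	memcpy(start.gosvw, gosvw, sizeof(start.gosvw));
 *	rc = sev_do_cmd(SEV_CMD_SNP_LAUNCH_START, &start, &error);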
* @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index eced7e9bf69a..3d089bd4d5e9 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -477,40 +477,14 @@ static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) { - if (sts) { - switch (sts->clockid) { - case CLOCK_REALTIME: - ktime_get_real_ts64(&sts->pre_ts); - break; - case CLOCK_MONOTONIC: - ktime_get_ts64(&sts->pre_ts); - break; - case CLOCK_MONOTONIC_RAW: - ktime_get_raw_ts64(&sts->pre_ts); - break; - default: - break; - } - } + if (sts) + ktime_get_clock_ts64(sts->clockid, &sts->pre_ts); } static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) { - if (sts) { - switch (sts->clockid) { - case CLOCK_REALTIME: - ktime_get_real_ts64(&sts->post_ts); - break; - case CLOCK_MONOTONIC: - ktime_get_ts64(&sts->post_ts); - break; - case CLOCK_MONOTONIC_RAW: - ktime_get_raw_ts64(&sts->post_ts); - break; - default: - break; - } - } + if (sts) + ktime_get_clock_ts64(sts->clockid, &sts->post_ts); } #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4f6dcb37bae8..b8b06e71b73e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5265,7 +5265,7 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, - ssize_t maxsize, gfp_t gfp); + ssize_t maxsize); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include <uapi/linux/spi/spi.h> /* Max no. 
of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 542773650200..4a4c2778abae 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); @@ -345,4 +346,13 @@ void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, extern int update_persistent_clock64(struct timespec64 now); #endif +/* Temporary workaround to avoid merge dependencies and cross tree messes */ +#ifndef CLOCK_AUX +#define CLOCK_AUX MAX_CLOCKS +#define MAX_AUX_CLOCKS 8 +#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) + +static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } +#endif + #endif diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 64cb4b04be7a..04b90c88d164 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -11,6 +11,7 @@ #include <linux/gfp.h> #include <linux/dma-mapping.h> #include <linux/completion.h> +#include <linux/virtio_features.h> /** 
* struct virtqueue - a queue to register buffers for sending or receiving. @@ -141,7 +142,9 @@ struct virtio_admin_cmd { * @config: the configuration ops for this device. * @vringh_config: configuration ops for host vrings. * @vqs: the list of virtqueues for this device. - * @features: the features supported by both driver and device. + * @features: the 64 lower features supported by both driver and device. + * @features_array: the full features space supported by both driver and + * device. * @priv: private pointer for the driver's use. * @debugfs_dir: debugfs directory entry. * @debugfs_filter_features: features to be filtered set by debugfs. @@ -159,11 +162,11 @@ struct virtio_device { const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; struct list_head vqs; - u64 features; + VIRTIO_DECLARE_FEATURES(features); void *priv; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; - u64 debugfs_filter_features; + u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; #endif }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index b3e1d30c765b..918cf25cd3c6 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -77,7 +77,11 @@ struct virtqueue_info { * vdev: the virtio_device * @get_features: get the array of feature bits for this device. * vdev: the virtio_device - * Returns the first 64 feature bits (all we currently need). + * Returns the first 64 feature bits. + * @get_extended_features: + * vdev: the virtio_device + * Returns the first VIRTIO_FEATURES_MAX feature bits (all we currently + * need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device * This sends the driver feature bits to the device: it can change @@ -121,6 +125,8 @@ struct virtio_config_ops { void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); + void (*get_extended_features)(struct virtio_device *vdev, + u64 *features); int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, @@ -147,13 +153,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, static inline bool __virtio_test_bit(const struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? */ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - return vdev->features & BIT_ULL(fbit); + return virtio_features_test_bit(vdev->features_array, fbit); } /** @@ -164,13 +164,7 @@ static inline bool __virtio_test_bit(const struct virtio_device *vdev, static inline void __virtio_set_bit(struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? */ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - vdev->features |= BIT_ULL(fbit); + virtio_features_set_bit(vdev->features_array, fbit); } /** @@ -181,13 +175,7 @@ static inline void __virtio_set_bit(struct virtio_device *vdev, static inline void __virtio_clear_bit(struct virtio_device *vdev, unsigned int fbit) { - /* Did you forget to fix assumptions on max features? 
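 * (The open-coded 64-bit checks here are replaced by the helpers from
 * the new <linux/virtio_features.h> added just below. A worked sketch
 * of the dword/bit split for a feature beyond bit 63, e.g. bit 65,
 * where 65 >> 6 == 1 selects dword 1 and 65 & 0x3f == 1 selects bit 1:
 *
 *	u64 features[VIRTIO_FEATURES_DWORDS] = {};
 *
 *	virtio_features_set_bit(features, 65);
 *	WARN_ON(features[1] != BIT_ULL(1));
 *	WARN_ON(!virtio_features_test_bit(features, 65));
 *
 * VIRTIO_DWORD() picks the u64 and VIRTIO_BIT() the position within it.)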
*/ - if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 64); - else - BUG_ON(fbit >= 64); - - vdev->features &= ~BIT_ULL(fbit); + virtio_features_clear_bit(vdev->features_array, fbit); } /** @@ -204,6 +192,17 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, return __virtio_test_bit(vdev, fbit); } +static inline void virtio_get_features(struct virtio_device *vdev, + u64 *features) +{ + if (vdev->config->get_extended_features) { + vdev->config->get_extended_features(vdev, features); + return; + } + + virtio_features_from_u64(features, vdev->config->get_features(vdev)); +} + /** * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device diff --git a/include/linux/virtio_features.h b/include/linux/virtio_features.h new file mode 100644 index 000000000000..f748f2f87de8 --- /dev/null +++ b/include/linux/virtio_features.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_FEATURES_H +#define _LINUX_VIRTIO_FEATURES_H + +#include <linux/bits.h> + +#define VIRTIO_FEATURES_DWORDS 2 +#define VIRTIO_FEATURES_MAX (VIRTIO_FEATURES_DWORDS * 64) +#define VIRTIO_FEATURES_WORDS (VIRTIO_FEATURES_DWORDS * 2) +#define VIRTIO_BIT(b) BIT_ULL((b) & 0x3f) +#define VIRTIO_DWORD(b) ((b) >> 6) +#define VIRTIO_DECLARE_FEATURES(name) \ + union { \ + u64 name; \ + u64 name##_array[VIRTIO_FEATURES_DWORDS];\ + } + +static inline bool virtio_features_chk_bit(unsigned int bit) +{ + if (__builtin_constant_p(bit)) { + /* + * Don't care returning the correct value: the build + * will fail before any bad features access + */ + BUILD_BUG_ON(bit >= VIRTIO_FEATURES_MAX); + } else { + if (WARN_ON_ONCE(bit >= VIRTIO_FEATURES_MAX)) + return false; + } + return true; +} + +static inline bool virtio_features_test_bit(const u64 *features, + unsigned int bit) +{ + return virtio_features_chk_bit(bit) && + !!(features[VIRTIO_DWORD(bit)] & VIRTIO_BIT(bit)); +} + +static inline void virtio_features_set_bit(u64 *features, + unsigned int bit) +{ + if (virtio_features_chk_bit(bit)) + features[VIRTIO_DWORD(bit)] |= VIRTIO_BIT(bit); +} + +static inline void virtio_features_clear_bit(u64 *features, + unsigned int bit) +{ + if (virtio_features_chk_bit(bit)) + features[VIRTIO_DWORD(bit)] &= ~VIRTIO_BIT(bit); +} + +static inline void virtio_features_zero(u64 *features) +{ + memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_DWORDS); +} + +static inline void virtio_features_from_u64(u64 *features, u64 from) +{ + virtio_features_zero(features); + features[0] = from; +} + +static inline bool virtio_features_equal(const u64 *f1, const u64 *f2) +{ + int i; + + for (i = 0; i < VIRTIO_FEATURES_DWORDS; ++i) + if (f1[i] != f2[i]) + return false; + return true; +} + +static inline void virtio_features_copy(u64 *to, const u64 *from) +{ + memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_DWORDS); +} + +static inline void virtio_features_andnot(u64 *to, const u64 *f1, const u64 *f2) +{ + int i; + + for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++) + to[i] = f1[i] & ~f2[i]; +} + +#endif diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 02a9f4dc594d..20e0584db1dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -47,9 +47,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, return 0; } -static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, - const struct virtio_net_hdr *hdr, - bool little_endian) +static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool 
little_endian, u8 hdr_gso_type) { unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; @@ -57,8 +57,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int p_off = 0; unsigned int ip_proto; - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; @@ -84,7 +84,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return -EINVAL; } - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) @@ -122,7 +122,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!protocol) virtio_net_hdr_set_proto(skb, hdr); - else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) + else if (!virtio_net_hdr_match_proto(protocol, + hdr_gso_type)) return -EINVAL; else skb->protocol = protocol; @@ -153,7 +154,7 @@ retry: } } - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -199,6 +200,13 @@ retry: return 0; } +static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr *hdr, + bool little_endian) +{ + return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type); +} + static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, @@ -242,4 +250,177 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, return 0; } +static inline unsigned int virtio_l3min(bool is_ipv6) +{ + return is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr); +} + +static inline int +virtio_net_hdr_tnl_to_skb(struct sk_buff *skb, + const struct virtio_net_hdr_v1_hash_tunnel *vhdr, + bool tnl_hdr_negotiated, + bool tnl_csum_negotiated, + bool little_endian) +{ + const struct virtio_net_hdr *hdr = (const struct virtio_net_hdr *)vhdr; + unsigned int inner_nh, outer_th, inner_th; + unsigned int inner_l3min, outer_l3min; + u8 gso_inner_type, gso_tunnel_type; + bool outer_isv6, inner_isv6; + int ret; + + gso_tunnel_type = hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL; + if (!gso_tunnel_type) + return virtio_net_hdr_to_skb(skb, hdr, little_endian); + + /* Tunnel not supported/negotiated, but the hdr asks for it. */ + if (!tnl_hdr_negotiated) + return -EINVAL; + + /* Either ipv4 or ipv6. */ + if (gso_tunnel_type == VIRTIO_NET_HDR_GSO_UDP_TUNNEL) + return -EINVAL; + + /* The UDP tunnel must carry a GSO packet, but no UFO. */ + gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN | + VIRTIO_NET_HDR_GSO_UDP_TUNNEL); + if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP) + return -EINVAL; + + /* Rely on csum being present. */ + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + + /* Validate offsets. 
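 * A worked example, assuming a VXLAN-like tunnel over IPv4 that carries
 * an inner IPv4/TCP packet: outer MAC (14) plus outer IP (20) gives
 * outer_th = 34 == outer_l3min; adding outer UDP (8), the VXLAN header
 * (8) and the inner MAC (14) gives inner_nh = 64 >= outer_th +
 * sizeof(struct udphdr) = 42; adding the inner IP header (20) gives
 * inner_th = 84 == inner_nh + inner_l3min, so all three checks that
 * follow pass.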
*/ + outer_isv6 = gso_tunnel_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + inner_isv6 = gso_inner_type == VIRTIO_NET_HDR_GSO_TCPV6; + inner_l3min = virtio_l3min(inner_isv6); + outer_l3min = ETH_HLEN + virtio_l3min(outer_isv6); + + inner_th = __virtio16_to_cpu(little_endian, hdr->csum_start); + inner_nh = le16_to_cpu(vhdr->inner_nh_offset); + outer_th = le16_to_cpu(vhdr->outer_th_offset); + if (outer_th < outer_l3min || + inner_nh < outer_th + sizeof(struct udphdr) || + inner_th < inner_nh + inner_l3min) + return -EINVAL; + + /* Let the basic parsing deal with plain GSO features. */ + ret = __virtio_net_hdr_to_skb(skb, hdr, true, + hdr->gso_type & ~gso_tunnel_type); + if (ret) + return ret; + + /* In case of USO, the inner protocol is still unknown and + * `inner_isv6` is just a guess, additional parsing is needed. + * The previous validation ensures that accessing an ipv4 inner + * network header is safe. + */ + if (gso_inner_type == VIRTIO_NET_HDR_GSO_UDP_L4) { + struct iphdr *iphdr = (struct iphdr *)(skb->data + inner_nh); + + inner_isv6 = iphdr->version == 6; + inner_l3min = virtio_l3min(inner_isv6); + if (inner_th < inner_nh + inner_l3min) + return -EINVAL; + } + + skb_set_inner_protocol(skb, inner_isv6 ? htons(ETH_P_IPV6) : + htons(ETH_P_IP)); + if (hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM) { + if (!tnl_csum_negotiated) + return -EINVAL; + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } + + skb->inner_transport_header = inner_th + skb_headroom(skb); + skb->inner_network_header = inner_nh + skb_headroom(skb); + skb->inner_mac_header = inner_nh + skb_headroom(skb); + skb->transport_header = outer_th + skb_headroom(skb); + skb->encapsulation = 1; + return 0; +} + +/* Checksum-related fields validation for the driver */ +static inline int virtio_net_handle_csum_offload(struct sk_buff *skb, + struct virtio_net_hdr *hdr, + bool tnl_csum_negotiated) +{ + if (!(hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL)) { + if (!(hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID)) + return 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!(hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM)) + return 0; + + /* tunnel csum packets are invalid when the related + * feature has not been negotiated + */ + if (!tnl_csum_negotiated) + return -EINVAL; + skb->csum_level = 1; + return 0; + } + + /* DATA_VALID is mutually exclusive with NEEDS_CSUM, and GSO + * over UDP tunnel requires the latter + */ + if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) + return -EINVAL; + return 0; +} + +/* + * vlan_hlen always refers to the outermost MAC header. That also + * means it refers to the only MAC header, if the packet does not carry + * any encapsulation. + */ +static inline int +virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, + struct virtio_net_hdr_v1_hash_tunnel *vhdr, + bool tnl_hdr_negotiated, + bool little_endian, + int vlan_hlen) +{ + struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)vhdr; + unsigned int inner_nh, outer_th; + int tnl_gso_type; + int ret; + + tnl_gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM); + if (!tnl_gso_type) + return virtio_net_hdr_from_skb(skb, hdr, little_endian, false, + vlan_hlen); + + /* Tunnel support was not negotiated, but the skb asks for it. */ + if (!tnl_hdr_negotiated) + return -EINVAL; + + /* Let the basic parsing deal with plain GSO features. 
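 * (The tunnel bits in skb_shinfo(skb)->gso_type are cleared around the
 * call below and restored right after it, so virtio_net_hdr_from_skb()
 * sees a plain GSO packet and the skb's gso_type is left unchanged
 * whether the call succeeds or fails.)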
*/ + skb_shinfo(skb)->gso_type &= ~tnl_gso_type; + ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); + skb_shinfo(skb)->gso_type |= tnl_gso_type; + if (ret) + return ret; + + if (skb->protocol == htons(ETH_P_IPV6)) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + else + hdr->gso_type |= VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + hdr->flags |= VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM; + + inner_nh = skb->inner_network_header - skb_headroom(skb); + outer_th = skb->transport_header - skb_headroom(skb); + vhdr->inner_nh_offset = cpu_to_le16(inner_nh); + vhdr->outer_th_offset = cpu_to_le16(outer_th); + return 0; +} + #endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index c0b1b1ca1163..48bc12d1045b 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -3,6 +3,7 @@ #define _LINUX_VIRTIO_PCI_MODERN_H #include <linux/pci.h> +#include <linux/virtio_config.h> #include <linux/virtio_pci.h> /** @@ -95,10 +96,44 @@ static inline void vp_iowrite64_twopart(u64 val, vp_iowrite32(val >> 32, hi); } -u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev); -u64 vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev); -void vp_modern_set_features(struct virtio_pci_modern_device *mdev, - u64 features); +void +vp_modern_get_driver_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features); +void vp_modern_get_extended_features(struct virtio_pci_modern_device *mdev, + u64 *features); +void vp_modern_set_extended_features(struct virtio_pci_modern_device *mdev, + const u64 *features); + +static inline u64 +vp_modern_get_features(struct virtio_pci_modern_device *mdev) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + + vp_modern_get_extended_features(mdev, features_array); + return features_array[0]; +} + +static inline u64 +vp_modern_get_driver_features(struct virtio_pci_modern_device *mdev) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + int i; + + vp_modern_get_driver_extended_features(mdev, features_array); + for (i = 1; i < VIRTIO_FEATURES_DWORDS; ++i) + WARN_ON_ONCE(features_array[i]); + return features_array[0]; +} + +static inline void +vp_modern_set_features(struct virtio_pci_modern_device *mdev, u64 features) +{ + u64 features_array[VIRTIO_FEATURES_DWORDS]; + + virtio_features_from_u64(features_array, features); + vp_modern_set_extended_features(mdev, features_array); +} + u32 vp_modern_generation(struct virtio_pci_modern_device *mdev); u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev); void vp_modern_set_status(struct virtio_pci_modern_device *mdev, diff --git a/include/net/act_api.h b/include/net/act_api.h index 404df8557f6a..04781c92b43d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -170,14 +170,12 @@ static inline void tc_action_net_exit(struct list_head *net_list, { struct net *net; - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { struct tc_action_net *tn = net_generic(net, id); tcf_idrinfo_destroy(tn->ops, tn->idrinfo); kfree(tn->idrinfo); } - rtnl_unlock(); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1af1841b7601..34f53dde65ce 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -47,6 +47,8 @@ struct unix_sock { #define peer_wait peer_wq.wait wait_queue_entry_t peer_wake; struct scm_stat scm_stat; + int inq_len; + bool recvmsg_inq; #if 
IS_ENABLED(CONFIG_AF_UNIX_OOB) struct sk_buff *oob_skb; #endif diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc8f544e20e..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1350,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 18687ccf0638..022b122a9fb6 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -77,6 +77,7 @@ enum { BOND_OPT_NS_TARGETS, BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, + BOND_OPT_BROADCAST_NEIGH, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 95f67b308c19..e06f0d63b2c1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -115,6 +115,8 @@ static inline int is_netpoll_tx_blocked(struct net_device *dev) #define is_netpoll_tx_blocked(dev) (0) #endif +DECLARE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled); + struct bond_params { int mode; int xmit_policy; @@ -149,6 +151,7 @@ struct bond_params { struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif int coupled_control; + int broadcast_neighbor; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4a092da3a9de..6ec9a8865b8b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2748,15 +2748,16 @@ struct cfg80211_scan_6ghz_params { * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for - * @info: (internal) information about completed scan - * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_6ghz: relevant for split scan request only, - * true if this is the second scan request + * true if this is a 6 GHz scan request + * @first_part: %true if this is the first part of a split scan request or a + * scan that was not split. 
May be %true for a @scan_6ghz scan if no other + * channels were requested * @n_6ghz_params: number of 6 GHz params * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) @@ -2780,14 +2781,11 @@ struct cfg80211_scan_request { u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); u8 bssid[ETH_ALEN] __aligned(2); - - /* internal */ struct wiphy *wiphy; unsigned long scan_start; - struct cfg80211_scan_info info; - bool notified; bool no_cck; bool scan_6ghz; + bool first_part; u32 n_6ghz_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params; s8 tsf_report_link_id; diff --git a/include/net/devlink.h b/include/net/devlink.h index d0ce5a7e984c..93640a29427c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -425,6 +425,7 @@ enum devlink_param_type { DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8, DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16, DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_PARAM_TYPE_U64 = DEVLINK_VAR_ATTR_TYPE_U64, DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING, DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG, }; @@ -433,6 +434,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; + u64 vu64; char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@ -523,6 +525,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, + DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -584,6 +587,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME "enable_phc" #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME "clock_id" +#define DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE DEVLINK_PARAM_TYPE_U64 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcd5969bb559..577fd6a8c372 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2852,8 +2852,6 @@ struct ieee80211_txq { * * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT * and connecting with a lower bandwidth instead - * @IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ: HW requires disabling puncturing in - * EHT in 5 GHz and connecting with a lower bandwidth instead * * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so * no need to stop queues. This really should be set by a driver that @@ -2923,7 +2921,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, IEEE80211_HW_DISALLOW_PUNCTURING, - IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ, IEEE80211_HW_HANDLES_QUIET_CSA, IEEE80211_HW_STRICT, @@ -4313,6 +4310,8 @@ struct ieee80211_prep_tx_info { * @mgd_complete_tx: Notify the driver that the response frame for a previously * transmitted frame announced with @mgd_prepare_tx was received, the data * is filled similarly to @mgd_prepare_tx though the duration is not used. + * Note that this isn't always called for each mgd_prepare_tx() call, for + * example for SAE the 'confirm' messages can be on the air in any order. * * @mgd_protect_tdls_discover: Protect a TDLS discovery session. 
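The DEVLINK_PARAM_TYPE_U64 support and the generic "clock_id" parameter added in the devlink hunk above can be exposed by a driver roughly as follows; a minimal sketch assuming driverinit-only configuration, no validation callbacks, and hypothetical names (hypothetical_params, use_clock_id):

	static const struct devlink_param hypothetical_params[] = {
		DEVLINK_PARAM_GENERIC(CLOCK_ID,
				      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
				      NULL, NULL, NULL),
	};

	/* at probe time, with the devlink instance lock held */
	err = devl_params_register(devlink, hypothetical_params,
				   ARRAY_SIZE(hypothetical_params));

	/* later: fetch the driverinit value through the new vu64 member */
	union devlink_param_value val;

	err = devl_param_driverinit_value_get(devlink,
					      DEVLINK_PARAM_GENERIC_ID_CLOCK_ID,
					      &val);
	if (!err)
		use_clock_id(val.vu64);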
After sending * a TDLS discovery-request, we expect a reply to arrive on the AP's @@ -4477,6 +4476,8 @@ struct ieee80211_prep_tx_info { * new links bitmaps may be 0 if going from/to a non-MLO situation. * The @old array contains pointers to the old bss_conf structures * that were already removed, in case they're needed. + * Note that removal of link should always succeed, so the return value + * will be ignored in a removal only case. * This callback can sleep. * @change_sta_links: Change the valid links of a station, similar to * @change_vif_links. This callback can sleep. diff --git a/include/net/mctp.h b/include/net/mctp.h index 07d458990113..ac4f4ecdfc24 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -183,8 +183,8 @@ struct mctp_sk_key { struct mctp_skb_cb { unsigned int magic; unsigned int net; - int ifindex; /* extended/direct addressing if set */ - mctp_eid_t src; + /* fields below provide extended addressing for ingress to recvmsg() */ + int ifindex; unsigned char halen; unsigned char haddr[MAX_ADDR_LEN]; }; @@ -222,6 +222,8 @@ struct mctp_flow { struct mctp_sk_key *key; }; +struct mctp_dst; + /* Route definition. * * These are held in the pernet->mctp.routes list, with RCU protection for @@ -229,16 +231,25 @@ struct mctp_flow { * dropped on NETDEV_UNREGISTER events. * * Updates to the route table are performed under rtnl; all reads under RCU, - * so routes cannot be referenced over a RCU grace period. Specifically: A - * caller cannot block between mctp_route_lookup and mctp_route_release() + * so routes cannot be referenced over a RCU grace period. */ struct mctp_route { mctp_eid_t min, max; unsigned char type; + unsigned int mtu; - struct mctp_dev *dev; - int (*output)(struct mctp_route *route, + + enum { + MCTP_ROUTE_DIRECT, + MCTP_ROUTE_GATEWAY, + } dst_type; + union { + struct mctp_dev *dev; + struct mctp_fq_addr gateway; + }; + + int (*output)(struct mctp_dst *dst, struct sk_buff *skb); struct list_head list; @@ -246,12 +257,35 @@ struct mctp_route { struct rcu_head rcu; }; +/* Route lookup result: dst. Represents the results of a routing decision, + * but is only held over the individual routing operation. + * + * Will typically be stored on the caller stack, and must be released after + * usage. + */ +struct mctp_dst { + struct mctp_dev *dev; + unsigned int mtu; + mctp_eid_t nexthop; + + /* set for direct addressing */ + unsigned char halen; + unsigned char haddr[MAX_ADDR_LEN]; + + int (*output)(struct mctp_dst *dst, struct sk_buff *skb); +}; + +int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, + unsigned char halen, const unsigned char *haddr); + /* route interfaces */ -struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, - mctp_eid_t daddr); +int mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr, struct mctp_dst *dst); + +void mctp_dst_release(struct mctp_dst *dst); /* always takes ownership of skb */ -int mctp_local_output(struct sock *sk, struct mctp_route *rt, +int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); void mctp_key_unref(struct mctp_sk_key *key); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 3c88d5bc5eed..d38783a2ce57 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -60,15 +60,6 @@ enum { #include <net/neighbour.h> -/* Set to 3 to get tracing... */ -#define ND_DEBUG 1 - -#define ND_PRINTK(val, level, fmt, ...) 
\ -do { \ - if (val <= ND_DEBUG) \ - net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ -} while (0) - struct ctl_table; struct inet6_dev; struct net_device; diff --git a/include/net/netmem.h b/include/net/netmem.h index 7a1dafa3f080..de1d95f04076 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -139,10 +139,9 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) return (__force netmem_ref)((unsigned long)niov | NET_IOV); } -static inline netmem_ref page_to_netmem(const struct page *page) -{ - return (__force netmem_ref)page; -} +#define page_to_netmem(p) (_Generic((p), \ + const struct page * : (__force const netmem_ref)(p), \ + struct page * : (__force netmem_ref)(p))) /** * virt_to_netmem - convert virtual memory pointer to a netmem reference diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 773fc65780b5..db180626be06 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -444,12 +444,7 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) - ret <<= PAGE_SHIFT; - - return ret; + return page_pool_get_dma_addr_netmem(page_to_netmem(page)); } static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index e8bd6ddb7b12..ce587a225661 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -84,6 +84,7 @@ struct xdp_sock { struct list_head map_list; /* Protects map_list */ spinlock_t map_list_lock; + u32 max_tx_budget; /* Protects multiple processes in the control path */ struct mutex mutex; struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,14 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ 
EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ + EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ @@ -63,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -82,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -89,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -96,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -142,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ @@ -366,7 +377,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f333a0ac4ee4..53b5a8c002b1 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -147,6 +147,9 @@ #define SO_PASSRIGHTS 83 +#define SO_INQ 84 +#define SCM_INQ SO_INQ + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/bits.h 
b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 707c1844010c..9e9afdd1238a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2314,7 +2314,7 @@ enum { IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ IPV4_FLOW = 0x10, /* hash only */ IPV6_FLOW = 0x11, /* hash only */ - ETHER_FLOW = 0x12, /* spec only (ether_spec) */ + ETHER_FLOW = 0x12, /* hash or spec (ether_spec) */ /* Used for GTP-U IPv4 and IPv6. * The format of GTP packets only includes @@ -2371,7 +2371,7 @@ enum { /* Flag to enable RSS spreading of traffic matching rule (nfc only) */ #define FLOW_RSS 0x20000000 -/* L3-L4 network traffic flow hash options */ +/* L2-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) #define RXH_VLAN (1 << 2) #define RXH_L3_PROTO (1 << 3) diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 8f30ffa1cd14..96027e26ffba 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -679,6 +679,39 @@ enum { }; enum { + ETHTOOL_A_FLOW_ETHER = 1, + ETHTOOL_A_FLOW_IP4, + ETHTOOL_A_FLOW_IP6, + ETHTOOL_A_FLOW_TCP4, + ETHTOOL_A_FLOW_TCP6, + ETHTOOL_A_FLOW_UDP4, + ETHTOOL_A_FLOW_UDP6, + ETHTOOL_A_FLOW_SCTP4, + ETHTOOL_A_FLOW_SCTP6, + ETHTOOL_A_FLOW_AH4, + ETHTOOL_A_FLOW_AH6, + ETHTOOL_A_FLOW_ESP4, + ETHTOOL_A_FLOW_ESP6, + ETHTOOL_A_FLOW_AH_ESP4, + ETHTOOL_A_FLOW_AH_ESP6, + ETHTOOL_A_FLOW_GTPU4, + ETHTOOL_A_FLOW_GTPU6, + ETHTOOL_A_FLOW_GTPC4, + ETHTOOL_A_FLOW_GTPC6, + ETHTOOL_A_FLOW_GTPC_TEID4, + ETHTOOL_A_FLOW_GTPC_TEID6, + ETHTOOL_A_FLOW_GTPU_EH4, + ETHTOOL_A_FLOW_GTPU_EH6, + ETHTOOL_A_FLOW_GTPU_UL4, + ETHTOOL_A_FLOW_GTPU_UL6, + ETHTOOL_A_FLOW_GTPU_DL4, + ETHTOOL_A_FLOW_GTPU_DL6, + + __ETHTOOL_A_FLOW_CNT, + ETHTOOL_A_FLOW_MAX = (__ETHTOOL_A_FLOW_CNT - 1) +}; + +enum { ETHTOOL_A_RSS_UNSPEC, ETHTOOL_A_RSS_HEADER, ETHTOOL_A_RSS_CONTEXT, @@ -687,6 +720,7 @@ enum { ETHTOOL_A_RSS_HKEY, ETHTOOL_A_RSS_INPUT_XFRM, ETHTOOL_A_RSS_START_CONTEXT, + ETHTOOL_A_RSS_FLOW_HASH, __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1) diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h index 3d7ea58778c9..662e7de46c54 100644 --- a/include/uapi/linux/handshake.h +++ b/include/uapi/linux/handshake.h @@ -45,6 +45,7 @@ enum { HANDSHAKE_A_ACCEPT_PEER_IDENTITY, HANDSHAKE_A_ACCEPT_CERTIFICATE, HANDSHAKE_A_ACCEPT_PEERNAME, + HANDSHAKE_A_ACCEPT_KEYRING, __HANDSHAKE_A_ACCEPT_MAX, HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 873c285996fe..784ace3a519c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1535,6 +1535,7 @@ enum { IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, IFLA_BOND_COUPLED_CONTROL, + IFLA_BOND_BROADCAST_NEIGH, __IFLA_BOND_MAX, }; diff --git 
a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 287cdc81c939..79d53c7a1ebd 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -93,6 +93,15 @@ #define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ #define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ +/* I can handle TSO/USO for UDP tunneled packets */ +#define TUN_F_UDP_TUNNEL_GSO 0x080 + +/* + * I can handle TSO/USO for UDP tunneled packets requiring csum offload for + * the outer header + */ +#define TUN_F_UDP_TUNNEL_GSO_CSUM 0x100 + /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 struct tun_pi { diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 44f2bb93e7e6..23a062781468 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index e1db65df9359..19ad12a0cd4b 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -37,6 +37,14 @@ struct sockaddr_mctp_ext { __u8 smctp_haddr[MAX_ADDR_LEN]; }; +/* A "fully qualified" MCTP address, which includes the system-local network ID, + * required to uniquely resolve a routable EID. + */ +struct mctp_fq_addr { + unsigned int net; + mctp_eid_t eid; +}; + #define MCTP_NET_ANY 0x0 #define MCTP_ADDR_NULL 0x00 diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3a701bd1f31b..3092c2c6f1d2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -186,6 +186,11 @@ enum ovs_packet_cmd { * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment * size. * @OVS_PACKET_ATTR_HASH: Packet hash info (e.g. hash, sw_hash and l4_hash in skb). + * @OVS_PACKET_ATTR_UPCALL_PID: Netlink PID to use for upcalls while + * processing %OVS_PACKET_CMD_EXECUTE. Takes precedence over all other ways + * to determine the Netlink PID including %OVS_USERSPACE_ATTR_PID, + * %OVS_DP_ATTR_UPCALL_PID, %OVS_DP_ATTR_PER_CPU_PIDS and the + * %OVS_VPORT_ATTR_UPCALL_PID. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -205,6 +210,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ OVS_PACKET_ATTR_HASH, /* Packet hash. */ + OVS_PACKET_ATTR_UPCALL_PID, /* u32 Netlink PID. 
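 * For example, a userspace sketch using libnl conventions (msg and
 * upcall_pid are assumptions from the surrounding message build):
 *
 *	nla_put_u32(msg, OVS_PACKET_ATTR_UPCALL_PID, upcall_pid);
 *
 * applies only to the OVS_PACKET_CMD_EXECUTE request that carries it.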
*/ __OVS_PACKET_ATTR_MAX }; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. 
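 * A minimal sketch of the register step, assuming liburing and taking
 * ring, dev_fd (the /dev/ublkcN character device), q_id, tag and
 * buf_idx from the server's context:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
 *
 *	memset(sqe, 0, sizeof(*sqe));
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = dev_fd;
 *	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
 *	cmd->q_id = q_id;
 *	cmd->tag = tag;
 *	cmd->addr = buf_idx;
 *
 * The request data is then reachable via IORING_OP_READ_FIXED or
 * IORING_OP_WRITE_FIXED with buf_index set to buf_idx, and the same
 * layout with UBLK_U_IO_UNREGISTER_IO_BUF releases the slot.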
*/ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) @@ -450,10 +470,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index d4b3e2ae1314..d6ad01fbb8d2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -235,4 +235,11 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* Extended features manipulation */ +#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) +#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) + #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index d7656908f730..1c39cc5f5a31 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -110,6 +110,11 @@ struct vhost_msg_v2 { }; }; +struct vhost_features_array { + __u64 count; /* number of entries present in features array */ + __u64 features[] __counted_by(count); +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 963540deae66..8bf27ab8bcb4 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -70,6 +70,28 @@ * with the same MAC. */ #define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO 65 /* Driver can receive + * GSO-over-UDP-tunnel packets + */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM 66 /* Driver handles + * GSO-over-UDP-tunnel + * packets with partial csum + * for the outer header + */ +#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO 67 /* Device can receive + * GSO-over-UDP-tunnel packets + */ +#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM 68 /* Device handles + * GSO-over-UDP-tunnel + * packets with partial csum + * for the outer header + */ + +/* Offloads bits corresponding to VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO{,_CSUM} + * features + */ +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED 46 +#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED 47 #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ @@ -131,12 +153,17 @@ struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ +#define VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM 8 /* UDP tunnel csum offload */ __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ #define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 0x20 /* UDPv4 tunnel present */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 0x40 /* UDPv6 tunnel present */ +#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 | \ + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6) #define 
VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ __u8 gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ @@ -181,6 +208,12 @@ struct virtio_net_hdr_v1_hash { __le16 padding; }; +struct virtio_net_hdr_v1_hash_tunnel { + struct virtio_net_hdr_v1_hash hash_hdr; + __le16 outer_th_offset; + __le16 inner_nh_offset; +}; + #ifndef VIRTIO_NET_NO_LEGACY /* This header comes first in the scatter-gather list. * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must diff --git a/init/Kconfig b/init/Kconfig index af4c2f085455..666783eb50ab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1716,9 +1716,13 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +# +# marked broken for performance reasons; gives us one more cycle to sort things out. +# config FUTEX_PRIVATE_HASH bool depends on FUTEX && !BASE_SMALL && MMU + depends on BROKEN default y config FUTEX_MPOL diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5111ec040c53..73648d26a622 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1666,11 +1666,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags) io_req_flags_t io_file_get_flags(struct file *file) { + struct inode *inode = file_inode(file); io_req_flags_t res = 0; BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); - if (S_ISREG(file_inode(file)->i_mode)) + if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE)) res |= REQ_F_ISREG; if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) res |= REQ_F_SUPPORT_NOWAIT; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index ce95e3af44a9..f2d2cc319faa 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, if (len > arg->max_len) { len = arg->max_len; if (!(bl->flags & IOBL_INC)) { + arg->partial_map = 1; if (iov != arg->iovs) break; buf->len = len; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 5d83c7adc739..723d0361898e 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -58,7 +58,8 @@ struct buf_sel_arg { size_t max_len; unsigned short nr_iovs; unsigned short mode; - unsigned buf_group; + unsigned short buf_group; + unsigned short partial_map; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, diff --git a/io_uring/net.c b/io_uring/net.c index 9550d4c8f866..43a43522f406 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -75,12 +75,17 @@ struct io_sr_msg { u16 flags; /* initialised and used only by !msg send variants */ u16 buf_group; - bool retry; + unsigned short retry_flags; void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; }; +enum sr_retry_flags { + IO_SR_MSG_RETRY = 1, + IO_SR_MSG_PARTIAL_MAP = 2, +}; + /* * Number of times we'll try and do receives if there's more data. 
If we * exceed this limit, then add us to the back of the queue and retry from @@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req, req->flags &= ~REQ_F_BL_EMPTY; sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = 0; /* get from the provided buffer */ } @@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) @@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; @@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); - if (sr->retry) + if (sr->retry_flags & IO_SR_MSG_RETRY) cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) @@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, * If more is available AND it was a full transfer, retry and * append to this one */ - if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 && + if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 && !iov_iter_count(&kmsg->msg.msg_iter)) { req->cqe.flags = cflags & ~CQE_F_MASK; sr->len = kmsg->msg.msg_inq; sr->done_io += this_ret; - sr->retry = true; + sr->retry_flags |= IO_SR_MSG_RETRY; return false; } } else { @@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (unlikely(ret < 0)) return ret; + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { + kmsg->vec.nr = ret; + kmsg->vec.iovec = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + if (arg.partial_map) + sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP; + /* special case 1 vec, can be a fast path */ if (ret == 1) { sr->buf = arg.iovs[0].iov_base; @@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg } iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, arg.out_len); - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { - kmsg->vec.nr = ret; - kmsg->vec.iovec = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } } else { void __user *buf; @@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; zc->done_io = 0; - zc->retry = false; + zc->retry_flags = 0; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) return -EINVAL; diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 6e0882b051f9..6de6229207a8 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = { }, [IORING_OP_FALLOCATE] = { .needs_file = 1, + .hash_reg_file = 1, .prep = io_fallocate_prep, .issue = io_fallocate, }, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d724602697e7..f2b31fb68992 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv) struct io_mapped_ubuf *imu = priv; unsigned int i; - for (i = 0; i < imu->nr_bvecs; i++) - unpin_user_page(imu->bvec[i].bv_page); + for (i = 0; i < imu->nr_bvecs; i++) { + struct 
folio *folio = page_folio(imu->bvec[i].bv_page); + + unpin_user_folio(folio, 1); + } } static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, @@ -731,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, data->nr_pages_mid = folio_nr_pages(folio); data->folio_shift = folio_shift(folio); + data->first_folio_page_idx = folio_page_idx(folio, page_array[0]); /* * Check if pages are contiguous inside a folio, and all folios have @@ -824,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (coalesced) imu->folio_shift = data.folio_shift; refcount_set(&imu->refs, 1); - off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1); + + off = (unsigned long)iov->iov_base & ~PAGE_MASK; + if (coalesced) + off += data.first_folio_page_idx << PAGE_SHIFT; + node->buf = imu; ret = 0; @@ -840,8 +848,10 @@ done: if (ret) { if (imu) io_free_imu(ctx, imu); - if (pages) - unpin_user_pages(pages, nr_pages); + if (pages) { + for (i = 0; i < nr_pages; i++) + unpin_user_folio(page_folio(pages[i]), 1); + } io_cache_free(&ctx->node_cache, node); node = ERR_PTR(ret); } @@ -1329,7 +1339,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, { unsigned long folio_size = 1 << imu->folio_shift; unsigned long folio_mask = folio_size - 1; - u64 folio_addr = imu->ubuf & ~folio_mask; struct bio_vec *res_bvec = vec->bvec; size_t total_len = 0; unsigned bvec_idx = 0; @@ -1351,8 +1360,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) return -EOVERFLOW; - /* by using folio address it also accounts for bvec offset */ - offset = buf_addr - folio_addr; + offset = buf_addr - imu->ubuf; + /* + * Only the first bvec can have non zero bv_offset, account it + * here and work with full folios below. 
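/*
 * Illustrative sketch (not part of the patch above): the offset math the
 * io_vec_fill_bvec() change adopts. Only the first bvec may carry a
 * non-zero bv_offset; folding it into the byte offset lets the walk treat
 * every folio as starting at 0. Standalone C with made-up values:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int folio_shift = 16;             /* 64 KiB folios */
	uint64_t folio_mask = (1ULL << folio_shift) - 1;
	uint64_t ubuf = 0x7f0000001000ULL;         /* registered buffer start */
	uint64_t first_bv_offset = 0x1000;         /* offset inside first folio */
	uint64_t buf_addr = 0x7f0000013000ULL;     /* address used by this request */

	uint64_t offset = buf_addr - ubuf + first_bv_offset;

	printf("bvec index %llu, intra-folio offset 0x%llx\n",
	       (unsigned long long)(offset >> folio_shift),
	       (unsigned long long)(offset & folio_mask));
	return 0;
}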
+ */ + offset += imu->bvec[0].bv_offset; + src_bvec = imu->bvec + (offset >> imu->folio_shift); offset &= folio_mask; diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 0d2138f16322..25e7e998dcfd 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -49,6 +49,7 @@ struct io_imu_folio_data { unsigned int nr_pages_mid; unsigned int folio_shift; unsigned int nr_folios; + unsigned long first_folio_page_idx; }; bool io_rsrc_cache_init(struct io_ring_ctx *ctx); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 797247a34cb7..085eeed8cd50 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, for_each_sgtable_dma_sg(mem->sgt, sg, i) total_size += sg_dma_len(sg); - if (total_size < off + len) - return -EINVAL; + if (total_size < off + len) { + ret = -EINVAL; + goto err; + } mem->dmabuf_offset = off; mem->size = len; diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index e64ce21f9a80..2ee603a98813 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -134,6 +134,7 @@ config CRASH_DM_CRYPT depends on KEXEC_FILE depends on CRASH_DUMP depends on DM_CRYPT + depends on KEYS help With this option enabled, user space can intereact with /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys diff --git a/kernel/events/core.c b/kernel/events/core.c index 1f746469fda5..0db36b2b2448 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -951,8 +951,6 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == NULL) return; - WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); - cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; @@ -964,6 +962,8 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == NULL) return; + WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); @@ -7251,15 +7251,15 @@ static void __perf_pending_disable(struct perf_event *event) * CPU-A CPU-B * * perf_event_disable_inatomic() - * @pending_disable = CPU-A; + * @pending_disable = 1; * irq_work_queue(); * * sched-out - * @pending_disable = -1; + * @pending_disable = 0; * * sched-in * perf_event_disable_inatomic() - * @pending_disable = CPU-B; + * @pending_disable = 1; * irq_work_queue(); // FAILS * * irq_work_run() @@ -11116,7 +11116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) if (event->attr.type != perf_uprobe.type) return -ENOENT; - if (!perfmon_capable()) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; /* diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index d2aef87c7e9f..aa9a759e824f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -441,7 +441,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); WRITE_ONCE(rb->aux_nest, 0); goto err_put; @@ -526,7 +526,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9c59fa480b0b..3a9a9f240dbc 100644 --- a/kernel/kexec_core.c +++ 
b/kernel/kexec_core.c @@ -1136,6 +1136,7 @@ int kernel_kexec(void) Resume_devices: dpm_resume_end(PMSG_RESTORE); Resume_console: + pm_restore_gfp_mask(); console_resume_all(); thaw_processes(); Restore_console: diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..c2c08007029d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1573,8 +1573,14 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (infosec >= info->hdr->e_shnum) continue; - /* Don't bother with non-allocated sections */ - if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) + /* + * Don't bother with non-allocated sections. + * An exception is the percpu section, which has separate allocations + * for individual CPUs. We relocate the percpu section in the initial + * ELF template and subsequently copy it to the per-CPU destinations. + */ + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC) && + (!infosec || infosec != info->index.pcpu)) continue; if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) @@ -2696,9 +2702,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { - int i; - enum mod_mem_type t = 0; - int ret = -ENOMEM; + int i, ret; + enum mod_mem_type t = MOD_MEM_NUM_TYPES; bool codetag_section_found = false; for_each_mod_mem_type(type) { @@ -2776,7 +2781,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_err: module_memory_restore_rox(mod); - for (t--; t >= 0; t--) + while (t--) module_memory_free(mod, t); if (codetag_section_found) codetag_free_module_sections(mod); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..9216e3b91d3b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode) } console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); @@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); @@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode) BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); - pm_restore_gfp_mask(); console_resume_all(); pm_restore_console(); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index cb1d71562002..7ccd709af93f 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {} /* kernel/power/main.c */ extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); -void pm_restrict_gfp_mask(void); -void pm_restore_gfp_mask(void); -#else -static inline void pm_restrict_gfp_mask(void) {} -static inline void pm_restore_gfp_mask(void) {} #endif #ifdef CONFIG_HIGHMEM diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 76b141b9aac0..bb608b68fb30 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state) return error; Recover_platform: + pm_restore_gfp_mask(); platform_recover(state); goto Resume_devices; } @@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state) trace_suspend_resume(TPS("suspend_enter"), state, false); pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]); - 
pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); - pm_restore_gfp_mask(); Finish: events_check_enabled = false; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8988d38d46a3..ec68fc686bd7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3943,6 +3943,11 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) if (!scx_allow_ttwu_queue(p)) return false; +#ifdef CONFIG_SMP + if (p->sched_class == &stop_sched_class) + return false; +#endif + /* * Do not complicate things with the async wake_list while the CPU is * in hotplug state. @@ -7663,7 +7668,7 @@ const char *preempt_model_str(void) if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) { seq_buf_printf(&s, "(%s)%s", - preempt_dynamic_mode > 0 ? + preempt_dynamic_mode >= 0 ? preempt_modes[preempt_dynamic_mode] : "undef", brace ? "}" : ""); return seq_buf_str(&s); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ad45a8fea245..89019a140826 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1504,7 +1504,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 if (dl_entity_is_special(dl_se)) return; - scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); + scaled_delta_exec = delta_exec; + if (!dl_server(dl_se)) + scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); dl_se->runtime -= scaled_delta_exec; @@ -1611,7 +1613,7 @@ throttle: */ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) { - s64 delta_exec, scaled_delta_exec; + s64 delta_exec; if (!rq->fair_server.dl_defer) return; @@ -1624,9 +1626,7 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) if (delta_exec < 0) return; - scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec); - - rq->fair_server.runtime -= scaled_delta_exec; + rq->fair_server.runtime -= delta_exec; if (rq->fair_server.runtime < 0) { rq->fair_server.dl_defer_running = 0; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 5d2d0562115b..3fe6b0c99f3d 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -82,18 +82,15 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work, - struct wake_q_head *wakeq) + struct cpu_stop_work *work) { list_add_tail(&work->list, &stopper->works); - wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. 
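/*
 * Illustrative sketch (not part of the patch above): the shape of the
 * stop_machine.c change — decide and enqueue while holding the lock, but
 * issue the wakeup only after the lock is dropped, and only if the enqueue
 * actually happened. Standalone pthread analogue, hypothetical names:
 */
#include <pthread.h>
#include <stdbool.h>

struct stopper {
	pthread_mutex_t lock;
	pthread_cond_t wake;
	bool enabled;
	int pending;
};

static bool queue_work(struct stopper *s)
{
	bool enabled;

	pthread_mutex_lock(&s->lock);
	enabled = s->enabled;
	if (enabled)
		s->pending++;                   /* __cpu_stop_queue_work() analogue */
	pthread_mutex_unlock(&s->lock);

	if (enabled)
		pthread_cond_signal(&s->wake);  /* wake_up_process() analogue */
	return enabled;
}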
if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; @@ -101,12 +98,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work, &wakeq); + __cpu_stop_queue_work(stopper, work); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); - wake_up_q(&wakeq); + if (enabled) + wake_up_process(stopper->thread); preempt_enable(); return enabled; @@ -264,7 +262,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); - DEFINE_WAKE_Q(wakeq); int err; retry: @@ -300,8 +297,8 @@ retry: } err = 0; - __cpu_stop_queue_work(stopper1, work1, &wakeq); - __cpu_stop_queue_work(stopper2, work2, &wakeq); + __cpu_stop_queue_work(stopper1, work1); + __cpu_stop_queue_work(stopper2, work2); unlock: raw_spin_unlock(&stopper2->lock); @@ -316,7 +313,10 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + wake_up_process(stopper1->thread); + wake_up_process(stopper2->thread); + } preempt_enable(); return err; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a009c91f7b05..572e3bd0cc94 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1573,6 +1573,39 @@ void ktime_get_raw_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_raw_ts64); +/** + * ktime_get_clock_ts64 - Returns time of a clock in a timespec + * @id: POSIX clock ID of the clock to read + * @ts: Pointer to the timespec64 to be set + * + * The timestamp is invalidated (@ts->sec is set to -1) if the + * clock @id is not available. + */ +void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts) +{ + /* Invalidate time stamp */ + ts->tv_sec = -1; + ts->tv_nsec = 0; + + switch (id) { + case CLOCK_REALTIME: + ktime_get_real_ts64(ts); + return; + case CLOCK_MONOTONIC: + ktime_get_ts64(ts); + return; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(ts); + return; + case CLOCK_AUX ... 
CLOCK_AUX_LAST: + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + ktime_get_aux_ts64(id, ts); + return; + default: + WARN_ON_ONCE(1); + } +} +EXPORT_SYMBOL_GPL(ktime_get_clock_ts64); /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 08141f105c95..3885aadc434d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1436,13 +1436,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) - goto free_now; - - item->filter = filter; - list_add_tail(&item->list, &head->list); - list_for_each_entry(file, &tr->events, list) { if (file->system != dir) continue; @@ -1454,6 +1447,13 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, event_clear_filter(file); } + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + goto free_now; + + item->filter = filter; + list_add_tail(&item->list, &head->list); + delay_free_filter(head); return; free_now: diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index d48b80f3f007..3a74d63a959e 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -10,6 +10,7 @@ #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <linux/kmemleak.h> #define ALLOCINFO_FILE_NAME "allocinfo" #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) @@ -632,8 +633,13 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag mod->name); return -ENOMEM; } - } + /* + * Avoid a kmemleak false positive. The pointer to the counters is stored + * in the alloc_tag section of the module and cannot be directly accessed. 
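/*
 * Illustrative sketch (not part of the patch above): the general shape of
 * the kmemleak annotation the alloc_tag change adds. When the only live
 * reference to an allocation is stored where the scanner never looks, the
 * object is marked so it is neither scanned nor reported. Hypothetical
 * kernel-style driver code:
 */
#include <linux/percpu.h>
#include <linux/kmemleak.h>

struct hit_stats { u64 hits; };
static struct hit_stats __percpu *stats;

static int example_counters_init(void)
{
	stats = alloc_percpu(struct hit_stats);
	if (!stats)
		return -ENOMEM;
	/* The pointer is stashed in storage kmemleak does not scan,
	 * so suppress the false positive at allocation time. */
	kmemleak_ignore_percpu(stats);
	return 0;
}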
+ */ + kmemleak_ignore_percpu(tag->counters); + } return 0; } diff --git a/lib/group_cpus.c b/lib/group_cpus.c index ee272c4cefcc..18d43a406114 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -352,6 +352,9 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) int ret = -ENOMEM; struct cpumask *masks = NULL; + if (numgrps == 0) + return NULL; + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; @@ -426,8 +429,12 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) #else /* CONFIG_SMP */ struct cpumask *group_cpus_evenly(unsigned int numgrps) { - struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + struct cpumask *masks; + if (numgrps == 0) + return NULL; + + masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index f0887344b274..7d82efa5b14f 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -26,9 +26,9 @@ static int rvv_has_vector(void) static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -36,8 +36,9 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -99,7 +100,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -108,8 +109,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -195,9 +197,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -205,8 +207,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -287,7 +290,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -296,8 +299,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -413,9 +417,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -423,8 +427,9 @@ static 
void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -539,7 +544,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -548,8 +553,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -721,9 +727,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -731,8 +737,9 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -915,7 +922,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -924,8 +931,9 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* diff --git a/lib/test_objagg.c b/lib/test_objagg.c index a67b8ef5c5be..ce5c4c36a084 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, int err; stats = objagg_hints_stats_get(objagg_hints); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_hints_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 0f6c9e1fec0b..30ae7518ffbf 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -472,6 +472,7 @@ static ssize_t memcg_path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + kfree(filter->memcg_path); filter->memcg_path = path; return count; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8746ed2fec13..9dc95eac558c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2787,20 +2787,24 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve * the old one - * @h: struct hstate old page belongs to * @old_folio: Old folio to dissolve * @list: List to isolate the page in case we need to * Returns 0 on success, otherwise negated error. 
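/*
 * Illustrative sketch (not part of the patch above): the rvv.c hunks turn a
 * hidden write to t0 into a proper asm output operand, so the compiler
 * knows a register is clobbered and which one holds the vector length.
 * Minimal standalone shape; assumes RISC-V with the V extension, and the
 * function name is hypothetical:
 */
static inline unsigned long rvv_setvlmax_e8m1(void)
{
	unsigned long vl;

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl));
	return vl;
}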
*/ -static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, - struct folio *old_folio, struct list_head *list) +static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio, + struct list_head *list) { - gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + gfp_t gfp_mask; + struct hstate *h; int nid = folio_nid(old_folio); struct folio *new_folio = NULL; int ret = 0; retry: + /* + * The old_folio might have been dissolved from under our feet, so make sure + * to carefully check the state under the lock. + */ spin_lock_irq(&hugetlb_lock); if (!folio_test_hugetlb(old_folio)) { /* @@ -2829,8 +2833,10 @@ retry: cond_resched(); goto retry; } else { + h = folio_hstate(old_folio); if (!new_folio) { spin_unlock_irq(&hugetlb_lock); + gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL); if (!new_folio) @@ -2874,35 +2880,24 @@ free_new: int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { - struct hstate *h; int ret = -EBUSY; - /* - * The page might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. - * Return success when racing as if we dissolved the page ourselves. - */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!folio_test_hugetlb(folio)) return 0; - } - spin_unlock_irq(&hugetlb_lock); /* * Fence off gigantic pages as there is a cyclic dependency between * alloc_contig_range and them. Return -ENOMEM as this has the effect * of bailing out right away without further retrying. */ - if (hstate_is_gigantic(h)) + if (folio_order(folio) > MAX_PAGE_ORDER) return -ENOMEM; if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list)) ret = 0; else if (!folio_ref_count(folio)) - ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); + ret = alloc_and_dissolve_hugetlb_folio(folio, list); return ret; } @@ -2916,7 +2911,6 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) */ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) { - struct hstate *h; struct folio *folio; int ret = 0; @@ -2925,23 +2919,9 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) while (start_pfn < end_pfn) { folio = pfn_folio(start_pfn); - /* - * The folio might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. 
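/*
 * Illustrative sketch (not part of the patch above): the "fast unlocked
 * check, then re-validate under the lock" pattern the hugetlb rework leans
 * on — the unlocked test only skips obvious non-candidates, and the
 * authoritative check is repeated after the lock is taken. Standalone
 * analogue with hypothetical names:
 */
#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool is_candidate;

static int try_dissolve(void)
{
	if (!atomic_load(&is_candidate))
		return 0;	/* fast path: do not take the lock in vain */

	pthread_mutex_lock(&lock);
	if (!atomic_load(&is_candidate)) {
		/* state changed under our feet; treat racing as success */
		pthread_mutex_unlock(&lock);
		return 0;
	}
	/* ... perform the actual work while still holding the lock ... */
	pthread_mutex_unlock(&lock);
	return 0;
}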
- */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); - start_pfn++; - continue; - } - spin_unlock_irq(&hugetlb_lock); - - if (!folio_ref_count(folio)) { - ret = alloc_and_dissolve_hugetlb_folio(h, folio, - &isolate_list); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) { + ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list); if (ret) break; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da9cee34ee1b..8d588e685311 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1247,6 +1247,20 @@ void __ref kmemleak_transient_leak(const void *ptr) EXPORT_SYMBOL(kmemleak_transient_leak); /** + * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu + * address argument + * @ptr: percpu address of the object + */ +void __ref kmemleak_ignore_percpu(const void __percpu *ptr) +{ + pr_debug("%s(0x%px)\n", __func__, ptr); + + if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) + make_black_object((unsigned long)ptr, OBJECT_PERCPU); +} +EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu); + +/** * kmemleak_ignore - ignore an allocated object * @ptr: pointer to beginning of the object * diff --git a/mm/secretmem.c b/mm/secretmem.c index 589b26c2d553..9a11a38a6770 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - int err; - inode = alloc_anon_inode(secretmem_mnt->mnt_sb); + inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL); - if (err) { - file = ERR_PTR(err); - goto err_free_inode; - } - file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index c40b98f7743c..868d28583c0a 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -20,9 +20,8 @@ static int lowpan_ndisc_parse_802154_options(const struct net_device *dev, switch (nd_opt->nd_opt_len) { case NDISC_802154_SHORT_ADDR_LENGTH: if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type]) - ND_PRINTK(2, warn, - "%s: duplicated short addr ND6 option found: type=%d\n", - __func__, nd_opt->nd_opt_type); + net_dbg_ratelimited("%s: duplicated short addr ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); else ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt; return 1; @@ -63,8 +62,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr, IEEE802154_SHORT_ADDR_LEN, 0); if (!lladdr_short) { - ND_PRINTK(2, warn, - "NA: invalid short link-layer address length\n"); + net_dbg_ratelimited("NA: invalid short link-layer address length\n"); return; } } @@ -75,8 +73,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr, IEEE802154_SHORT_ADDR_LEN, 0); if (!lladdr_short) { - ND_PRINTK(2, warn, - "NA: invalid short link-layer address length\n"); + net_dbg_ratelimited("NA: invalid short link-layer address length\n"); return; } } @@ -209,9 +206,8 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net, sllao, tokenized, valid_lft, prefered_lft); if (err) - ND_PRINTK(2, warn, - "RA: 
could not add a short address based address for prefix: %pI6c\n", - &pinfo->prefix); + net_dbg_ratelimited("RA: could not add a short address based address for prefix: %pI6c\n", + &pinfo->prefix); } } #endif diff --git a/net/atm/clip.c b/net/atm/clip.c index b234dc3bcb0d..f7a5565e794e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -45,7 +45,8 @@ #include <net/atmclip.h> static struct net_device *clip_devs; -static struct atm_vcc *atmarpd; +static struct atm_vcc __rcu *atmarpd; +static DEFINE_MUTEX(atmarpd_lock); static struct timer_list idle_timer; static const struct neigh_ops clip_neigh_ops; @@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; + struct atm_vcc *vcc; struct sk_buff *skb; + int err = 0; pr_debug("(%d)\n", type); - if (!atmarpd) - return -EUNATCH; + + rcu_read_lock(); + vcc = rcu_dereference(atmarpd); + if (!vcc) { + err = -EUNATCH; + goto unlock; + } skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto unlock; + } ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; - atm_force_charge(atmarpd, skb->truesize); + atm_force_charge(vcc, skb->truesize); - sk = sk_atm(atmarpd); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); - return 0; +unlock: + rcu_read_unlock(); + return err; } static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) @@ -417,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) if (!vcc->push) return -EBADFD; + if (vcc->user_back) + return -EINVAL; clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; @@ -607,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc) { pr_debug("\n"); - rtnl_lock(); - atmarpd = NULL; + mutex_lock(&atmarpd_lock); + RCU_INIT_POINTER(atmarpd, NULL); + mutex_unlock(&atmarpd_lock); + + synchronize_rcu(); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - rtnl_unlock(); pr_debug("(done)\n"); module_put(THIS_MODULE); } +static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + return 0; +} + static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close + .close = atmarpd_close, + .send = atmarpd_send }; @@ -631,15 +655,18 @@ static struct atm_dev atmarpd_dev = { static int atm_init_atmarp(struct atm_vcc *vcc) { - rtnl_lock(); + if (vcc->push == clip_push) + return -EINVAL; + + mutex_lock(&atmarpd_lock); if (atmarpd) { - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return -EADDRINUSE; } mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - atmarpd = vcc; + rcu_assign_pointer(atmarpd, vcc); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting closed if signaling dies */ @@ -648,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc) vcc->push = NULL; vcc->pop = NULL; /* crash */ vcc->push_oam = NULL; /* crash */ - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return 0; } static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); + struct sock *sk = sock->sk; int err = 0; switch (cmd) { @@ -675,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_create(arg); break; case ATMARPD_CTRL: + lock_sock(sk); err = atm_init_atmarp(vcc); if (!err) { sock->state = SS_CONNECTED; 
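/*
 * Illustrative sketch (not part of the patch above): the RCU lifecycle the
 * clip.c change applies to the atmarpd pointer, in generic form.
 * Kernel-style C; struct foo and do_something() are hypothetical:
 */
struct foo { int x; };
static struct foo __rcu *gp;
static DEFINE_MUTEX(gp_lock);

static int do_something(struct foo *f)
{
	return f->x;
}

static int reader(void)
{
	struct foo *f;
	int err = -ENOENT;

	rcu_read_lock();
	f = rcu_dereference(gp);	/* may become NULL at any time */
	if (f)
		err = do_something(f);
	rcu_read_unlock();
	return err;
}

static void unpublish(void)
{
	mutex_lock(&gp_lock);
	RCU_INIT_POINTER(gp, NULL);
	mutex_unlock(&gp_lock);
	/* wait until no reader can still see the old pointer */
	synchronize_rcu();
	/* only now is it safe to tear the object down */
}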
__module_get(THIS_MODULE); } + release_sock(sk); break; case ATMARP_MKIP: + lock_sock(sk); err = clip_mkip(vcc, arg); + release_sock(sk); break; case ATMARP_SETENTRY: err = clip_setentry(vcc, (__force __be32)arg); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66052d6aaa1d..992131f88a45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_set_ext_adv_params *rp = data; - struct hci_cp_le_set_ext_adv_params *cp; - struct adv_info *adv_instance; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - hdev->adv_addr_type = cp->own_addr_type; - if (!cp->handle) { - /* Store in hdev for instance 0 */ - hdev->adv_tx_power = rp->tx_power; - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - adv_instance->tx_power = rp->tx_power; - } - /* Update adv data as tx power is known now */ - hci_update_adv_data(hdev, cp->handle); - - hci_dev_unlock(hdev); - - return rp->status; -} - static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4164,8 +4130,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, hci_cc_le_read_num_adv_sets, sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, hci_cc_le_set_ext_adv_enable), HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, @@ -7002,7 +6966,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu); if (!ev->status) { + bis->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_SYNC, &bis->flags); + hci_debugfs_create_conn(bis); + hci_conn_add_sysfs(bis); hci_iso_setup_path(bis); } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6687f2a4d1eb..5f178db8d40d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +static int +hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv, + const struct hci_cp_le_set_ext_adv_params *cp, + struct hci_rp_le_set_ext_adv_params *rp) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp), + cp, HCI_CMD_TIMEOUT); + + /* If command return a status event, skb will be set to -ENODATA */ + if (skb == ERR_PTR(-ENODATA)) + return 0; + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", + HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->len != sizeof(*rp)) { + bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u", + HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len); + kfree_skb(skb); + return -EIO; + } + + memcpy(rp, skb->data, sizeof(*rp)); + kfree_skb(skb); + + if (!rp->status) { + hdev->adv_addr_type = cp->own_addr_type; + if (!cp->handle) { + /* Store in hdev for instance 0 */ + hdev->adv_tx_power = rp->tx_power; + } else if (adv) { + adv->tx_power = rp->tx_power; + } + } + + return rp->status; +} + +static int 
hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); + u8 len; + struct adv_info *adv = NULL; + int err; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } + + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); + + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + struct_size(pdu, data, len), pdu, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu->data, len); + hdev->adv_data_len = len; + } + + return 0; +} + +static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_adv_data cp; + u8 len; + + memset(&cp, 0, sizeof(cp)); + + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return 0; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return 0; + + if (ext_adv_capable(hdev)) + return hci_set_ext_adv_data_sync(hdev, instance); + + return hci_set_adv_data_sync(hdev, instance); +} + int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; bool connectable; u32 flags; bdaddr_t random_addr; @@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) * Command Disallowed error, so we must first disable the * instance if it is active. */ - if (adv && !adv->pending) { + if (adv) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; @@ -1316,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.secondary_phy = HCI_ADV_PHY_1M; } - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -1822,79 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, - HCI_MAX_EXT_AD_LENGTH); - u8 len; - struct adv_info *adv = NULL; - int err; - - if (instance) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->adv_data_changed) - return 0; - } - - len = eir_create_adv_data(hdev, instance, pdu->data, - HCI_MAX_EXT_AD_LENGTH); - - pdu->length = len; - pdu->handle = adv ? 
adv->handle : instance; - pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - struct_size(pdu, data, len), pdu, - HCI_CMD_TIMEOUT); - if (err) - return err; - - /* Update data if the command succeed */ - if (adv) { - adv->adv_data_changed = false; - } else { - memcpy(hdev->adv_data, pdu->data, len); - hdev->adv_data_len = len; - } - - return 0; -} - -static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - struct hci_cp_le_set_adv_data cp; - u8 len; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return 0; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - -int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return 0; - - if (ext_adv_capable(hdev)) - return hci_set_ext_adv_data_sync(hdev, instance); - - return hci_set_adv_data_sync(hdev, instance); -} - int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { @@ -1970,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; - int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - err = hci_clear_adv_sets_sync(hdev, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_clear_adv_sets_sync(hdev, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2004,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err = 0; + int err; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_remove_ext_adv_instance_sync(hdev, instance, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2109,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; - int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - err = hci_disable_ext_adv_instance_sync(hdev, 0x00); - if (ext_adv_capable(hdev)) - return err; + return hci_disable_ext_adv_instance_sync(hdev, 0x00); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -2481,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev) int err; int old_state; + /* If controller is not advertising we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + /* If already been paused there is nothing to do. 
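/*
 * Illustrative sketch (not part of the patch above): the response-validation
 * shape hci_set_ext_adv_params_sync() introduces — treat -ENODATA as
 * "completed via status event only", propagate real failures, and verify
 * the payload size before copying it out. Generic kernel-style form with a
 * hypothetical function name:
 */
static int cmd_sync_rp(struct hci_dev *hdev, u16 opcode,
		       void *rp, size_t rp_len)
{
	struct sk_buff *skb;

	skb = __hci_cmd_sync(hdev, opcode, 0, NULL, HCI_CMD_TIMEOUT);
	if (skb == ERR_PTR(-ENODATA))
		return 0;		/* status event, no response payload */
	if (IS_ERR(skb))
		return PTR_ERR(skb);
	if (skb->len != rp_len) {	/* never trust the reported length */
		kfree_skb(skb);
		return -EIO;
	}
	memcpy(rp, skb->data, rp_len);
	kfree_skb(skb);
	return 0;
}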
*/ if (hdev->advertising_paused) return 0; @@ -5449,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_disconnect cp; - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { + if (conn->type == BIS_LINK) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. */ @@ -6277,6 +6321,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; int err; bdaddr_t random_addr; u8 own_addr_type; @@ -6318,8 +6363,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, if (err) return err; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d540f7b4f75f..1485b455ade4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1080,7 +1080,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data) struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); - hci_disable_advertising_sync(hdev); + if (list_empty(&hdev->adv_instances)) + hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) @@ -2153,6 +2154,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) else hci_dev_clear_flag(hdev, HCI_MESH); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); + len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ @@ -2167,6 +2171,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; + __u16 period, window; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -2180,6 +2185,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); + /* Keep allowed ranges in sync with set_scan_params() */ + period = __le16_to_cpu(cp->period); + + if (period < 0x0004 || period > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + window = __le16_to_cpu(cp->window); + + if (window < 0x0004 || window > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + if (window > period) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); @@ -6432,6 +6454,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); + /* Keep allowed ranges in sync with set_mesh() */ interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) diff --git a/net/core/dev.c b/net/core/dev.c index 96d33dead604..e365b099484e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1267,33 +1267,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * __dev_get_by_flags - find any device with given flags - * @net: the applicable net namespace - * @if_flags: 
IFF_* values - * @mask: bitmask of bits in if_flags to check + * dev_get_by_flags_rcu - find any device with given flags + * @net: the applicable net namespace + * @if_flags: IFF_* values + * @mask: bitmask of bits in if_flags to check + * + * Search for any interface with the given flags. * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. Must be called inside - * rtnl_lock(), and result refcount is unchanged. + * Context: rcu_read_lock() must be held. + * Returns: NULL if a device is not found or a pointer to the device. */ - -struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, + unsigned short mask) { - struct net_device *dev, *ret; - - ASSERT_RTNL(); + struct net_device *dev; - ret = NULL; - for_each_netdev(net, dev) { - if (((dev->flags ^ if_flags) & mask) == 0) { - ret = dev; - break; + for_each_netdev_rcu(net, dev) { + if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) { + dev_hold(dev); + return dev; } } - return ret; + + return NULL; } -EXPORT_SYMBOL(__dev_get_by_flags); +EXPORT_IPV6_MOD(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -4026,7 +4024,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { @@ -11976,21 +11977,8 @@ static void netdev_rss_contexts_free(struct net_device *dev) mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { - struct ethtool_rxfh_param rxfh; - - rxfh.indir = ethtool_rxfh_context_indir(ctx); - rxfh.key = ethtool_rxfh_context_key(ctx); - rxfh.hfunc = ctx->hfunc; - rxfh.input_xfrm = ctx->input_xfrm; - rxfh.rss_context = context; - rxfh.rss_delete = true; - xa_erase(&dev->ethtool->rss_ctx, context); - if (dev->ethtool_ops->create_rxfh_context) - dev->ethtool_ops->remove_rxfh_context(dev, ctx, - context, NULL); - else - dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL); kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c9b969386399..8f897e2c8b4f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1210,12 +1210,21 @@ static int rx_queue_default_mask(struct net_device *dev, struct netdev_rx_queue *queue) { #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) - struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask); + struct cpumask *rps_default_mask; + int res = 0; + mutex_lock(&rps_default_mask_mutex); + + rps_default_mask = dev_net(dev)->core.rps_default_mask; if (rps_default_mask && !cpumask_empty(rps_default_mask)) - return netdev_rx_queue_set_rps_mask(queue, rps_default_mask); -#endif + res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask); + + mutex_unlock(&rps_default_mask_mutex); + + return res; +#else return 0; +#endif } static int rx_queue_add_kobject(struct net_device *dev, int index) diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 8a5b04c2699a..e938f25e8e86 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -11,4 +11,6 @@ int netdev_queue_update_kobjects(struct 
net_device *net, int netdev_change_owner(struct net_device *, const struct net *net_old, const struct net *net_new); +extern struct mutex rps_default_mask_mutex; + #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 54f9d505895f..a1da97b5b30b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -372,6 +372,31 @@ out: return ret; } +static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb, + int len) +{ + struct udphdr *udph; + int udp_len; + + udp_len = len + sizeof(struct udphdr); + udph = udp_hdr(skb); + + /* check needs to be set, since it will be consumed in csum_partial */ + udph->check = 0; + if (np->ipv6) + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + else + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; +} + netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; @@ -389,24 +414,101 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } EXPORT_SYMBOL(netpoll_send_skb); +static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len) +{ + struct ipv6hdr *ip6h; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + *(unsigned char *)ip6h = 0x60; + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + skb->protocol = htons(ETH_P_IPV6); +} + +static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len) +{ + static atomic_t ip_ident; + struct iphdr *iph; + int ip_len; + + ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr); + + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + *(unsigned char *)iph = 0x45; + iph->tos = 0; + put_unaligned(htons(ip_len), &iph->tot_len); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &iph->saddr); + put_unaligned(np->remote_ip.ip, &iph->daddr); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + skb->protocol = htons(ETH_P_IP); +} + +static void push_udp(struct netpoll *np, struct sk_buff *skb, int len) +{ + struct udphdr *udph; + int udp_len; + + udp_len = len + sizeof(struct udphdr); + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + udph = udp_hdr(skb); + udph->source = htons(np->local_port); + udph->dest = htons(np->remote_port); + udph->len = htons(udp_len); + + netpoll_udp_checksum(np, skb, len); +} + +static void push_eth(struct netpoll *np, struct sk_buff *skb) +{ + struct ethhdr *eth; + + eth = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + ether_addr_copy(eth->h_source, np->dev->dev_addr); + ether_addr_copy(eth->h_dest, np->remote_mac); + if (np->ipv6) + eth->h_proto = htons(ETH_P_IPV6); + else + eth->h_proto = htons(ETH_P_IP); +} + int netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; - struct udphdr *udph; - struct iphdr *iph; - struct ethhdr *eth; - static atomic_t ip_ident; - struct 
ipv6hdr *ip6h; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!irqs_disabled()); - udp_len = len + sizeof(*udph); + udp_len = len + sizeof(struct udphdr); if (np->ipv6) - ip_len = udp_len + sizeof(*ip6h); + ip_len = udp_len + sizeof(struct ipv6hdr); else - ip_len = udp_len + sizeof(*iph); + ip_len = udp_len + sizeof(struct iphdr); total_len = ip_len + LL_RESERVED_SPACE(np->dev); @@ -418,74 +520,12 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); - skb_push(skb, sizeof(*udph)); - skb_reset_transport_header(skb); - udph = udp_hdr(skb); - udph->source = htons(np->local_port); - udph->dest = htons(np->remote_port); - udph->len = htons(udp_len); - - udph->check = 0; - if (np->ipv6) { - udph->check = csum_ipv6_magic(&np->local_ip.in6, - &np->remote_ip.in6, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*ip6h)); - skb_reset_network_header(skb); - ip6h = ipv6_hdr(skb); - - /* ip6h->version = 6; ip6h->priority = 0; */ - *(unsigned char *)ip6h = 0x60; - ip6h->flow_lbl[0] = 0; - ip6h->flow_lbl[1] = 0; - ip6h->flow_lbl[2] = 0; - - ip6h->payload_len = htons(sizeof(struct udphdr) + len); - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = 32; - ip6h->saddr = np->local_ip.in6; - ip6h->daddr = np->remote_ip.in6; - - eth = skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IPV6); - } else { - udph->check = csum_tcpudp_magic(np->local_ip.ip, - np->remote_ip.ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - *(unsigned char *)iph = 0x45; - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip.ip, &(iph->saddr)); - put_unaligned(np->remote_ip.ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); - } - - ether_addr_copy(eth->h_source, np->dev->dev_addr); - ether_addr_copy(eth->h_dest, np->remote_mac); - + push_udp(np, skb, len); + if (np->ipv6) + push_ipv6(np, skb, len); + else + push_ipv4(np, skb, len); + push_eth(np, skb); skb->dev = np->dev; return (int)netpoll_send_skb(np, skb); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index ba7cf3e3c32f..05e2e22a8f7c 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -371,7 +371,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem); static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { @@ -409,7 +409,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. 
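/*
 * Illustrative sketch (not part of the patch above): the one's-complement
 * folding behind the csum_* helpers used by netpoll_udp_checksum().
 * Standalone C; assumes an even-sized buffer and ignores the byte-order
 * handling the kernel helpers take care of. A UDP result of 0 is sent as
 * 0xffff, which is what the CSUM_MANGLED_0 fixup above does:
 */
#include <stdint.h>
#include <stddef.h>

static uint16_t csum_fold16(const void *buf, size_t len)
{
	const uint16_t *p = buf;
	uint32_t sum = 0;

	for (; len >= 2; len -= 2)
		sum += *p++;
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the carries back in */
	sum = (sum & 0xffff) + (sum >> 16);	/* one more time for the new carry */
	return (uint16_t)~sum;
}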
*/ - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); alloc_stat_inc(pool, waive); netmem = 0; break; @@ -544,8 +544,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, } /* slow path */ -static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; @@ -615,7 +615,7 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) netmem = pool->mp_ops->alloc_netmems(pool, gfp); else - netmem = __page_pool_alloc_pages_slow(pool, gfp); + netmem = __page_pool_alloc_netmems_slow(pool, gfp); return netmem; } EXPORT_SYMBOL(page_pool_alloc_netmems); @@ -673,8 +673,8 @@ void page_pool_clear_pp_info(netmem_ref netmem) netmem_set_pp(netmem, NULL); } -static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, - netmem_ref netmem) +static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, + netmem_ref netmem) { struct page *old, *page = netmem_to_page(netmem); unsigned long id; @@ -712,7 +712,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, * a regular page (that will eventually be returned to the normal * page-allocator via put_page). */ -void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem) { int count; bool put; @@ -721,7 +721,7 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) put = pool->mp_ops->release_netmem(pool, netmem); else - __page_pool_release_page_dma(pool, netmem); + __page_pool_release_netmem_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -826,7 +826,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem, * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -869,7 +869,7 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_put_unrefed_netmem); @@ -912,7 +912,7 @@ static void page_pool_recycle_ring_bulk(struct page_pool *pool, * since put_page() with refcnt == 1 can be an expensive operation. 
*/ for (; i < bulk_len; i++) - page_pool_return_page(pool, bulk[i]); + page_pool_return_netmem(pool, bulk[i]); } /** @@ -995,7 +995,7 @@ static netmem_ref page_pool_drain_frag(struct page_pool *pool, return netmem; } - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -1009,7 +1009,7 @@ static void page_pool_free_frag(struct page_pool *pool) if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, @@ -1076,7 +1076,7 @@ static void page_pool_empty_ring(struct page_pool *pool) pr_crit("%s() page_pool refcnt %d violation\n", __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } @@ -1109,7 +1109,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } @@ -1136,7 +1136,7 @@ static void page_pool_scrub(struct page_pool *pool) } xa_for_each(&pool->dma_mapped, id, ptr) - __page_pool_release_page_dma(pool, page_to_netmem(ptr)); + __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr)); } /* No more consumers should exist, but producers could still @@ -1253,7 +1253,7 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6420b74ea9c..ee0274417948 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -384,8 +384,7 @@ static inline void __finalize_skb_around(struct sk_buff *skb, void *data, skb_set_kcov_handle(skb, kcov_common_handle()); } -static inline void *__slab_build_skb(struct sk_buff *skb, void *data, - unsigned int *size) +static inline void *__slab_build_skb(void *data, unsigned int *size) { void *resized; @@ -418,7 +417,7 @@ struct sk_buff *slab_build_skb(void *data) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); - data = __slab_build_skb(skb, data, &size); + data = __slab_build_skb(data, &size); __finalize_skb_around(skb, data, size); return skb; @@ -435,7 +434,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, * using slab buffer should use slab_build_skb() instead. */ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) - data = __slab_build_skb(skb, data, &size); + data = __slab_build_skb(data, &size); __finalize_skb_around(skb, data, size); } @@ -3060,10 +3059,8 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd, /* * Fill page/offset/length into spd, if it can hold more pages. 
*/ -static bool spd_fill_page(struct splice_pipe_desc *spd, - struct pipe_inode_info *pipe, struct page *page, - unsigned int *len, unsigned int offset, - bool linear, +static bool spd_fill_page(struct splice_pipe_desc *spd, struct page *page, + unsigned int *len, unsigned int offset, bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) @@ -3091,8 +3088,7 @@ static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct splice_pipe_desc *spd, bool linear, - struct sock *sk, - struct pipe_inode_info *pipe) + struct sock *sk) { if (!*len) return true; @@ -3111,8 +3107,7 @@ static bool __splice_segment(struct page *page, unsigned int poff, do { unsigned int flen = min(*len, plen); - if (spd_fill_page(spd, pipe, page, &flen, poff, - linear, sk)) + if (spd_fill_page(spd, page, &flen, poff, linear, sk)) return true; poff += flen; plen -= flen; @@ -3130,8 +3125,8 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, unsigned int *offset, unsigned int *len, struct splice_pipe_desc *spd, struct sock *sk) { - int seg; struct sk_buff *iter; + int seg; /* map the linear part : * If skb->head_frag is set, this 'linear' part is backed by a @@ -3143,7 +3138,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, skb_headlen(skb), offset, len, spd, skb_head_is_locked(skb), - sk, pipe)) + sk)) return true; /* @@ -3160,7 +3155,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), - offset, len, spd, false, sk, pipe)) + offset, len, spd, false, sk)) return true; } @@ -3235,6 +3230,7 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len, sendmsg_func sendmsg, int flags) { + int more_hint = sk_is_tcp(sk) ? MSG_MORE : 0; unsigned int orig_len = len; struct sk_buff *head = skb; unsigned short fragidx; @@ -3252,6 +3248,8 @@ do_frag_list: kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT | flags; + if (slen < len) + msg.msg_flags |= more_hint; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, @@ -3292,6 +3290,8 @@ do_frag_list: flags, }; + if (slen < len) + msg.msg_flags |= more_hint; bvec_set_page(&bvec, skb_frag_page(frag), slen, skb_frag_off(frag) + offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, @@ -6763,8 +6763,7 @@ static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); /* carve out the first eat bytes from skb's frag_list. May recurse into * pskb_carve() */ -static int pskb_carve_frag_list(struct sk_buff *skb, - struct skb_shared_info *shinfo, int eat, +static int pskb_carve_frag_list(struct skb_shared_info *shinfo, int eat, gfp_t gfp_mask) { struct sk_buff *list = shinfo->frag_list; @@ -6869,7 +6868,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_clone_fraglist(skb); /* split line is in frag list */ - if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { + if (k == 0 && pskb_carve_frag_list(shinfo, off - pos, gfp_mask)) { /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. 
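
The __skb_send_sock() change above adds a MSG_MORE hint to every internal sendmsg() that will be followed by more bytes (slen < len), so TCP can coalesce the per-fragment sends. The same hint applies to ordinary sockets; a minimal sketch with plain send() (MSG_MORE is Linux-specific; error handling trimmed):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /*
     * Send an iovec over a connected TCP socket, hinting MSG_MORE on every
     * call that more bytes will follow, so the stack may batch the chunks
     * into fewer segments.
     */
    static int send_all(int fd, const struct iovec *iov, int n)
    {
        size_t remaining = 0;
        int i;

        for (i = 0; i < n; i++)
            remaining += iov[i].iov_len;

        for (i = 0; i < n; i++) {
            const char *p = iov[i].iov_base;
            size_t left = iov[i].iov_len;

            while (left) {
                int flags = remaining > left ? MSG_MORE : 0;
                ssize_t r = send(fd, p, left, flags);

                if (r < 0)
                    return -1;
                p += r;
                left -= (size_t)r;
                remaining -= (size_t)r;
            }
        }
        return 0;
    }
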
*/ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); @@ -7234,7 +7233,6 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, * @skb: The buffer to add pages to * @iter: Iterator representing the pages to be added * @maxsize: Maximum amount of pages to be added - * @gfp: Allocation flags * * This is a common helper function for supporting MSG_SPLICE_PAGES. It * extracts pages from an iterator and adds them to the socket buffer if @@ -7245,7 +7243,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, * insufficient space in the buffer to transfer anything. */ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, - ssize_t maxsize, gfp_t gfp) + ssize_t maxsize) { size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5dbb2c6f371d..8cf04b57ade1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,7 @@ #include <net/rps.h> #include "dev.h" +#include "net-sysfs.h" static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; @@ -96,50 +97,40 @@ free_buf: #ifdef CONFIG_RPS -static struct cpumask *rps_default_mask_cow_alloc(struct net *net) -{ - struct cpumask *rps_default_mask; - - if (net->core.rps_default_mask) - return net->core.rps_default_mask; - - rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!rps_default_mask) - return NULL; - - /* pairs with READ_ONCE in rx_queue_default_mask() */ - WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); - return rps_default_mask; -} +DEFINE_MUTEX(rps_default_mask_mutex); static int rps_default_mask_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->data; + struct cpumask *mask; int err = 0; - rtnl_lock(); + mutex_lock(&rps_default_mask_mutex); + mask = net->core.rps_default_mask; if (write) { - struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); - + if (!mask) { + mask = kzalloc(cpumask_size(), GFP_KERNEL); + net->core.rps_default_mask = mask; + } err = -ENOMEM; - if (!rps_default_mask) + if (!mask) goto done; - err = cpumask_parse(buffer, rps_default_mask); + err = cpumask_parse(buffer, mask); if (err) goto done; - err = rps_cpumask_housekeeping(rps_default_mask); + err = rps_cpumask_housekeeping(mask); if (err) goto done; } else { err = dump_cpumask(buffer, lenp, ppos, - net->core.rps_default_mask ? 
: cpu_none_mask); + mask ?: cpu_none_mask); } done: - rtnl_unlock(); + mutex_unlock(&rps_default_mask_mutex); return err; } diff --git a/net/devlink/param.c b/net/devlink/param.c index 396b8a7f6013..41dcc86cfd94 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -97,6 +97,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, + .name = DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME, + .type = DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -200,6 +205,11 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32)) goto value_nest_cancel; break; + case DEVLINK_PARAM_TYPE_U64: + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, + val.vu64)) + goto value_nest_cancel; + break; case DEVLINK_PARAM_TYPE_STRING: if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vstr)) @@ -434,6 +444,11 @@ devlink_param_value_get_from_info(const struct devlink_param *param, return -EINVAL; value->vu32 = nla_get_u32(param_data); break; + case DEVLINK_PARAM_TYPE_U64: + if (nla_len(param_data) != sizeof(u64)) + return -EINVAL; + value->vu64 = nla_get_u64(param_data); + break; case DEVLINK_PARAM_TYPE_STRING: len = strnlen(nla_data(param_data), nla_len(param_data)); if (len == nla_len(param_data) || diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b6d96e562c9a..cccb4694f5e1 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -981,6 +981,7 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr, static bool flow_type_hashable(u32 flow_type) { switch (flow_type) { + case ETHER_FLOW: case TCP_V4_FLOW: case UDP_V4_FLOW: case SCTP_V4_FLOW: @@ -1100,7 +1101,11 @@ ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr) rc = ops->set_rxfh_fields(dev, &fields, NULL); exit_unlock: mutex_unlock(&dev->ethtool->rss_lock); - return rc; + if (rc) + return rc; + + ethtool_rss_notify(dev, fields.rss_context); + return 0; } static noinline_for_stack int @@ -1391,8 +1396,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; rxfh.indir_size = rxfh_dev.indir_size; @@ -1534,8 +1538,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; /* Check input data transformation capabilities */ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && @@ -1634,6 +1637,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (create) { + u32 limit, ctx_id; + if (rxfh_dev.rss_delete) { ret = -EINVAL; goto out_unlock; @@ -1644,21 +1649,15 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, goto out_unlock; } - if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; - u32 ctx_id; - - /* driver uses new 
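
The rps_default_mask conversion above replaces the RTNL dependency with a dedicated rps_default_mask_mutex covering both the lazy first-write allocation and the read side. A self-contained pthreads sketch of that shape, with a hypothetical string-valued setting standing in for the cpumask:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;
    static char *cfg;                  /* lazily allocated; cfg_lock protects it */

    /* Writer: allocate on first use, then update, all under the one lock. */
    static int cfg_write(const char *val)
    {
        int err = 0;

        pthread_mutex_lock(&cfg_lock);
        if (!cfg)
            cfg = calloc(1, 128);
        if (!cfg) {
            err = -1;                  /* -ENOMEM analogue */
            goto out;
        }
        strncpy(cfg, val, 127);
    out:
        pthread_mutex_unlock(&cfg_lock);
        return err;
    }

    /* Reader: takes the same narrow lock instead of a subsystem-wide one. */
    static void cfg_read(char *buf, size_t len)
    {
        pthread_mutex_lock(&cfg_lock);
        snprintf(buf, len, "%s", cfg ? cfg : "");
        pthread_mutex_unlock(&cfg_lock);
    }

Replacing a broad lock with a narrow, purpose-built one keeps unrelated paths off the contended resource, which is the point of dropping rtnl_lock() here.
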
API, core allocates ID */ - ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit - 1), - GFP_KERNEL_ACCOUNT); - if (ret < 0) { - kfree(ctx); - goto out_unlock; - } - WARN_ON(!ctx_id); /* can't happen */ - rxfh.rss_context = ctx_id; + limit = ops->rxfh_max_num_contexts ?: U32_MAX; + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit - 1), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out_unlock; } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; } else if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { @@ -1670,31 +1669,24 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - if (rxfh.rss_context && ops->create_rxfh_context) { - if (create) { - ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, - extack); - /* Make sure driver populates defaults */ - WARN_ON_ONCE(!ret && !rxfh_dev.key && - ops->rxfh_per_ctx_key && - !memchr_inv(ethtool_rxfh_context_key(ctx), - 0, ctx->key_size)); - } else if (rxfh_dev.rss_delete) { - ret = ops->remove_rxfh_context(dev, ctx, - rxfh.rss_context, - extack); - } else { - ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, - extack); - } - } else { + if (!rxfh.rss_context) { ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } else if (create) { + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && ops->rxfh_per_ctx_key && + !memchr_inv(ethtool_rxfh_context_key(ctx), 0, + ctx->key_size)); + } else if (rxfh_dev.rss_delete) { + ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, + extack); + } else { + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); } if (ret) { if (create) { /* failed to create, free our new tracking entry */ - if (ops->create_rxfh_context) - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); kfree(ctx); } goto out_unlock; @@ -1713,36 +1705,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; } /* Update rss_ctx tracking */ - if (create && !ops->create_rxfh_context) { - /* driver uses old API, it chose context ID */ - if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) { - /* context ID reused, our tracking is screwed */ - kfree(ctx); - goto out_unlock; - } - /* Allocate the exact ID the driver gave us */ - if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context, - ctx, GFP_KERNEL))) { - kfree(ctx); - goto out_unlock; - } - - /* Fetch the defaults for the old API, in the new API drivers - * should write defaults into ctx themselves. 
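
With the legacy cap_rss_ctx_supported branches removed above, the core always allocates the RSS context ID itself through xa_alloc() with XA_LIMIT(1, limit - 1). What xa_alloc() contributes here is "lowest free ID in a bounded range"; a bitmap analogue with an arbitrary small limit (the real API additionally stores the context pointer and handles its own locking):

    #include <stdint.h>

    #define ID_LIMIT 64               /* valid IDs: 1 .. ID_LIMIT - 1 */
    static uint64_t id_used = 1;      /* bit 0 set: ID 0 is never handed out */

    /* Return the lowest free ID in [1, ID_LIMIT), or -1 if exhausted. */
    static int id_alloc(void)
    {
        int id;

        for (id = 1; id < ID_LIMIT; id++) {
            if (!(id_used & (1ull << id))) {
                id_used |= 1ull << id;
                return id;
            }
        }
        return -1;                    /* -EBUSY analogue */
    }

    static void id_free(int id)
    {
        if (id > 0 && id < ID_LIMIT)
            id_used &= ~(1ull << id);
    }
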
- */ - rxfh_dev.indir = (u32 *)rss_config; - rxfh_dev.indir_size = dev_indir_size; - - rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.key_size = dev_key_size; - - ret = ops->get_rxfh(dev, &rxfh_dev); - if (WARN_ON(ret)) { - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); - kfree(ctx); - goto out_unlock; - } - } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); kfree(ctx); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index e717f23cbc10..41ab9fc67652 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -12,6 +12,7 @@ struct rss_req_info { struct rss_reply_data { struct ethnl_reply_data base; + bool has_flow_hash; bool no_key_fields; u32 indir_size; u32 hkey_size; @@ -19,6 +20,37 @@ struct rss_reply_data { u32 input_xfrm; u32 *indir_table; u8 *hkey; + int flow_hash[__ETHTOOL_A_FLOW_CNT]; +}; + +static const u8 ethtool_rxfh_ft_nl2ioctl[] = { + [ETHTOOL_A_FLOW_ETHER] = ETHER_FLOW, + [ETHTOOL_A_FLOW_IP4] = IPV4_FLOW, + [ETHTOOL_A_FLOW_IP6] = IPV6_FLOW, + [ETHTOOL_A_FLOW_TCP4] = TCP_V4_FLOW, + [ETHTOOL_A_FLOW_UDP4] = UDP_V4_FLOW, + [ETHTOOL_A_FLOW_SCTP4] = SCTP_V4_FLOW, + [ETHTOOL_A_FLOW_AH_ESP4] = AH_ESP_V4_FLOW, + [ETHTOOL_A_FLOW_TCP6] = TCP_V6_FLOW, + [ETHTOOL_A_FLOW_UDP6] = UDP_V6_FLOW, + [ETHTOOL_A_FLOW_SCTP6] = SCTP_V6_FLOW, + [ETHTOOL_A_FLOW_AH_ESP6] = AH_ESP_V6_FLOW, + [ETHTOOL_A_FLOW_AH4] = AH_V4_FLOW, + [ETHTOOL_A_FLOW_ESP4] = ESP_V4_FLOW, + [ETHTOOL_A_FLOW_AH6] = AH_V6_FLOW, + [ETHTOOL_A_FLOW_ESP6] = ESP_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU4] = GTPU_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU6] = GTPU_V6_FLOW, + [ETHTOOL_A_FLOW_GTPC4] = GTPC_V4_FLOW, + [ETHTOOL_A_FLOW_GTPC6] = GTPC_V6_FLOW, + [ETHTOOL_A_FLOW_GTPC_TEID4] = GTPC_TEID_V4_FLOW, + [ETHTOOL_A_FLOW_GTPC_TEID6] = GTPC_TEID_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_EH4] = GTPU_EH_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_EH6] = GTPU_EH_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_UL4] = GTPU_UL_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_UL6] = GTPU_UL_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_DL4] = GTPU_DL_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_DL6] = GTPU_DL_V6_FLOW, }; #define RSS_REQINFO(__req_base) \ @@ -49,6 +81,37 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, return 0; } +static void +rss_prepare_flow_hash(const struct rss_req_info *req, struct net_device *dev, + struct rss_reply_data *data, const struct genl_info *info) +{ + int i; + + data->has_flow_hash = false; + + if (!dev->ethtool_ops->get_rxfh_fields) + return; + if (req->rss_context && !dev->ethtool_ops->rxfh_per_ctx_fields) + return; + + mutex_lock(&dev->ethtool->rss_lock); + for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) { + struct ethtool_rxfh_fields fields = { + .flow_type = ethtool_rxfh_ft_nl2ioctl[i], + .rss_context = req->rss_context, + }; + + if (dev->ethtool_ops->get_rxfh_fields(dev, &fields)) { + data->flow_hash[i] = -1; /* Unsupported */ + continue; + } + + data->flow_hash[i] = fields.data; + data->has_flow_hash = true; + } + mutex_unlock(&dev->ethtool->rss_lock); +} + static int rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, struct rss_reply_data *data, const struct genl_info *info) @@ -64,6 +127,7 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + mutex_lock(&dev->ethtool->rss_lock); data->indir_size = 0; data->hkey_size = 0; @@ -77,7 +141,7 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, rss_config = kzalloc(total_size, GFP_KERNEL); if (!rss_config) { ret = -ENOMEM; - goto out_ops; + goto 
out_unlock; } if (data->indir_size) @@ -92,11 +156,12 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, ret = ops->get_rxfh(dev, &rxfh); if (ret) - goto out_ops; + goto out_unlock; data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; -out_ops: +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); ethnl_ops_complete(dev); return ret; } @@ -108,12 +173,16 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, struct ethtool_rxfh_context *ctx; u32 total_size, indir_bytes; u8 *rss_config; + int ret; data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + mutex_lock(&dev->ethtool->rss_lock); ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); - if (!ctx) - return -ENOENT; + if (!ctx) { + ret = -ENOENT; + goto out_unlock; + } data->indir_size = ctx->indir_size; data->hkey_size = ctx->key_size; @@ -123,8 +192,10 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, indir_bytes = data->indir_size * sizeof(u32); total_size = indir_bytes + data->hkey_size; rss_config = kzalloc(total_size, GFP_KERNEL); - if (!rss_config) - return -ENOMEM; + if (!rss_config) { + ret = -ENOMEM; + goto out_unlock; + } data->indir_table = (u32 *)rss_config; memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes); @@ -135,13 +206,18 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, data->hkey_size); } - return 0; + ret = 0; +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); + return ret; } static int rss_prepare(const struct rss_req_info *request, struct net_device *dev, struct rss_reply_data *data, const struct genl_info *info) { + rss_prepare_flow_hash(request, dev, data, info); + if (request->rss_context) return rss_prepare_ctx(request, dev, data, info); return rss_prepare_get(request, dev, data, info); @@ -156,22 +232,16 @@ rss_prepare_data(const struct ethnl_req_info *req_base, struct rss_req_info *request = RSS_REQINFO(req_base); struct net_device *dev = reply_base->dev; const struct ethtool_ops *ops; - int ret; ops = dev->ethtool_ops; if (!ops->get_rxfh) return -EOPNOTSUPP; /* Some drivers don't handle rss_context */ - if (request->rss_context && - !ops->cap_rss_ctx_supported && !ops->create_rxfh_context) + if (request->rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; - mutex_lock(&dev->ethtool->rss_lock); - ret = rss_prepare(request, dev, data, info); - mutex_unlock(&dev->ethtool->rss_lock); - - return ret; + return rss_prepare(request, dev, data, info); } static int @@ -185,7 +255,10 @@ rss_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ - nla_total_size(data->hkey_size); /* _RSS_HKEY */ + nla_total_size(data->hkey_size) + /* _RSS_HKEY */ + nla_total_size(0) + /* _RSS_FLOW_HASH */ + nla_total_size(sizeof(u32)) * ETHTOOL_A_FLOW_MAX + + 0; return len; } @@ -206,17 +279,34 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, sizeof(u32) * data->indir_size, data->indir_table))) return -EMSGSIZE; - if (data->no_key_fields) - return 0; - - if ((data->hfunc && - nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || - (data->input_xfrm && - nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || - (data->hkey_size && - nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))) + if (!data->no_key_fields && + ((data->hfunc && + nla_put_u32(skb, 
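
rss_prepare_get() and rss_prepare_ctx() above now take dev->ethtool->rss_lock themselves and funnel every failure through a single out_unlock label. That goto-unwind convention in miniature, with hypothetical names:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static int prepare(void **out)
    {
        void *buf;
        int err;

        pthread_mutex_lock(&lock);

        buf = calloc(1, 256);
        if (!buf) {
            err = -1;                 /* -ENOMEM analogue */
            goto out_unlock;
        }

        /* ... fill buf from state the lock protects ... */

        *out = buf;
        err = 0;
    out_unlock:
        pthread_mutex_unlock(&lock);
        return err;
    }
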
ETHTOOL_A_RSS_HFUNC, data->hfunc)) || + (data->input_xfrm && + nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || + (data->hkey_size && + nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))) return -EMSGSIZE; + if (data->has_flow_hash) { + struct nlattr *nest; + int i; + + nest = nla_nest_start(skb, ETHTOOL_A_RSS_FLOW_HASH); + if (!nest) + return -EMSGSIZE; + + for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) { + if (data->flow_hash[i] >= 0 && + nla_put_uint(skb, i, data->flow_hash[i])) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + } + + nla_nest_end(skb, nest); + } + return 0; } @@ -295,11 +385,7 @@ rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb, if (ret < 0) goto err_cancel; - /* Context 0 is not currently storred or cached in the XArray */ - if (!rss_context) - ret = rss_prepare_get(&req, dev, &data, info); - else - ret = rss_prepare_ctx(&req, dev, &data, info); + ret = rss_prepare(&req, dev, &data, info); if (ret) goto err_cancel; diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index d6f52839827e..081093dfd553 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -230,6 +230,12 @@ static int tls_handshake_accept(struct handshake_req *req, if (ret < 0) goto out_cancel; } + if (treq->th_keyring) { + ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_KEYRING, + treq->th_keyring); + if (ret < 0) + goto out_cancel; + } ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_AUTH_MODE, treq->th_auth_mode); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f8696e67def4..fc323994b1fa 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -325,8 +325,8 @@ static int ip_rcv_finish_core(struct net *net, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int err, drop_reason; struct rtable *rt; + int drop_reason; if (ip_can_use_hint(skb, iph, hint)) { drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, @@ -351,9 +351,10 @@ static int ip_rcv_finish_core(struct net *net, break; case IPPROTO_UDP: if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { - err = udp_v4_early_demux(skb); - if (unlikely(err)) + drop_reason = udp_v4_early_demux(skb); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 414b47a0d513..10a1d182fd84 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1222,8 +1222,7 @@ alloc_new_skb: if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c56b6fe6f0d7..22a7889876c1 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -274,9 +274,9 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; - next_msg = start + msecs_to_jiffies(20000); + next_msg = start + secs_to_jiffies(20); while (time_before(jiffies, start + - msecs_to_jiffies(carrier_timeout * 1000))) { + secs_to_jiffies(carrier_timeout))) { int wait, elapsed; rtnl_lock(); @@ -295,7 +295,7 @@ static int __init ic_open_devs(void) elapsed = jiffies_to_msecs(jiffies - start); wait = (carrier_timeout * 1000 - elapsed + 500) / 1000; pr_info("Waiting up to %d more seconds for network.\n", wait); - next_msg = jiffies + msecs_to_jiffies(20000); + next_msg = jiffies + 
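
rss_fill_reply() above emits the per-flow hash settings as a nested attribute: open the nest, put one integer per flow type, and on any mid-fill failure cancel the whole nest rather than leave a half-written attribute. The same open/fill/commit-or-cancel shape as a tiny self-contained TLV writer (hypothetical wire format: two-byte type and length, host endianness, for brevity only):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct tlvbuf {
        uint8_t *p;
        size_t len, cap;
    };

    /* Open a nest: emit the header now, patch its length on commit. */
    static size_t nest_start(struct tlvbuf *b, uint16_t type)
    {
        size_t off = b->len;

        if (b->cap - b->len < 4)
            return (size_t)-1;
        memset(b->p + b->len, 0, 4);
        memcpy(b->p + b->len, &type, 2);
        b->len += 4;                   /* length patched by nest_end() */
        return off;
    }

    static int put_u32(struct tlvbuf *b, uint16_t type, uint32_t v)
    {
        uint16_t l = 4;

        if (b->cap - b->len < 8)
            return -1;                 /* caller cancels the whole nest */
        memcpy(b->p + b->len, &type, 2);
        memcpy(b->p + b->len + 2, &l, 2);
        memcpy(b->p + b->len + 4, &v, 4);
        b->len += 8;
        return 0;
    }

    static void nest_end(struct tlvbuf *b, size_t off)
    {
        uint16_t l = (uint16_t)(b->len - off - 4);

        memcpy(b->p + off + 2, &l, 2);
    }

    /* Rewind to the nest header, as nla_nest_cancel() does. */
    static void nest_cancel(struct tlvbuf *b, size_t off)
    {
        b->len = off;
    }
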
secs_to_jiffies(20); } have_carrier: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 925b2c572ca2..31149a0ac849 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1174,7 +1174,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) @@ -1295,8 +1295,7 @@ new_segment: if (!copy) goto wait_for_space; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) { tcp_mark_push(tp, skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 79e3bfb0108f..9b03c44c12b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3714,7 +3714,7 @@ static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode and ends an episode by - * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack + * resetting tlp_high_seq. Ref: TLP algorithm in RFC8985 */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -5042,7 +5042,9 @@ end: skb_condense(skb); skb_set_owner_r(skb, sk); } - tcp_rcvbuf_grow(sk); + /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ + if (sk->sk_socket) + tcp_rcvbuf_grow(sk); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index bba10110fbbc..c52fd3254b6e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -35,7 +35,7 @@ s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd) tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb)); } -/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): +/* RACK loss detection (IETF RFC8985): * * Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 85b5aa82d7d7..75c489edc438 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -44,7 +44,7 @@ struct udp_tunnel_type_entry { DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); -static struct mutex udp_tunnel_gro_type_lock; +static DEFINE_MUTEX(udp_tunnel_gro_type_lock); static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; static unsigned int udp_tunnel_gro_type_nr; static DEFINE_SPINLOCK(udp_tunnel_gro_lock); @@ -144,11 +144,6 @@ out: } EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); -static void udp_tunnel_gro_init(void) -{ - mutex_init(&udp_tunnel_gro_type_lock); -} - static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) @@ -165,8 +160,6 @@ static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, #else -static void udp_tunnel_gro_init(void) {} - static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) @@ -1000,6 +993,5 @@ int __init udpv4_offload_init(void) }, }; - udp_tunnel_gro_init(); return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9c297974d3a6..3c200157634e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2229,32 +2229,29 @@ errdad: in6_ifa_put(ifp); } -/* Join to solicited addr multicast group. - * caller must hold RTNL */ +/* Join to solicited addr multicast group. 
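
Among the smaller fixes above, udp_offload.c stops declaring udp_tunnel_gro_type_lock uninitialized and calling mutex_init() from an init hook, and instead uses DEFINE_MUTEX(), so the lock is valid before any code can reach it and udp_tunnel_gro_init() disappears entirely. The pthreads equivalent of that trade-off:

    #include <pthread.h>

    /* Statically initialized: valid before any code runs, no init hook,
     * no window where another path can take the lock uninitialized. */
    static pthread_mutex_t type_lock = PTHREAD_MUTEX_INITIALIZER;

    /* The runtime-init alternative needs an explicit, correctly ordered
     * call; forgetting it is undefined behaviour:
     *
     *     static pthread_mutex_t type_lock;
     *     void subsys_init(void) { pthread_mutex_init(&type_lock, NULL); }
     */

    void register_type(void)
    {
        pthread_mutex_lock(&type_lock);
        /* ... mutate the shared type table ... */
        pthread_mutex_unlock(&type_lock);
    }
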
*/ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); } -/* caller must hold RTNL */ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; - if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(idev->dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); __ipv6_dev_mc_dec(idev, &maddr); } -/* caller must hold RTNL */ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -2267,7 +2264,6 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) __ipv6_dev_ac_inc(ifp->idev, &addr); } -/* caller must hold RTNL */ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -3865,7 +3861,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) * Do not dev_put! */ if (unregister) { - idev->dead = 1; + WRITE_ONCE(idev->dead, 1); /* protected by rtnl_lock */ RCU_INIT_POINTER(dev->ip6_ptr, NULL); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fb63ffbcfc64..567efd626ab4 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -20,12 +20,6 @@ #include <linux/netlink.h> #include <linux/rtnetlink.h> -#if 0 -#define ADDRLABEL(x...) printk(x) -#else -#define ADDRLABEL(x...) do { ; } while (0) -#endif - /* * Policy Table */ @@ -150,8 +144,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", - __func__, addr, type, ifindex, label); + net_dbg_ratelimited("%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, + ifindex, label); return label; } @@ -164,8 +158,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, + prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -207,8 +201,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, struct hlist_node *n; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, - replace); + net_dbg_ratelimited("%s(newp=%p, replace=%d)\n", __func__, newp, replace); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && @@ -247,9 +240,8 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label, - replace); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -271,8 +263,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, 
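
addrlabel.c above drops its private ADDRLABEL() macro, previously compiled out behind #if 0, in favour of net_dbg_ratelimited(), gaining dynamic-debug control and rate limiting in one step. A single-threaded token-bucket sketch of the ratelimiting half (interval and burst values arbitrary):

    #include <stdio.h>
    #include <time.h>

    /* Allow at most RL_BURST messages per RL_INTERVAL seconds; drop the
     * rest.  The kernel version additionally reports how many messages
     * were suppressed. */
    #define RL_INTERVAL 5
    #define RL_BURST    10

    static int log_ratelimited(const char *msg)
    {
        static time_t window;
        static int used;
        time_t now = time(NULL);

        if (now - window >= RL_INTERVAL) {
            window = now;
            used = 0;
        }
        if (used >= RL_BURST)
            return 0;                  /* suppressed */
        used++;
        return fprintf(stderr, "%s\n", msg);
    }
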
prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -294,8 +286,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&net->ipv6.ip6addrlbl_table.lock); @@ -312,8 +304,6 @@ static int __net_init ip6addrlbl_net_init(struct net *net) int err; int i; - ADDRLABEL(KERN_DEBUG "%s\n", __func__); - spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 21e01695b48c..53cf68e0242b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,6 +47,9 @@ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); +#define ac_dereference(a, idev) \ + rcu_dereference_protected(a, lockdep_is_held(&(idev)->lock)) + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(const struct net *net, @@ -64,14 +67,11 @@ static u32 inet6_acaddr_hash(const struct net *net, int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_ac_socklist *pac = NULL; + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct inet6_dev *idev; - struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int ishost = !net->ipv6.devconf_all->forwarding; - int err = 0; - - ASSERT_RTNL(); + int err = 0, ishost; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -79,32 +79,43 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; if (ifindex) - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); - if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) - return -EINVAL; + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { + err = -EINVAL; + goto error; + } pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (!pac) - return -ENOMEM; + if (!pac) { + err = -ENOMEM; + goto error; + } + pac->acl_next = NULL; pac->acl_addr = *addr; + ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding); + if (ifindex == 0) { struct rt6_info *rt; + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } else if (ishost) { + rcu_read_unlock(); err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ - dev = __dev_get_by_flags(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } + rcu_read_unlock(); } if (!dev) { @@ -112,7 +123,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; @@ -120,8 +131,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = -EADDRNOTAVAIL; goto error; } + /* reset ishost, now that we have a specific device */ - ishost = 
!idev->cnf.forwarding; + ishost = !READ_ONCE(idev->cnf.forwarding); pac->acl_ifindex = dev->ifindex; @@ -134,7 +146,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto error; + goto error_idev; } err = __ipv6_dev_ac_inc(idev, addr); @@ -144,7 +156,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac = NULL; } +error_idev: + in6_dev_put(idev); error: + dev_put(dev); + if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -155,12 +171,10 @@ error: */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; + struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct net_device *dev; prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { @@ -176,9 +190,11 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - dev = __dev_get_by_index(net, pac->acl_ifindex); - if (dev) + dev = dev_get_by_index(net, pac->acl_ifindex); + if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); + dev_put(dev); + } sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -187,21 +203,20 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int prev_index; + int prev_index = 0; - ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = __dev_get_by_index(net, pac->acl_ifindex); + dev_put(dev); + dev = dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -209,6 +224,8 @@ void __ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } + + dev_put(dev); } void ipv6_sock_ac_close(struct sock *sk) @@ -217,9 +234,8 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - rtnl_lock(); + __ipv6_sock_ac_close(sk); - rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) @@ -319,16 +335,14 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) struct net *net; int err; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -380,12 +394,10 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -414,14 +426,18 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct 
net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev = in6_dev_get(dev); + int err; if (!idev) return -ENODEV; - return __ipv6_dev_ac_dec(idev, addr); + + err = __ipv6_dev_ac_dec(idev, addr); + in6_dev_put(idev); + + return err; } void ipv6_ac_destroy_dev(struct inet6_dev *idev) @@ -429,7 +445,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) struct ifacaddr6 *aca; write_lock_bh(&idev->lock); - while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { + while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 93578b2ec35f..7272d7e0fc36 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -963,8 +963,7 @@ insert_above: } static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, - const struct fib6_info *match, - const struct fib6_table *table) + const struct fib6_info *match) { int cpu; @@ -999,21 +998,15 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, rcu_read_unlock(); } -struct fib6_nh_pcpu_arg { - struct fib6_info *from; - const struct fib6_table *table; -}; - static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg) { - struct fib6_nh_pcpu_arg *arg = _arg; + struct fib6_info *arg = _arg; - __fib6_drop_pcpu_from(nh, arg->from, arg->table); + __fib6_drop_pcpu_from(nh, arg); return 0; } -static void fib6_drop_pcpu_from(struct fib6_info *f6i, - const struct fib6_table *table) +static void fib6_drop_pcpu_from(struct fib6_info *f6i) { /* Make sure rt6_make_pcpu_route() wont add other percpu routes * while we are cleaning them here. @@ -1022,19 +1015,14 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ if (f6i->nh) { - struct fib6_nh_pcpu_arg arg = { - .from = f6i, - .table = table - }; - rcu_read_lock(); - nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg); + nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, f6i); rcu_read_unlock(); } else { struct fib6_nh *fib6_nh; fib6_nh = f6i->fib6_nh; - __fib6_drop_pcpu_from(fib6_nh, f6i, table); + __fib6_drop_pcpu_from(fib6_nh, f6i); } } @@ -1045,7 +1033,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, /* Flush all cached dst in exception table */ rt6_flush_exceptions(rt); - fib6_drop_pcpu_from(rt, table); + fib6_drop_pcpu_from(rt); if (rt->nh) { spin_lock(&rt->nh->lock); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 877bee7ffee9..fcc20c7250eb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1762,8 +1762,7 @@ alloc_new_skb: if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1e225e6489ea..e66ec623972e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -117,26 +117,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } -static bool setsockopt_needs_rtnl(int optname) -{ - switch (optname) { - case IPV6_ADDRFORM: - case IPV6_ADD_MEMBERSHIP: - case IPV6_DROP_MEMBERSHIP: - case IPV6_JOIN_ANYCAST: - case IPV6_LEAVE_ANYCAST: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case 
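
The anycast conversion above trades the RTNL-protected __dev_get_by_index()/__in6_dev_get() lookups for reference-taking dev_get_by_index()/in6_dev_get(), which is why every exit path now gains a matching dev_put()/in6_dev_put() (see the error_idev/error unwind in ipv6_sock_ac_join()). The discipline in miniature, with a hypothetical refcounted object and C11 atomics:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct obj {
        atomic_int ref;
        /* ... payload ... */
    };

    /* A lookup that returns a held reference, like dev_get_by_index(). */
    static struct obj *obj_get(struct obj *o)
    {
        if (o)
            atomic_fetch_add(&o->ref, 1);
        return o;
    }

    static void obj_put(struct obj *o)
    {
        if (o && atomic_fetch_sub(&o->ref, 1) == 1)
            free(o);                   /* dropped the last reference */
    }

    static int do_work(struct obj *(*lookup)(int id), int id)
    {
        struct obj *o = lookup(id);    /* returns a held reference */

        if (!o)
            return -1;                 /* -ENODEV analogue */

        /* ... use o; every early exit must still reach the put ... */

        obj_put(o);                    /* pairs with the lookup above */
        return 0;
    }

Unlike the lockless __dev_get_by_index(), which is only safe while the big subsystem lock is held, the refcounted form lets each call site pin exactly what it uses for exactly as long as it needs it.
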
MCAST_UNBLOCK_SOURCE: - case MCAST_MSFILTER: - return true; - } - return false; -} - static int copy_group_source_from_sockptr(struct group_source_req *greqs, sockptr_t optval, int optlen) { @@ -395,9 +375,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - int val, valbool; int retv = -ENOPROTOOPT; - bool needs_rtnl = setsockopt_needs_rtnl(optname); + int val, valbool; if (sockptr_is_null(optval)) val = 0; @@ -562,8 +541,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return 0; } } - if (needs_rtnl) - rtnl_lock(); + sockopt_lock_sock(sk); /* Another thread has converted the socket into IPv4 with @@ -969,8 +947,6 @@ done: unlock: sockopt_release_sock(sk); - if (needs_rtnl) - rtnl_unlock(); return retv; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 65831b4fee1f..6c875721d423 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -108,9 +108,9 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; -/* - * socket join on multicast group - */ +#define mc_assert_locked(idev) \ + lockdep_assert_held(&(idev)->mc_lock) + #define mc_dereference(e, idev) \ rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) @@ -169,17 +169,18 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? iv : 1; } +/* + * socket join on multicast group + */ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode) { - struct net_device *dev = NULL; - struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); + struct net_device *dev = NULL; int err; - ASSERT_RTNL(); - if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -199,13 +200,18 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (ifindex == 0) { struct rt6_info *rt; + + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } - } else - dev = __dev_get_by_index(net, ifindex); + rcu_read_unlock(); + } else { + dev = dev_get_by_index(net, ifindex); + } if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -216,12 +222,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, mc_lst->sfmode = mode; RCU_INIT_POINTER(mc_lst->sflist, NULL); - /* - * now add/increase the group membership on the device - */ - + /* now add/increase the group membership on the device */ err = __ipv6_dev_mc_inc(dev, addr, mode); + dev_put(dev); + if (err) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; @@ -248,14 +253,36 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, /* * socket leave on multicast group */ +static void __ipv6_sock_mc_drop(struct sock *sk, struct ipv6_mc_socklist *mc_lst) +{ + struct net *net = sock_net(sk); + struct net_device *dev; + + dev = dev_get_by_index(net, mc_lst->ifindex); + if (dev) { + struct inet6_dev *idev = in6_dev_get(dev); + + ip6_mc_leave_src(sk, mc_lst, idev); + + if (idev) { + __ipv6_dev_mc_dec(idev, &mc_lst->addr); + in6_dev_put(idev); + } + + dev_put(dev); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } + + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + kfree_rcu(mc_lst, rcu); +} + int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - 
struct ipv6_mc_socklist *mc_lst; struct ipv6_mc_socklist __rcu **lnk; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct ipv6_mc_socklist *mc_lst; if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -265,23 +292,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - struct net_device *dev; - *lnk = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); return 0; } } @@ -290,31 +302,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_dev(struct net *net, + const struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); + struct rt6_info *rt; + rcu_read_lock(); + rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } + rcu_read_unlock(); } else { - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); } - if (!dev) return NULL; - idev = __in6_dev_get(dev); + + idev = in6_dev_get(dev); + dev_put(dev); + if (!idev) return NULL; - if (idev->dead) - return NULL; + return idev; } @@ -322,28 +339,10 @@ void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) { - struct net_device *dev; - np->ipv6_mc_list = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); } } @@ -354,24 +353,22 @@ void ipv6_sock_mc_close(struct sock *sk) if (!rcu_access_pointer(np->ipv6_mc_list)) return; - rtnl_lock(); lock_sock(sk); __ipv6_sock_mc_close(sk); release_sock(sk); - rtnl_unlock(); } int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr) + struct group_source_req *pgsr) { + struct ipv6_pinfo *inet6 = inet6_sk(sk); struct in6_addr *source, *group; + struct net *net = sock_net(sk); struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; - struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sock_net(sk); - int i, j, rv; + struct inet6_dev *idev; int leavegroup = 0; + int i, j, rv; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; @@ -380,13 +377,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); + idev = ip6_mc_find_dev(net, group, 
pgsr->gsr_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = -EADDRNOTAVAIL; - mutex_lock(&idev->mc_lock); for_each_pmc_socklock(inet6, sk, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -483,6 +486,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -491,12 +495,12 @@ done: int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list) { - const struct in6_addr *group; - struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; struct net *net = sock_net(sk); + const struct in6_addr *group; + struct ipv6_mc_socklist *pmc; + struct inet6_dev *idev; int leavegroup = 0; int i, err; @@ -508,10 +512,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = 0; if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { @@ -544,24 +555,19 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } - mutex_lock(&idev->mc_lock); + err = ip6_mc_add_src(idev, group, gsf->gf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, newpsl->sl_max)); goto done; } - mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; - mutex_lock(&idev->mc_lock); ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); - mutex_unlock(&idev->mc_lock); } - mutex_lock(&idev->mc_lock); psl = sock_dereference(pmc->sflist, sk); if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, @@ -571,12 +577,14 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } + rcu_assign_pointer(pmc->sflist, newpsl); - mutex_unlock(&idev->mc_lock); kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: + mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -597,10 +605,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - /* changes to the ipv6_mc_list require the socket lock and - * rtnl lock. We have the socket lock, so reading the list is safe. 
- */ - for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; @@ -668,12 +672,13 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, return rv; } -/* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -703,12 +708,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) mld_ifc_event(mc->idev); } -/* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -729,14 +735,13 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) refcount_dec(&mc->mca_refcnt); } -/* - * deleted ifmcaddr6 manipulation - * called with mc_lock - */ +/* deleted ifmcaddr6 manipulation */ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc; + mc_assert_locked(idev); + /* this is an "ifmcaddr6" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not * used for management of the delete list. Using the same structure @@ -770,13 +775,14 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) rcu_assign_pointer(idev->mc_tomb, pmc); } -/* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; struct ifmcaddr6 *pmc, *pmc_prev; + mc_assert_locked(idev); + pmc_prev = NULL; for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) @@ -813,11 +819,12 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } } -/* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; + mc_assert_locked(idev); + pmc = mc_dereference(idev->mc_tomb, idev); RCU_INIT_POINTER(idev->mc_tomb, NULL); @@ -861,11 +868,6 @@ static void mld_clear_report(struct inet6_dev *idev) spin_unlock_bh(&idev->mc_report_lock); } -static void mca_get(struct ifmcaddr6 *mc) -{ - refcount_inc(&mc->mca_refcnt); -} - static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { @@ -874,13 +876,14 @@ static void ma_put(struct ifmcaddr6 *mc) } } -/* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; + mc_assert_locked(idev); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; @@ -945,23 +948,22 @@ error: static int __ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr, unsigned int mode) { - struct ifmcaddr6 *mc; struct inet6_dev *idev; - - ASSERT_RTNL(); + struct ifmcaddr6 *mc; /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (!idev) return -EINVAL; - if (idev->dead) { + mutex_lock(&idev->mc_lock); + + if (READ_ONCE(idev->dead)) { + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENODEV; } - mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; @@ -982,13 +984,11 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, rcu_assign_pointer(mc->next, idev->mc_list); rcu_assign_pointer(idev->mc_list, mc); - mca_get(mc); - mld_del_delrec(idev, mc); 
igmp6_group_added(mc); inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); - ma_put(mc); + return 0; } @@ -1005,9 +1005,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifmcaddr6 *ma, __rcu **map; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); + for (map = &idev->mc_list; (ma = mc_dereference(*map, idev)); map = &ma->next) { @@ -1038,13 +1037,12 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) struct inet6_dev *idev; int err; - ASSERT_RTNL(); - - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) - err = -ENODEV; - else - err = __ipv6_dev_mc_dec(idev, addr); + return -ENODEV; + + err = __ipv6_dev_mc_dec(idev, addr); + in6_dev_put(idev); return err; } @@ -1091,46 +1089,51 @@ unlock: return rv; } -/* called with mc_lock */ static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = get_random_u32_below(idev->mc_maxdelay); + mc_assert_locked(idev); + idev->mc_gq_running = 1; if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_gq_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_gq_running = 0; if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_ifc_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_ifc_count = 0; if (cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } -/* called with mc_lock */ static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } @@ -1155,14 +1158,13 @@ static void mld_report_stop_work(struct inet6_dev *idev) __in6_dev_put(idev); } -/* - * IGMP handling (alias multicast ICMPv6 messages) - * called with mc_lock - */ +/* IGMP handling (alias multicast ICMPv6 messages) */ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; + mc_assert_locked(ma->idev); + /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) @@ -1181,15 +1183,15 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) ma->mca_flags |= MAF_TIMER_RUNNING; } -/* mark EXCLUDE-mode sources - * called with mc_lock - */ +/* mark EXCLUDE-mode sources */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + scount = 0; for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) @@ -1212,13 +1214,14 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, return true; } -/* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + if (pmc->mca_sfmode == MCAST_EXCLUDE) return mld_xmarksources(pmc, nsrcs, srcs); @@ -1913,7 +1916,6 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, #define AVAILABLE(skb) ((skb) ? 
skb_availroom(skb) : 0) -/* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) @@ -1927,6 +1929,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_report *pmr; unsigned int mtu; + mc_assert_locked(idev); + if (pmc->mca_flags & MAF_NOREPORT) return skb; @@ -2045,12 +2049,13 @@ empty_source: return skb; } -/* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; + mc_assert_locked(idev); + if (!pmc) { for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) @@ -2072,10 +2077,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) mld_sendpack(skb); } -/* - * remove zero-count source records from a source filter list - * called with mc_lock - */ +/* remove zero-count source records from a source filter list */ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; @@ -2099,7 +2101,6 @@ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *i } } -/* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; @@ -2263,13 +2264,14 @@ err_out: goto out; } -/* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { - struct sk_buff *skb; struct ifmcaddr6 *pmc; + struct sk_buff *skb; int type; + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2316,13 +2318,14 @@ static void mld_dad_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; int rv = 0; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2359,7 +2362,6 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, return rv; } -/* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2371,6 +2373,8 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2412,15 +2416,14 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* - * Add multicast single-source filter to the interface list - * called with mc_lock - */ +/* Add multicast single-source filter to the interface list */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2443,11 +2446,12 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, return 0; } -/* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf; + + mc_assert_locked(pmc->idev); for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2460,14 +2464,15 @@ static void sf_markstate(struct 
ifmcaddr6 *pmc) } } -/* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf, *dpsf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf, *dpsf; int qrv = pmc->idev->mc_qrv; int new_in, rv; + mc_assert_locked(pmc->idev); + rv = 0; for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2526,10 +2531,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) return rv; } -/* - * Add multicast source filter list to the interface list - * called with mc_lock - */ +/* Add multicast source filter list to the interface list */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2541,6 +2543,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2588,11 +2592,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; + mc_assert_locked(pmc->idev); + for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); psf; psf = nextpsf) { @@ -2613,11 +2618,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1); } -/* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; + mc_assert_locked(ma->idev); + if (ma->mca_flags & MAF_NOREPORT) return; @@ -2664,9 +2670,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, return err; } -/* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { + mc_assert_locked(ma->idev); + if (mld_in_v1_mode(ma->idev)) { if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, @@ -2711,9 +2718,10 @@ static void mld_ifc_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2868,8 +2876,6 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) { struct ifmcaddr6 *pmc; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { for_each_mc_mclock(idev, pmc) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f2299b61221b..d4c5876e1771 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -243,9 +243,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { - ND_PRINTK(2, warn, - "%s: duplicated ND6 option found: type=%d\n", - __func__, nd_opt->nd_opt_type); + net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } @@ -275,11 +274,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, * to accommodate future extension to the * protocol. 
*/ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); + net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n", + __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } next_opt: opt_len -= l; @@ -377,24 +373,25 @@ static int ndisc_constructor(struct neighbour *neigh) static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return -EINVAL; + addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); - return 0; + return ipv6_dev_mc_inc(dev, &maddr); } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return; + addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } @@ -753,9 +750,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { - ND_PRINTK(1, dbg, - "%s: trying to ucast probe in NUD_INVALID: %pI6\n", - __func__, target); + net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n", + __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { @@ -813,7 +809,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NS: multicast target address\n"); + net_dbg_ratelimited("NS: multicast target address\n"); return reason; } @@ -822,7 +818,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { - ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); + net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n"); return reason; } @@ -832,8 +828,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NS: invalid link-layer address length\n"); + net_dbg_ratelimited("NS: invalid link-layer address length\n"); return reason; } @@ -843,8 +838,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * in the message. 
*/ if (dad) { - ND_PRINTK(2, warn, - "NS: bad DAD packet (link-layer address option)\n"); + net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n"); return reason; } } @@ -861,10 +855,8 @@ have_ifp: if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ - ND_PRINTK(2, notice, - "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", - ifp->idev->dev->name, - &ifp->addr, np); + net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", + ifp->idev->dev->name, &ifp->addr, np); goto out; } /* @@ -1015,13 +1007,13 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NA: target address is multicast\n"); + net_dbg_ratelimited("NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { - ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); + net_dbg_ratelimited("NA: solicited NA is multicasted\n"); return reason; } @@ -1040,8 +1032,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NA: invalid link-layer address length\n"); + net_dbg_ratelimited("NA: invalid link-layer address length\n"); return reason; } } @@ -1062,9 +1053,9 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) - ND_PRINTK(1, warn, - "NA: %pM advertised our address %pI6c on %s!\n", - eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); + net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n", + eth_hdr(skb)->h_source, &ifp->addr, + ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } @@ -1151,7 +1142,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); if (!idev) { - ND_PRINTK(1, err, "RS: can't find in6 device\n"); + net_err_ratelimited("RS: can't find in6 device\n"); return reason; } @@ -1259,11 +1250,9 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); - ND_PRINTK(2, info, - "RA: %s, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, "RA: source address is not link-local\n"); + net_dbg_ratelimited("RA: source address is not link-local\n"); return reason; } if (optlen < 0) @@ -1271,15 +1260,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { - ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); + net_dbg_ratelimited("RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { - ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", - skb->dev->name); + net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } @@ -1287,18 +1275,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, did not accept ra for dev: %s\n", - __func__, skb->dev->name); + 
net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #endif @@ -1327,18 +1313,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { - ND_PRINTK(2, info, - "RA: %s, defrtr is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__, + skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { - ND_PRINTK(2, info, - "RA: router lifetime (%ds) is too short: %s\n", - lifetime, skb->dev->name); + net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime, + skb->dev->name); goto skip_defrtr; } @@ -1348,9 +1332,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: default router ignored\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1368,9 +1351,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1383,10 +1365,10 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", - rt, lifetime, defrtr_usr_metric, skb->dev->name); + net_dbg_ratelimited("RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, + defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { - ND_PRINTK(3, info, "RA: adding default router\n"); + net_dbg_ratelimited("RA: adding default router\n"); if (neigh) neigh_release(neigh); @@ -1395,9 +1377,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { - ND_PRINTK(0, err, - "RA: %s failed to add default route\n", - __func__); + net_err_ratelimited("RA: %s failed to add default route\n", __func__); return reason; } @@ -1405,9 +1385,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1438,7 +1417,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); + 
net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n"); } } @@ -1494,8 +1473,7 @@ skip_linkparms: lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { - ND_PRINTK(2, warn, - "RA: invalid link-layer address length\n"); + net_dbg_ratelimited("RA: invalid link-layer address length\n"); goto out; } } @@ -1509,9 +1487,8 @@ skip_linkparms: } if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, accept_ra is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__, + skb->dev->name); goto out; } @@ -1519,9 +1496,8 @@ skip_linkparms: if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: router info ignored.\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } @@ -1557,9 +1533,8 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; } #endif @@ -1588,7 +1563,7 @@ skip_routeinfo: } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { - ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); + net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); @@ -1607,7 +1582,7 @@ skip_routeinfo: } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { - ND_PRINTK(2, warn, "RA: invalid RA options\n"); + net_dbg_ratelimited("RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or @@ -1635,15 +1610,13 @@ static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: - ND_PRINTK(2, warn, - "Redirect: from host or unauthorized router\n"); + net_dbg_ratelimited("Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: source address is not link-local\n"); + net_dbg_ratelimited("Redirect: source address is not link-local\n"); return reason; } @@ -1704,15 +1677,13 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { - ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", - dev->name); + net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: target address is not link-local unicast\n"); + net_dbg_ratelimited("Redirect: target address is not link-local unicast\n"); return; } @@ -1731,8 +1702,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { - ND_PRINTK(2, warn, - "Redirect: destination is not a neighbour\n"); + net_dbg_ratelimited("Redirect: destination is not a neighbour\n"); goto 
release; } @@ -1745,8 +1715,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { - ND_PRINTK(2, warn, - "Redirect: no neigh for target address\n"); + net_dbg_ratelimited("Redirect: no neigh for target address\n"); goto release; } @@ -1847,14 +1816,12 @@ enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { - ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", - ipv6_hdr(skb)->hop_limit); + net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { - ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", - msg->icmph.icmp6_code); + net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } @@ -2005,9 +1972,8 @@ static int __net_init ndisc_net_init(struct net *net) err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - ND_PRINTK(0, err, - "NDISC: Failed to initialize the control socket (err %d)\n", - err); + net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n", + err); return err; } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 24aec295a51c..a0be3896a934 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -835,8 +835,7 @@ start: if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) goto wait_for_memory; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 69e03630f64c..e8b78ec682da 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation + * Copyright (C) 2018 - 2019, 2021-2025 Intel Corporation */ #include <linux/debugfs.h> @@ -490,7 +490,6 @@ static const char *hw_flag_names[] = { FLAG(DETECTS_COLOR_COLLISION), FLAG(MLO_MCAST_MULTI_LINK_TX), FLAG(DISALLOW_PUNCTURING), - FLAG(DISALLOW_PUNCTURING_5GHZ), FLAG(HANDLES_QUIET_CSA), FLAG(STRICT), #undef FLAG diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4ef7b3656aca..ec68204fddc9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2642,6 +2642,8 @@ int ieee80211_put_eht_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, const struct ieee80211_supported_band *sband, const struct ieee80211_conn_settings *conn); +int ieee80211_put_reg_conn(struct sk_buff *skb, + enum ieee80211_channel_flags flags); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 4f7b7d0f64f2..d71eabe5abf8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -368,6 +368,13 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_update_apvlan_links(sdata); } + /* + * Ignore errors if we are only removing links as removal should + * always 
succeed + */ + if (!new_links) + ret = 0; + if (ret) { /* restore config */ memcpy(sdata->link, old_data, sizeof(old_data)); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6001c8897d7c..8b9c132cce3d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -776,10 +776,6 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING)) return false; - if (chandef->punctured && chandef->chan->band == NL80211_BAND_5GHZ && - ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING_5GHZ)) - return false; - return true; } @@ -1645,6 +1641,30 @@ static size_t ieee80211_add_before_he_elems(struct sk_buff *skb, return noffset; } +static size_t ieee80211_add_before_reg_conn(struct sk_buff *skb, + const u8 *elems, size_t elems_len, + size_t offset) +{ + static const u8 before_reg_conn[] = { + /* + * no need to list the ones split off before HE + * or generated here + */ + WLAN_EID_EXTENSION, WLAN_EID_EXT_DH_PARAMETER, + WLAN_EID_EXTENSION, WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION, + }; + size_t noffset; + + if (!elems_len) + return offset; + + noffset = ieee80211_ie_split(elems, elems_len, before_reg_conn, + ARRAY_SIZE(before_reg_conn), offset); + skb_put_data(skb, elems + offset, noffset - offset); + + return noffset; +} + #define PRESENT_ELEMS_MAX 8 #define PRESENT_ELEM_EXT_OFFS 0x100 @@ -1806,6 +1826,22 @@ ieee80211_add_link_elems(struct ieee80211_sub_if_data *sdata, } /* + * if present, add any custom IEs that go before regulatory + * connectivity element + */ + offset = ieee80211_add_before_reg_conn(skb, extra_elems, + extra_elems_len, offset); + + if (sband->band == NL80211_BAND_6GHZ) { + /* + * as per Section E.2.7 of IEEE 802.11 REVme D7.0, non-AP STA + * capable of operating on the 6 GHz band shall transmit + * regulatory connectivity element. + */ + ieee80211_put_reg_conn(skb, chan->flags); + } + + /* * careful - need to know about all the present elems before * calling ieee80211_assoc_add_ml_elem(), so add this one if * we're going to put it after the ML element @@ -1943,14 +1979,7 @@ ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, } skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops)); - /* Many APs have broken parsing of the extended MLD capa/ops field, - * dropping (re-)association request frames or replying with association - * response with a failure status if it's present. Without a clear - * indication as to whether the AP supports parsing this field or not do - * not include it in the common information unless strict mode is set. - */ - if (ieee80211_hw_check(&local->hw, STRICT) && - assoc_data->ext_mld_capa_ops) { + if (assoc_data->ext_mld_capa_ops) { ml_elem->control |= cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP); common->len += 2; @@ -2381,9 +2410,26 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy, * update cfg80211 directly. 
*/ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) { + struct link_sta_info *link_sta; + struct sta_info *ap_sta; + link->conf->chanreq = link->csa.chanreq; cfg80211_ch_switch_notify(sdata->dev, &link->csa.chanreq.oper, link->link_id); + link->conf->csa_active = false; + + ap_sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); + if (WARN_ON(!ap_sta)) + return; + + link_sta = wiphy_dereference(wiphy, + ap_sta->link[link->link_id]); + if (WARN_ON(!link_sta)) + return; + + link_sta->pub->bandwidth = + _ieee80211_sta_cur_vht_bw(link_sta, + &link->csa.chanreq.oper); return; } @@ -4734,6 +4780,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, }; + bool sae_need_confirm = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4779,6 +4826,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; ifmgd->auth_data->timeout_started = true; run_again(sdata, ifmgd->auth_data->timeout); + if (auth_transaction == 1) + sae_need_confirm = true; goto notify_driver; } @@ -4822,6 +4871,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (!ieee80211_mark_sta_auth(sdata)) return; /* ignore frame -- wait for timeout */ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && + auth_transaction == 1) { + sae_need_confirm = true; + } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 2) { sdata_info(sdata, "SAE peer confirmed\n"); ifmgd->auth_data->peer_confirmed = true; @@ -4829,7 +4881,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); notify_driver: - drv_mgd_complete_tx(sdata->local, sdata, &info); + if (!sae_need_confirm) + drv_mgd_complete_tx(sdata->local, sdata, &info); } #define case_WLAN(type) \ @@ -5925,6 +5978,7 @@ ieee80211_ap_power_type(u8 control) return IEEE80211_REG_LPI_AP; case IEEE80211_6GHZ_CTRL_REG_SP_AP: case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_SP_AP; case IEEE80211_6GHZ_CTRL_REG_VLP_AP: return IEEE80211_REG_VLP_AP; @@ -8698,21 +8752,33 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, bool have_sta = false; bool mlo; int err; + u16 new_links; if (link_id >= 0) { mlo = true; if (WARN_ON(!ap_mld_addr)) return -EINVAL; - err = ieee80211_vif_set_links(sdata, BIT(link_id), 0); + new_links = BIT(link_id); } else { if (WARN_ON(ap_mld_addr)) return -EINVAL; ap_mld_addr = cbss->bssid; - err = ieee80211_vif_set_links(sdata, 0, 0); + new_links = 0; link_id = 0; mlo = false; } + if (assoc) { + rcu_read_lock(); + have_sta = sta_info_get(sdata, ap_mld_addr); + rcu_read_unlock(); + } + + if (mlo && !have_sta && + WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) + return -EINVAL; + + err = ieee80211_vif_set_links(sdata, new_links, 0); if (err) return err; @@ -8733,12 +8799,6 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, goto out_err; } - if (assoc) { - rcu_read_lock(); - have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } - if (!have_sta) { if (mlo) new_sta = sta_info_alloc_with_link(sdata, ap_mld_addr, @@ -9338,6 +9398,39 @@ out_rcu: return err; } +static bool +ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(struct cfg80211_assoc_request *req) +{ + const struct cfg80211_bss_ies *ies; + struct cfg80211_bss *bss; + const struct element *ml; + + /* not an 
MLO connection if link_id < 0, so irrelevant */ + if (req->link_id < 0) + return false; + + bss = req->links[req->link_id].bss; + + guard(rcu)(); + ies = rcu_dereference(bss->ies); + for_each_element_extid(ml, WLAN_EID_EXT_EHT_MULTI_LINK, + ies->data, ies->len) { + const struct ieee80211_multi_link_elem *mle; + + if (!ieee80211_mle_type_ok(ml->data + 1, + IEEE80211_ML_CONTROL_TYPE_BASIC, + ml->datalen - 1)) + continue; + + mle = (void *)(ml->data + 1); + if (mle->control & cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP)) + return true; + } + + return false; + +} + int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { @@ -9390,7 +9483,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN); - assoc_data->ext_mld_capa_ops = cpu_to_le16(req->ext_mld_capa_ops); + /* + * Many APs have broken parsing of the extended MLD capa/ops field, + * dropping (re-)association request frames or replying with association + * response with a failure status if it's present. + * Set our value from the userspace request only in strict mode or if + * the AP also had that field present. + */ + if (ieee80211_hw_check(&local->hw, STRICT) || + ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(req)) + assoc_data->ext_mld_capa_ops = + cpu_to_le16(req->ext_mld_capa_ops); if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -10033,7 +10136,6 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!add_links_data->link[link_id].bss || !(sdata->u.mgd.reconf.added_links & BIT(link_id))) - continue; valid_links |= BIT(link_id); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8699755081ad..caa3e6b3f46e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4245,10 +4245,16 @@ static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx, rx->link_sta = NULL; } - if (link_id < 0) - rx->link = &rx->sdata->deflink; - else if (!ieee80211_rx_data_set_link(rx, link_id)) + if (link_id < 0) { + if (ieee80211_vif_is_mld(&rx->sdata->vif) && + sta && !sta->sta.valid_links) + rx->link = + rcu_dereference(rx->sdata->link[sta->deflink.link_id]); + else + rx->link = &rx->sdata->deflink; + } else if (!ieee80211_rx_data_set_link(rx, link_id)) { return false; + } return true; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 9799164a56d9..dbf98aa4cd67 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/if_arp.h> @@ -800,6 +800,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_req->req.scan_6ghz_params = req->scan_6ghz_params; local->hw_scan_req->req.scan_6ghz = req->scan_6ghz; + local->hw_scan_req->req.first_part = req->first_part; /* * After allocating local->hw_scan_req, we must diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 89cf365b07e6..8c550aab9bdc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright 
(C) 2018-2025 Intel Corporation */ #include <linux/module.h> @@ -729,6 +729,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, IEEE80211_RATE_MANDATORY_G; break; case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: mandatory = IEEE80211_RATE_MANDATORY_A; break; case NL80211_BAND_60GHZ: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ff6c5d5e631d..a1cb63222b6d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2556,6 +2556,23 @@ end: return 0; } +int ieee80211_put_reg_conn(struct sk_buff *skb, + enum ieee80211_channel_flags flags) +{ + u8 reg_conn = IEEE80211_REG_CONN_LPI_VALID | + IEEE80211_REG_CONN_LPI_VALUE | + IEEE80211_REG_CONN_SP_VALID; + + if (!(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT)) + reg_conn |= IEEE80211_REG_CONN_SP_VALUE; + + skb_put_u8(skb, WLAN_EID_EXTENSION); + skb_put_u8(skb, 1 + sizeof(reg_conn)); + skb_put_u8(skb, WLAN_EID_EXT_NON_AP_STA_REG_CON); + skb_put_u8(skb, reg_conn); + return 0; +} + int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 9b12ca97f412..aef74308c18e 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -97,8 +97,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; - struct mctp_route *rt; struct sk_buff *skb = NULL; + struct mctp_dst dst; int hlen; if (addr) { @@ -133,34 +133,30 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, extaddr, msg->msg_name); - struct net_device *dev; - - rc = -EINVAL; - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex); - /* check for correct halen */ - if (dev && extaddr->smctp_halen == dev->addr_len) { - hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr); - rc = 0; - } - rcu_read_unlock(); + + if (!mctp_sockaddr_ext_is_ok(extaddr)) + return -EINVAL; + + rc = mctp_dst_from_extaddr(&dst, sock_net(sk), + extaddr->smctp_ifindex, + extaddr->smctp_halen, + extaddr->smctp_haddr); if (rc) - goto err_free; - rt = NULL; + return rc; + } else { - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) { - rc = -EHOSTUNREACH; - goto err_free; - } - hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr); + rc = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr, &dst); + if (rc) + return rc; } + hlen = LL_RESERVED_SPACE(dst.dev->dev) + sizeof(struct mctp_hdr); + skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) - return rc; + goto err_release_dst; skb_reserve(skb, hlen); @@ -175,30 +171,16 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) cb = __mctp_cb(skb); cb->net = addr->smctp_network; - if (!rt) { - /* fill extended address in cb */ - DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, - extaddr, msg->msg_name); - - if (!mctp_sockaddr_ext_is_ok(extaddr) || - extaddr->smctp_halen > sizeof(cb->haddr)) { - rc = -EINVAL; - goto err_free; - } - - cb->ifindex = extaddr->smctp_ifindex; - /* smctp_halen is checked above */ - cb->halen = extaddr->smctp_halen; - memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); - } - - rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, + rc = mctp_local_output(sk, 
&dst, skb, addr->smctp_addr.s_addr, addr->smctp_tag); + mctp_dst_release(&dst); return rc ? : len; err_free: kfree_skb(skb); +err_release_dst: + mctp_dst_release(&dst); return rc; } @@ -793,3 +775,7 @@ MODULE_DESCRIPTION("MCTP core"); MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>"); MODULE_ALIAS_NETPROTO(PF_MCTP); + +#if IS_ENABLED(CONFIG_MCTP_TEST) +#include "test/sock-test.c" +#endif diff --git a/net/mctp/route.c b/net/mctp/route.c index d9c8e5a5f9ce..a20d6b11d418 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -17,6 +17,8 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <kunit/static_stub.h> + #include <uapi/linux/if_arp.h> #include <net/mctp.h> @@ -32,7 +34,7 @@ static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev); /* route output callbacks */ -static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) +static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb) { kfree_skb(skb); return 0; @@ -368,7 +370,7 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) return 0; } -static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) +static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) { struct mctp_sk_key *key, *any_key = NULL; struct net *net = dev_net(skb->dev); @@ -392,6 +394,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) */ skb_orphan(skb); + if (skb->pkt_type == PACKET_OUTGOING) + skb->pkt_type = PACKET_LOOPBACK; + /* ensure we have enough data for a header and a type */ if (skb->len < sizeof(struct mctp_hdr) + 1) goto out; @@ -556,39 +561,31 @@ out: return rc; } -static unsigned int mctp_route_mtu(struct mctp_route *rt) +static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { - return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); -} - -static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) -{ - struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_hdr *hdr = mctp_hdr(skb); char daddr_buf[MAX_ADDR_LEN]; char *daddr = NULL; - unsigned int mtu; int rc; skb->protocol = htons(ETH_P_MCTP); + skb->pkt_type = PACKET_OUTGOING; - mtu = READ_ONCE(skb->dev->mtu); - if (skb->len > mtu) { + if (skb->len > dst->mtu) { kfree_skb(skb); return -EMSGSIZE; } - if (cb->ifindex) { - /* direct route; use the hwaddr we stashed in sendmsg */ - if (cb->halen != skb->dev->addr_len) { + /* direct route; use the hwaddr we stashed in sendmsg */ + if (dst->halen) { + if (dst->halen != skb->dev->addr_len) { /* sanity check, sendmsg should have already caught this */ kfree_skb(skb); return -EMSGSIZE; } - daddr = cb->haddr; + daddr = dst->haddr; } else { /* If lookup fails let the device handle daddr==NULL */ - if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) + if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0) daddr = daddr_buf; } @@ -599,7 +596,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) return -EHOSTUNREACH; } - mctp_flow_prepare_output(skb, route->dev); + mctp_flow_prepare_output(skb, dst->dev); rc = dev_queue_xmit(skb); if (rc) @@ -612,7 +609,8 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) static void mctp_route_release(struct mctp_route *rt) { if (refcount_dec_and_test(&rt->refs)) { - mctp_dev_put(rt->dev); + if (rt->dst_type == MCTP_ROUTE_DIRECT) + mctp_dev_put(rt->dev); kfree_rcu(rt, rcu); } } @@ -628,7 +626,7 @@ static struct mctp_route 
*mctp_route_alloc(void) INIT_LIST_HEAD(&rt->list); refcount_set(&rt->refs, 1); - rt->output = mctp_route_discard; + rt->output = mctp_dst_discard; return rt; } @@ -801,10 +799,16 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, } /* routing lookups */ +static unsigned int mctp_route_netid(struct mctp_route *rt) +{ + return rt->dst_type == MCTP_ROUTE_DIRECT ? + READ_ONCE(rt->dev->net) : rt->gateway.net; +} + static bool mctp_rt_match_eid(struct mctp_route *rt, unsigned int net, mctp_eid_t eid) { - return READ_ONCE(rt->dev->net) == net && + return mctp_route_netid(rt) == net && rt->min <= eid && rt->max >= eid; } @@ -813,54 +817,150 @@ static bool mctp_rt_compare_exact(struct mctp_route *rt1, struct mctp_route *rt2) { ASSERT_RTNL(); - return rt1->dev->net == rt2->dev->net && + return mctp_route_netid(rt1) == mctp_route_netid(rt2) && rt1->min == rt2->min && rt1->max == rt2->max; } -struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, - mctp_eid_t daddr) +/* must only be called on a direct route, as the final output hop */ +static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, + unsigned int mtu, struct mctp_route *route) +{ + mctp_dev_hold(route->dev); + dst->nexthop = eid; + dst->dev = route->dev; + dst->mtu = READ_ONCE(dst->dev->dev->mtu); + if (mtu) + dst->mtu = min(dst->mtu, mtu); + dst->halen = 0; + dst->output = route->output; +} + +int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, + unsigned char halen, const unsigned char *haddr) { - struct mctp_route *tmp, *rt = NULL; + struct net_device *netdev; + struct mctp_dev *dev; + int rc = -ENOENT; + + if (halen > sizeof(dst->haddr)) + return -EINVAL; rcu_read_lock(); - list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { - /* TODO: add metrics */ - if (mctp_rt_match_eid(tmp, dnet, daddr)) { - if (refcount_inc_not_zero(&tmp->refs)) { - rt = tmp; - break; - } - } + netdev = dev_get_by_index_rcu(net, ifindex); + if (!netdev) + goto out_unlock; + + if (netdev->addr_len != halen) { + rc = -EINVAL; + goto out_unlock; } + dev = __mctp_dev_get(netdev); + if (!dev) + goto out_unlock; + + dst->dev = dev; + dst->mtu = READ_ONCE(netdev->mtu); + dst->halen = halen; + dst->output = mctp_dst_output; + dst->nexthop = 0; + memcpy(dst->haddr, haddr, halen); + + rc = 0; + +out_unlock: rcu_read_unlock(); + return rc; +} - return rt; +void mctp_dst_release(struct mctp_dst *dst) +{ + mctp_dev_put(dst->dev); +} + +static struct mctp_route *mctp_route_lookup_single(struct net *net, + unsigned int dnet, + mctp_eid_t daddr) +{ + struct mctp_route *rt; + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (mctp_rt_match_eid(rt, dnet, daddr)) + return rt; + } + + return NULL; } -static struct mctp_route *mctp_route_lookup_null(struct net *net, - struct net_device *dev) +/* populates *dst on successful lookup, if set */ +int mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr, struct mctp_dst *dst) { - struct mctp_route *tmp, *rt = NULL; + const unsigned int max_depth = 32; + unsigned int depth, mtu = 0; + int rc = -EHOSTUNREACH; rcu_read_lock(); - list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { - if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL && - refcount_inc_not_zero(&tmp->refs)) { - rt = tmp; + for (depth = 0; depth < max_depth; depth++) { + struct mctp_route *rt; + + rt = mctp_route_lookup_single(net, dnet, daddr); + if (!rt) break; + + /* clamp mtu to the smallest in the path, allowing 0 + * to specify no restrictions + */ + if 
(mtu && rt->mtu) + mtu = min(mtu, rt->mtu); + else + mtu = mtu ?: rt->mtu; + + if (rt->dst_type == MCTP_ROUTE_DIRECT) { + if (dst) + mctp_dst_from_route(dst, daddr, mtu, rt); + rc = 0; + break; + + } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { + daddr = rt->gateway.eid; } } rcu_read_unlock(); - return rt; + return rc; } -static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, +static int mctp_route_lookup_null(struct net *net, struct net_device *dev, + struct mctp_dst *dst) +{ + int rc = -EHOSTUNREACH; + struct mctp_route *rt; + + rcu_read_lock(); + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL) + continue; + + if (rt->dev->dev != dev) + continue; + + mctp_dst_from_route(dst, 0, 0, rt); + rc = 0; + break; + } + + rcu_read_unlock(); + + return rc; +} + +static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, unsigned int mtu, u8 tag) { const unsigned int hlen = sizeof(struct mctp_hdr); @@ -933,7 +1033,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, skb_ext_copy(skb2, skb); /* do route */ - rc = rt->output(rt, skb2); + rc = dst->output(dst, skb2); if (rc) break; @@ -945,68 +1045,34 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, return rc; } -int mctp_local_output(struct sock *sk, struct mctp_route *rt, +int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); - struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_route tmp_rt = {0}; struct mctp_sk_key *key; struct mctp_hdr *hdr; unsigned long flags; unsigned int netid; unsigned int mtu; mctp_eid_t saddr; - bool ext_rt; int rc; u8 tag; - rc = -ENODEV; - - if (rt) { - ext_rt = false; - if (WARN_ON(!rt->dev)) - goto out_release; - - } else if (cb->ifindex) { - struct net_device *dev; - - ext_rt = true; - rt = &tmp_rt; - - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); - if (!dev) { - rcu_read_unlock(); - goto out_free; - } - rt->dev = __mctp_dev_get(dev); - rcu_read_unlock(); - - if (!rt->dev) - goto out_release; - - /* establish temporary route - we set up enough to keep - * mctp_route_output happy - */ - rt->output = mctp_route_output; - rt->mtu = 0; + KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr, + req_tag); - } else { - rc = -EINVAL; - goto out_free; - } + rc = -ENODEV; - spin_lock_irqsave(&rt->dev->addrs_lock, flags); - if (rt->dev->num_addrs == 0) { + spin_lock_irqsave(&dst->dev->addrs_lock, flags); + if (dst->dev->num_addrs == 0) { rc = -EHOSTUNREACH; } else { /* use the outbound interface's first address as our source */ - saddr = rt->dev->addrs[0]; + saddr = dst->dev->addrs[0]; rc = 0; } - spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); - netid = READ_ONCE(rt->dev->net); + spin_unlock_irqrestore(&dst->dev->addrs_lock, flags); + netid = READ_ONCE(dst->dev->net); if (rc) goto out_release; @@ -1032,15 +1098,13 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, tag = req_tag & MCTP_TAG_MASK; } + skb->pkt_type = PACKET_OUTGOING; skb->protocol = htons(ETH_P_MCTP); skb->priority = 0; skb_reset_transport_header(skb); skb_push(skb, sizeof(struct mctp_hdr)); skb_reset_network_header(skb); - skb->dev = rt->dev->dev; - - /* cb->net will have been set on initial ingress */ - cb->src = saddr; + skb->dev = dst->dev->dev; /* set up common header fields */ hdr = mctp_hdr(skb); @@ 
-1048,73 +1112,64 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, hdr->dest = daddr; hdr->src = saddr; - mtu = mctp_route_mtu(rt); + mtu = dst->mtu; if (skb->len + sizeof(struct mctp_hdr) <= mtu) { hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | tag; - rc = rt->output(rt, skb); + rc = dst->output(dst, skb); } else { - rc = mctp_do_fragment_route(rt, skb, mtu, tag); + rc = mctp_do_fragment_route(dst, skb, mtu, tag); } /* route output functions consume the skb, even on error */ skb = NULL; out_release: - if (!ext_rt) - mctp_route_release(rt); - - mctp_dev_put(tmp_rt.dev); - -out_free: kfree_skb(skb); return rc; } /* route management */ -static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, - unsigned int daddr_extent, unsigned int mtu, - unsigned char type) + +/* mctp_route_add(): Add the provided route, previously allocated via + * mctp_route_alloc(). On success, takes ownership of @rt, which includes a + * hold on rt->dev for usage in the route table. On failure a caller will want + * to mctp_route_release(). + * + * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max, + * rt->mtu and either rt->dev (with a reference held appropriately) or + * rt->gateway. Other fields will be populated. + */ +static int mctp_route_add(struct net *net, struct mctp_route *rt) { - int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); - struct net *net = dev_net(mdev->dev); - struct mctp_route *rt, *ert; + struct mctp_route *ert; - if (!mctp_address_unicast(daddr_start)) + if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max)) return -EINVAL; - if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev) return -EINVAL; - switch (type) { + if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid) + return -EINVAL; + + switch (rt->type) { case RTN_LOCAL: - rtfn = mctp_route_input; + rt->output = mctp_dst_input; break; case RTN_UNICAST: - rtfn = mctp_route_output; + rt->output = mctp_dst_output; break; default: return -EINVAL; } - rt = mctp_route_alloc(); - if (!rt) - return -ENOMEM; - - rt->min = daddr_start; - rt->max = daddr_start + daddr_extent; - rt->mtu = mtu; - rt->dev = mdev; - mctp_dev_hold(rt->dev); - rt->type = type; - rt->output = rtfn; - ASSERT_RTNL(); + /* Prevent duplicate identical routes. 
*/ list_for_each_entry(ert, &net->mctp.routes, list) { if (mctp_rt_compare_exact(rt, ert)) { - mctp_route_release(rt); return -EEXIST; } } @@ -1124,10 +1179,10 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, return 0; } -static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, - unsigned int daddr_extent, unsigned char type) +static int mctp_route_remove(struct net *net, unsigned int netid, + mctp_eid_t daddr_start, unsigned int daddr_extent, + unsigned char type) { - struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *tmp; mctp_eid_t daddr_end; bool dropped; @@ -1141,7 +1196,7 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, ASSERT_RTNL(); list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { - if (rt->dev == mdev && + if (mctp_route_netid(rt) == netid && rt->min == daddr_start && rt->max == daddr_end && rt->type == type) { list_del_rcu(&rt->list); @@ -1156,12 +1211,32 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL); + struct mctp_route *rt; + int rc; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rt->min = addr; + rt->max = addr; + rt->dst_type = MCTP_ROUTE_DIRECT; + rt->dev = mdev; + rt->type = RTN_LOCAL; + + mctp_dev_hold(rt->dev); + + rc = mctp_route_add(dev_net(mdev->dev), rt); + if (rc) + mctp_route_release(rt); + + return rc; } int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_remove(mdev, addr, 0, RTN_LOCAL); + return mctp_route_remove(dev_net(mdev->dev), mdev->net, + addr, 0, RTN_LOCAL); } /* removes all entries for a given device */ @@ -1172,7 +1247,7 @@ void mctp_route_remove_dev(struct mctp_dev *mdev) ASSERT_RTNL(); list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { - if (rt->dev == mdev) { + if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == mdev) { list_del_rcu(&rt->list); /* TODO: immediate RTM_DELROUTE */ mctp_route_release(rt); @@ -1189,8 +1264,9 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, struct net *net = dev_net(dev); struct mctp_dev *mdev; struct mctp_skb_cb *cb; - struct mctp_route *rt; + struct mctp_dst dst; struct mctp_hdr *mh; + int rc; rcu_read_lock(); mdev = __mctp_dev_get(dev); @@ -1232,17 +1308,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, cb->net = READ_ONCE(mdev->net); cb->ifindex = dev->ifindex; - rt = mctp_route_lookup(net, cb->net, mh->dest); + rc = mctp_route_lookup(net, cb->net, mh->dest, &dst); /* NULL EID, but addressed to our physical address */ - if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) - rt = mctp_route_lookup_null(net, dev); + if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) + rc = mctp_route_lookup_null(net, dev, &dst); - if (!rt) + if (rc) goto err_drop; - rt->output(rt, skb); - mctp_route_release(rt); + dst.output(&dst, skb); + mctp_dst_release(&dst); mctp_dev_put(mdev); return NET_RX_SUCCESS; @@ -1264,19 +1340,28 @@ static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { [RTA_DST] = { .type = NLA_U8 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)), +}; + +static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { + [RTAX_MTU] = { .type = NLA_U32 }, }; -/* Common part for RTM_NEWROUTE and RTM_DELROUTE 
parsing. - * tb must hold RTA_MAX+1 elements. +/* base parsing; common to both _lookup and _populate variants. + * + * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate + * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev. */ -static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, - struct nlattr **tb, struct rtmsg **rtm, - struct mctp_dev **mdev, mctp_eid_t *daddr_start) +static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr **tb, struct rtmsg **rtm, + struct mctp_dev **mdev, + struct mctp_fq_addr *gatewayp, + mctp_eid_t *daddr_start) { - struct net *net = sock_net(skb->sk); + struct mctp_fq_addr *gateway = NULL; + unsigned int ifindex = 0; struct net_device *dev; - unsigned int ifindex; int rc; rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, @@ -1292,11 +1377,44 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, } *daddr_start = nla_get_u8(tb[RTA_DST]); - if (!tb[RTA_OIF]) { - NL_SET_ERR_MSG(extack, "ifindex missing"); + if (tb[RTA_OIF]) + ifindex = nla_get_u32(tb[RTA_OIF]); + + if (tb[RTA_GATEWAY]) + gateway = nla_data(tb[RTA_GATEWAY]); + + if (ifindex && gateway) { + NL_SET_ERR_MSG(extack, + "cannot specify both ifindex and gateway"); + return -EINVAL; + + } else if (ifindex) { + dev = __dev_get_by_index(net, ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "bad ifindex"); + return -ENODEV; + } + *mdev = mctp_dev_get_rtnl(dev); + if (!*mdev) + return -ENODEV; + gatewayp->eid = 0; + + } else if (gateway) { + if (!mctp_address_unicast(gateway->eid)) { + NL_SET_ERR_MSG(extack, "bad gateway"); + return -EINVAL; + } + + gatewayp->eid = gateway->eid; + gatewayp->net = gateway->net != MCTP_NET_ANY ? + gateway->net : + READ_ONCE(net->mctp.default_net); + *mdev = NULL; + + } else { + NL_SET_ERR_MSG(extack, "no route output provided"); return -EINVAL; } - ifindex = nla_get_u32(tb[RTA_OIF]); *rtm = nlmsg_data(nlh); if ((*rtm)->rtm_family != AF_MCTP) { @@ -1304,82 +1422,157 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - dev = __dev_get_by_index(net, ifindex); - if (!dev) { - NL_SET_ERR_MSG(extack, "bad ifindex"); - return -ENODEV; - } - *mdev = mctp_dev_get_rtnl(dev); - if (!*mdev) - return -ENODEV; - - if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, "no routes to loopback"); + if ((*rtm)->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); return -EINVAL; } return 0; } -static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { - [RTAX_MTU] = { .type = NLA_U32 }, -}; - -static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +/* Route parsing for lookup operations; we only need the "route target" + * components (ie., network and dest-EID range).
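
The parse above makes RTA_OIF and RTA_GATEWAY mutually exclusive: a direct route names an output interface, while a gateway route carries a fully-qualified (network, EID) next hop as a struct mctp_fq_addr, with MCTP_NET_ANY falling back to the default net. A minimal userspace sketch of adding such a gateway route over rtnetlink follows; it is illustrative only: the struct mctp_fq_addr layout is assumed from the parsing code here (use the definition from your uapi headers if available), and AF_MCTP may need newer headers.

/* Hypothetical helper: request "EID 20 via gateway (net 1, EID 9)" with
 * RTM_NEWROUTE. The message carries RTA_DST and RTA_GATEWAY but no
 * RTA_OIF; carrying both would be rejected by the parse code above.
 */
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_MCTP
#define AF_MCTP 45			/* assumed value; see linux/socket.h */
#endif

struct mctp_fq_addr {			/* assumed layout: net, then EID */
	unsigned int net;
	unsigned char eid;
};

static int mctp_add_gateway_route(void)
{
	struct {
		struct nlmsghdr nh;
		struct rtmsg rtm;
		char attrs[64];		/* room for RTA_DST + RTA_GATEWAY */
	} req = {
		.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(struct rtmsg)),
		.nh.nlmsg_type	= RTM_NEWROUTE,
		.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK,
		.rtm.rtm_family	= AF_MCTP,
		.rtm.rtm_type	= RTN_UNICAST,	/* the only accepted type */
		.rtm.rtm_dst_len = 0,		/* extent 0: a single EID */
	};
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct mctp_fq_addr gw = { .net = 1, .eid = 9 };
	unsigned char dst_eid = 20;
	struct rtattr *rta;
	int fd, rc;

	/* append RTA_DST: the u8 destination EID */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nh.nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(dst_eid));
	memcpy(RTA_DATA(rta), &dst_eid, sizeof(dst_eid));
	req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	/* append RTA_GATEWAY: the fully-qualified next hop */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nh.nlmsg_len));
	rta->rta_type = RTA_GATEWAY;
	rta->rta_len = RTA_LENGTH(sizeof(gw));
	memcpy(RTA_DATA(rta), &gw, sizeof(gw));
	req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;
	rc = sendto(fd, &req, req.nh.nlmsg_len, 0,
		    (struct sockaddr *)&sa, sizeof(sa));
	close(fd);
	return rc < 0 ? -1 : 0;		/* a real caller would read the ACK */
}
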
+ */ +static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + unsigned char *type, unsigned int *netid, + mctp_eid_t *daddr_start, + unsigned int *daddr_extent) { struct nlattr *tb[RTA_MAX + 1]; + struct mctp_fq_addr gw; + struct mctp_dev *mdev; + struct rtmsg *rtm; + int rc; + + rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm, + &mdev, &gw, daddr_start); + if (rc) + return rc; + + if (mdev) { + *netid = mdev->net; + } else if (gw.eid) { + *netid = gw.net; + } else { + /* bug: _nlparse_common should not allow this */ + return -1; + } + + *type = rtm->rtm_type; + *daddr_extent = rtm->rtm_dst_len; + + return 0; +} + +/* Full route parse for RTM_NEWROUTE: populate @rt. On success, + * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference + * to that dev. + */ +static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct mctp_route *rt) +{ struct nlattr *tbx[RTAX_MAX + 1]; + struct nlattr *tb[RTA_MAX + 1]; + unsigned int daddr_extent; + struct mctp_fq_addr gw; mctp_eid_t daddr_start; - struct mctp_dev *mdev; + struct mctp_dev *dev; struct rtmsg *rtm; - unsigned int mtu; + u32 mtu = 0; int rc; - rc = mctp_route_nlparse(skb, nlh, extack, tb, - &rtm, &mdev, &daddr_start); - if (rc < 0) + rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm, + &dev, &gw, &daddr_start); + if (rc) return rc; - if (rtm->rtm_type != RTN_UNICAST) { - NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); + daddr_extent = rtm->rtm_dst_len; + + if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) { + NL_SET_ERR_MSG(extack, "invalid eid range"); return -EINVAL; } - mtu = 0; if (tb[RTA_METRICS]) { rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS], rta_metrics_policy, NULL); - if (rc < 0) + if (rc < 0) { + NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format"); return rc; + } if (tbx[RTAX_MTU]) mtu = nla_get_u32(tbx[RTAX_MTU]); } - rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, - rtm->rtm_type); + rt->type = rtm->rtm_type; + rt->min = daddr_start; + rt->max = daddr_start + daddr_extent; + rt->mtu = mtu; + if (gw.eid) { + rt->dst_type = MCTP_ROUTE_GATEWAY; + rt->gateway.eid = gw.eid; + rt->gateway.net = gw.net; + } else { + rt->dst_type = MCTP_ROUTE_DIRECT; + rt->dev = dev; + mctp_dev_hold(rt->dev); + } + + return 0; +} + +static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct mctp_route *rt; + int rc; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rc = mctp_route_nlparse_populate(net, nlh, extack, rt); + if (rc < 0) + goto err_free; + + if (rt->dst_type == MCTP_ROUTE_DIRECT && + rt->dev->dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "no routes to loopback"); + rc = -EINVAL; + goto err_free; + } + + rc = mctp_route_add(net, rt); + if (!rc) + return 0; + +err_free: + mctp_route_release(rt); return rc; } static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr *tb[RTA_MAX + 1]; + struct net *net = sock_net(skb->sk); + unsigned int netid, daddr_extent; + unsigned char type = RTN_UNSPEC; mctp_eid_t daddr_start; - struct mctp_dev *mdev; - struct rtmsg *rtm; int rc; - rc = mctp_route_nlparse(skb, nlh, extack, tb, - &rtm, &mdev, &daddr_start); + rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid, + &daddr_start, &daddr_extent); if (rc < 0) return rc; /* we only 
have unicast routes */ - if (rtm->rtm_type != RTN_UNICAST) + if (type != RTN_UNICAST) return -EINVAL; - rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST); + rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type); return rc; } @@ -1405,7 +1598,6 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, hdr->rtm_tos = 0; hdr->rtm_table = RT_TABLE_DEFAULT; hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ - hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ hdr->rtm_type = rt->type; if (nla_put_u8(skb, RTA_DST, rt->min)) @@ -1422,13 +1614,17 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, nla_nest_end(skb, metrics); - if (rt->dev) { + if (rt->dst_type == MCTP_ROUTE_DIRECT) { + hdr->rtm_scope = RT_SCOPE_LINK; if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) goto cancel; + } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { + hdr->rtm_scope = RT_SCOPE_UNIVERSE; + if (nla_put(skb, RTA_GATEWAY, + sizeof(rt->gateway), &rt->gateway)) + goto cancel; } - /* TODO: conditional neighbour physaddr? */ - nlmsg_end(skb, nlh); return 0; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 06c1897b685a..7a398f41b621 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -2,132 +2,11 @@ #include <kunit/test.h> -#include "utils.h" - -struct mctp_test_route { - struct mctp_route rt; - struct sk_buff_head pkts; -}; - -static int mctp_test_route_output(struct mctp_route *rt, struct sk_buff *skb) -{ - struct mctp_test_route *test_rt = container_of(rt, struct mctp_test_route, rt); - - skb_queue_tail(&test_rt->pkts, skb); - - return 0; -} - -/* local version of mctp_route_alloc() */ -static struct mctp_test_route *mctp_route_test_alloc(void) -{ - struct mctp_test_route *rt; - - rt = kzalloc(sizeof(*rt), GFP_KERNEL); - if (!rt) - return NULL; - - INIT_LIST_HEAD(&rt->rt.list); - refcount_set(&rt->rt.refs, 1); - rt->rt.output = mctp_test_route_output; - - skb_queue_head_init(&rt->pkts); - - return rt; -} - -static struct mctp_test_route *mctp_test_create_route(struct net *net, - struct mctp_dev *dev, - mctp_eid_t eid, - unsigned int mtu) -{ - struct mctp_test_route *rt; - - rt = mctp_route_test_alloc(); - if (!rt) - return NULL; - - rt->rt.min = eid; - rt->rt.max = eid; - rt->rt.mtu = mtu; - rt->rt.type = RTN_UNSPEC; - if (dev) - mctp_dev_hold(dev); - rt->rt.dev = dev; - - list_add_rcu(&rt->rt.list, &net->mctp.routes); - - return rt; -} - -static void mctp_test_route_destroy(struct kunit *test, - struct mctp_test_route *rt) -{ - unsigned int refs; - - rtnl_lock(); - list_del_rcu(&rt->rt.list); - rtnl_unlock(); - - skb_queue_purge(&rt->pkts); - if (rt->rt.dev) - mctp_dev_put(rt->rt.dev); - - refs = refcount_read(&rt->rt.refs); - KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance"); - - kfree_rcu(&rt->rt, rcu); -} - -static void mctp_test_skb_set_dev(struct sk_buff *skb, - struct mctp_test_dev *dev) -{ - struct mctp_skb_cb *cb; - - cb = mctp_cb(skb); - cb->net = READ_ONCE(dev->mdev->net); - skb->dev = dev->ndev; -} - -static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, - unsigned int data_len) -{ - size_t hdr_len = sizeof(*hdr); - struct sk_buff *skb; - unsigned int i; - u8 *buf; - - skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); - if (!skb) - return NULL; - - __mctp_cb(skb); - memcpy(skb_put(skb, hdr_len), hdr, hdr_len); - - buf = skb_put(skb, data_len); - for (i = 0; i < data_len; i++) - buf[i] = i & 0xff; - - return skb; -} - -static 
struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, - const void *data, - size_t data_len) -{ - size_t hdr_len = sizeof(*hdr); - struct sk_buff *skb; - - skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); - if (!skb) - return NULL; - - __mctp_cb(skb); - memcpy(skb_put(skb, hdr_len), hdr, hdr_len); - memcpy(skb_put(skb, data_len), data, data_len); +/* keep clangd happy when compiled outside of the route.c include */ +#include <net/mctp.h> +#include <net/mctpdevice.h> - return skb; -} +#include "utils.h" #define mctp_test_create_skb_data(h, d) \ __mctp_test_create_skb_data(h, d, sizeof(*d)) @@ -141,8 +20,10 @@ struct mctp_frag_test { static void mctp_test_fragment(struct kunit *test) { const struct mctp_frag_test *params; + struct mctp_test_pktqueue tpq; int rc, i, n, mtu, msgsize; - struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_dst dst; struct sk_buff *skb; struct mctp_hdr hdr; u8 seq; @@ -159,13 +40,15 @@ static void mctp_test_fragment(struct kunit *test) skb = mctp_test_create_skb(&hdr, msgsize); KUNIT_ASSERT_TRUE(test, skb); - rt = mctp_test_create_route(&init_net, NULL, 10, mtu); - KUNIT_ASSERT_TRUE(test, rt); + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + mctp_test_dst_setup(test, &dst, dev, &tpq, mtu); - rc = mctp_do_fragment_route(&rt->rt, skb, mtu, MCTP_TAG_OWNER); + rc = mctp_do_fragment_route(&dst, skb, mtu, MCTP_TAG_OWNER); KUNIT_EXPECT_FALSE(test, rc); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_EXPECT_EQ(test, n, params->n_frags); @@ -178,7 +61,7 @@ static void mctp_test_fragment(struct kunit *test) first = i == 0; last = i == (n - 1); - skb2 = skb_dequeue(&rt->pkts); + skb2 = skb_dequeue(&tpq.pkts); if (!skb2) break; @@ -216,7 +99,8 @@ static void mctp_test_fragment(struct kunit *test) kfree_skb(skb2); } - mctp_test_route_destroy(test, rt); + mctp_test_dst_release(&dst, &tpq); + mctp_test_destroy_dev(dev); } static const struct mctp_frag_test mctp_frag_tests[] = { @@ -246,25 +130,30 @@ struct mctp_rx_input_test { static void mctp_test_rx_input(struct kunit *test) { const struct mctp_rx_input_test *params; + struct mctp_test_pktqueue tpq; struct mctp_test_route *rt; struct mctp_test_dev *dev; struct sk_buff *skb; params = test->param_value; + test->priv = &tpq; dev = mctp_test_create_dev(); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + rt = mctp_test_create_route_direct(&init_net, dev->mdev, 8, 68); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); skb = mctp_test_create_skb(¶ms->hdr, 1); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + mctp_test_pktqueue_init(&tpq); + mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL); - KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input); + KUNIT_EXPECT_EQ(test, !!tpq.pkts.qlen, params->input); + skb_queue_purge(&tpq.pkts); mctp_test_route_destroy(test, rt); mctp_test_destroy_dev(dev); } @@ -292,12 +181,12 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, /* set up a local dev, route on EID 8, and a socket listening on type 0 */ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_dev **devp, - struct mctp_test_route **rtp, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket **sockp, unsigned int netid) { struct sockaddr_mctp addr = {0}; - struct mctp_test_route *rt; struct mctp_test_dev *dev; struct socket *sock; int rc; @@ -307,8 +196,7 @@ static void __mctp_route_test_init(struct kunit *test, if (netid != MCTP_NET_ANY) WRITE_ONCE(dev->mdev->net, netid); - rt = 
mctp_test_create_route(&init_net, dev->mdev, 8, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, dst, dev, tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -320,18 +208,18 @@ static void __mctp_route_test_init(struct kunit *test, rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); KUNIT_ASSERT_EQ(test, rc, 0); - *rtp = rt; *devp = dev; *sockp = sock; } static void __mctp_route_test_fini(struct kunit *test, struct mctp_test_dev *dev, - struct mctp_test_route *rt, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket *sock) { sock_release(sock); - mctp_test_route_destroy(test, rt); + mctp_test_dst_release(dst, tpq); mctp_test_destroy_dev(dev); } @@ -344,22 +232,24 @@ struct mctp_route_input_sk_test { static void mctp_test_route_input_sk(struct kunit *test) { const struct mctp_route_input_sk_test *params; + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; int rc; params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); skb = mctp_test_create_skb_data(¶ms->hdr, ¶ms->type); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); mctp_test_skb_set_dev(skb, dev); + mctp_test_pktqueue_init(&tpq); - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); if (params->deliver) { KUNIT_EXPECT_EQ(test, rc, 0); @@ -376,7 +266,7 @@ static void mctp_test_route_input_sk(struct kunit *test) KUNIT_EXPECT_NULL(test, skb2); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #define FL_S (MCTP_HDR_FLAG_SOM) @@ -413,16 +303,17 @@ struct mctp_route_input_sk_reasm_test { static void mctp_test_route_input_sk_reasm(struct kunit *test) { const struct mctp_route_input_sk_reasm_test *params; + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; int i, rc; u8 c; params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); for (i = 0; i < params->n_hdrs; i++) { c = i; @@ -431,7 +322,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) mctp_test_skb_set_dev(skb, dev); - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); } skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); @@ -445,7 +336,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) KUNIT_EXPECT_NULL(test, skb2); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) @@ -547,7 +438,7 @@ struct mctp_route_input_sk_keys_test { static void mctp_test_route_input_sk_keys(struct kunit *test) { const struct mctp_route_input_sk_keys_test *params; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; struct mctp_test_dev *dev; struct mctp_sk_key *key; @@ -555,6 +446,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) struct mctp_sock *msk; struct socket *sock; unsigned long flags; + struct mctp_dst dst; unsigned int net; int rc; u8 c; @@ -565,8 +457,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); 
net = READ_ONCE(dev->mdev->net); - rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, &dst, dev, &tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -592,7 +483,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) mctp_test_skb_set_dev(skb, dev); - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); /* (potentially) receive message */ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); @@ -606,7 +497,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) skb_free_datagram(sock->sk, skb2); mctp_key_unref(key); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = { @@ -681,7 +572,8 @@ KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests, struct test_net { unsigned int netid; struct mctp_test_dev *dev; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; + struct mctp_dst dst; struct socket *sock; struct sk_buff *skb; struct mctp_sk_key *key; @@ -699,18 +591,20 @@ mctp_test_route_input_multiple_nets_bind_init(struct kunit *test, t->msg.data = t->netid; - __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock, + t->netid); t->skb = mctp_test_create_skb_data(&hdr, &t->msg); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb); mctp_test_skb_set_dev(t->skb, t->dev); + mctp_test_pktqueue_init(&t->tpq); } static void mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test, struct test_net *t) { - __mctp_route_test_fini(test, t->dev, t->rt, t->sock); + __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock); } /* Test that skbs from different nets (otherwise identical) get routed to their @@ -731,9 +625,9 @@ static void mctp_test_route_input_multiple_nets_bind(struct kunit *test) mctp_test_route_input_multiple_nets_bind_init(test, &t1); mctp_test_route_input_multiple_nets_bind_init(test, &t2); - rc = mctp_route_input(&t1.rt->rt, t1.skb); + rc = mctp_dst_input(&t1.dst, t1.skb); KUNIT_ASSERT_EQ(test, rc, 0); - rc = mctp_route_input(&t2.rt->rt, t2.skb); + rc = mctp_dst_input(&t2.dst, t2.skb); KUNIT_ASSERT_EQ(test, rc, 0); rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc); @@ -767,7 +661,8 @@ mctp_test_route_input_multiple_nets_key_init(struct kunit *test, t->msg.data = t->netid; - __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock, + t->netid); msk = container_of(t->sock->sk, struct mctp_sock, sk); @@ -790,7 +685,7 @@ mctp_test_route_input_multiple_nets_key_fini(struct kunit *test, struct test_net *t) { mctp_key_unref(t->key); - __mctp_route_test_fini(test, t->dev, t->rt, t->sock); + __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock); } /* test that skbs from different nets (otherwise identical) get routed to their @@ -812,9 +707,9 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) mctp_test_route_input_multiple_nets_key_init(test, &t1); mctp_test_route_input_multiple_nets_key_init(test, &t2); - rc = mctp_route_input(&t1.rt->rt, t1.skb); + rc = mctp_dst_input(&t1.dst, t1.skb); KUNIT_ASSERT_EQ(test, rc, 0); - rc = mctp_route_input(&t2.rt->rt, t2.skb); + rc = mctp_dst_input(&t2.dst, t2.skb); KUNIT_ASSERT_EQ(test, rc, 0); rx_skb1 = 
skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc); @@ -843,13 +738,14 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) static void mctp_test_route_input_sk_fail_single(struct kunit *test) { const struct mctp_hdr hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO); - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; struct sk_buff *skb; int rc; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); /* No rcvbuf space, so delivery should fail. __sock_set_rcvbuf will * clamp the minimum to SOCK_MIN_RCVBUF, so we open-code this. @@ -865,14 +761,14 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test) mctp_test_skb_set_dev(skb, dev); /* do route input, which should fail */ - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); KUNIT_EXPECT_NE(test, rc, 0); /* we should hold the only reference to skb */ KUNIT_EXPECT_EQ(test, refcount_read(&skb->users), 1); kfree_skb(skb); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } /* Input route to socket, using a fragmented message, where sock delivery fails. @@ -880,14 +776,15 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test) static void mctp_test_route_input_sk_fail_frag(struct kunit *test) { const struct mctp_hdr hdrs[2] = { RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1) }; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; struct sk_buff *skbs[2]; + struct mctp_dst dst; struct socket *sock; unsigned int i; int rc; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); lock_sock(sock->sk); WRITE_ONCE(sock->sk->sk_rcvbuf, 0); @@ -904,11 +801,11 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) /* first route input should succeed, we're only queueing to the * frag list */ - rc = mctp_route_input(&rt->rt, skbs[0]); + rc = mctp_dst_input(&dst, skbs[0]); KUNIT_EXPECT_EQ(test, rc, 0); /* final route input should fail to deliver to the socket */ - rc = mctp_route_input(&rt->rt, skbs[1]); + rc = mctp_dst_input(&dst, skbs[1]); KUNIT_EXPECT_NE(test, rc, 0); /* we should hold the only reference to both skbs */ @@ -918,7 +815,7 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) KUNIT_EXPECT_EQ(test, refcount_read(&skbs[1]->users), 1); kfree_skb(skbs[1]); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } /* Input route to socket, using a fragmented message created from clones. 
@@ -933,23 +830,22 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1), }; - struct mctp_test_route *rt; + const size_t data_len = 3; /* arbitrary */ + u8 compare[3 * ARRAY_SIZE(hdrs)]; + u8 flat[3 * ARRAY_SIZE(hdrs)]; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; struct sk_buff *skb[5]; struct sk_buff *rx_skb; + struct mctp_dst dst; struct socket *sock; - size_t data_len; - u8 compare[100]; - u8 flat[100]; size_t total; void *p; int rc; - /* Arbitrary length */ - data_len = 3; total = data_len + sizeof(struct mctp_hdr); - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); /* Create a single skb initially with concatenated packets */ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); @@ -988,7 +884,7 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) /* Feed the fragments into MCTP core */ for (int i = 0; i < 5; i++) { - rc = mctp_route_input(&rt->rt, skb[i]); + rc = mctp_dst_input(&dst, skb[i]); KUNIT_EXPECT_EQ(test, rc, 0); } @@ -1026,29 +922,29 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) kfree_skb(skb[i]); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, struct mctp_test_dev **devp, - struct mctp_test_route **rtp, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket **sock, struct sk_buff **skbp, unsigned int len) { - struct mctp_test_route *rt; struct mctp_test_dev *dev; struct sk_buff *skb; /* we have a slightly odd routing setup here; the test route * is for EID 8, which is our local EID. We don't do a routing * lookup, so that's fine - all we require is a path through - * mctp_local_output, which will call rt->output on whatever + * mctp_local_output, which will call dst->output on whatever * route we provide */ - __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, dst, tpq, sock, MCTP_NET_ANY); /* Assign a single EID. 
->addrs is freed on mctp netdev release */ dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); @@ -1061,42 +957,41 @@ static void mctp_test_flow_init(struct kunit *test, skb_reserve(skb, sizeof(struct mctp_hdr) + 1); memset(skb_put(skb, len), 0, len); - /* take a ref for the route, we'll decrement in local output */ - refcount_inc(&rt->rt.refs); *devp = dev; - *rtp = rt; *skbp = skb; } static void mctp_test_flow_fini(struct kunit *test, struct mctp_test_dev *dev, - struct mctp_test_route *rt, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket *sock) { - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, dst, tpq, sock); } /* test that an outgoing skb has the correct MCTP extension data set */ static void mctp_test_packet_flow(struct kunit *test) { + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct mctp_flow *flow; struct socket *sock; - u8 dst = 8; + u8 dst_eid = 8; int n, rc; - mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30); + mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 30); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_ASSERT_EQ(test, n, 1); - skb2 = skb_dequeue(&rt->pkts); + skb2 = skb_dequeue(&tpq.pkts); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); flow = skb_ext_find(skb2, SKB_EXT_MCTP); @@ -1105,7 +1000,7 @@ static void mctp_test_packet_flow(struct kunit *test) KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk); kfree_skb(skb2); - mctp_test_flow_fini(test, dev, rt, sock); + mctp_test_flow_fini(test, dev, &dst, &tpq, sock); } /* test that outgoing skbs, after fragmentation, all have the correct MCTP @@ -1113,26 +1008,27 @@ static void mctp_test_packet_flow(struct kunit *test) */ static void mctp_test_fragment_flow(struct kunit *test) { + struct mctp_test_pktqueue tpq; struct mctp_flow *flows[2]; struct sk_buff *tx_skbs[2]; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct sk_buff *skb; struct socket *sock; - u8 dst = 8; + u8 dst_eid = 8; int n, rc; - mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100); + mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 100); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_ASSERT_EQ(test, n, 2); /* both resulting packets should have the same flow data */ - tx_skbs[0] = skb_dequeue(&rt->pkts); - tx_skbs[1] = skb_dequeue(&rt->pkts); + tx_skbs[0] = skb_dequeue(&tpq.pkts); + tx_skbs[1] = skb_dequeue(&tpq.pkts); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]); @@ -1148,7 +1044,7 @@ static void mctp_test_fragment_flow(struct kunit *test) kfree_skb(tx_skbs[0]); kfree_skb(tx_skbs[1]); - mctp_test_flow_fini(test, dev, rt, sock); + mctp_test_flow_fini(test, dev, &dst, &tpq, sock); } #else @@ -1166,15 +1062,16 @@ static void mctp_test_fragment_flow(struct kunit *test) /* Test that outgoing skbs cause a suitable tag to be created */ static void mctp_test_route_output_key_create(struct kunit *test) { + const u8 dst_eid = 26, src_eid = 15; + struct mctp_test_pktqueue tpq; const unsigned int netid = 50; - const u8 dst = 26, src = 15; - struct mctp_test_route *rt; struct 
mctp_test_dev *dev; struct mctp_sk_key *key; struct netns_mctp *mns; unsigned long flags; struct socket *sock; struct sk_buff *skb; + struct mctp_dst dst; bool empty, single; const int len = 2; int rc; @@ -1183,15 +1080,14 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); WRITE_ONCE(dev->mdev->net, netid); - rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, &dst, dev, &tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); dev->mdev->num_addrs = 1; - dev->mdev->addrs[0] = src; + dev->mdev->addrs[0] = src_eid; skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); @@ -1199,8 +1095,6 @@ static void mctp_test_route_output_key_create(struct kunit *test) skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); memset(skb_put(skb, len), 0, len); - refcount_inc(&rt->rt.refs); - mns = &sock_net(sock->sk)->mctp; /* We assume we're starting from an empty keys list, which requires @@ -1211,7 +1105,7 @@ static void mctp_test_route_output_key_create(struct kunit *test) spin_unlock_irqrestore(&mns->keys_lock, flags); KUNIT_ASSERT_TRUE(test, empty); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); key = NULL; @@ -1227,16 +1121,295 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_TRUE(test, single); KUNIT_EXPECT_EQ(test, key->net, netid); - KUNIT_EXPECT_EQ(test, key->local_addr, src); - KUNIT_EXPECT_EQ(test, key->peer_addr, dst); + KUNIT_EXPECT_EQ(test, key->local_addr, src_eid); + KUNIT_EXPECT_EQ(test, key->peer_addr, dst_eid); /* key has incoming tag, so inverse of what we sent */ KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER); sock_release(sock); - mctp_test_route_destroy(test, rt); + mctp_test_dst_release(&dst, &tpq); mctp_test_destroy_dev(dev); } +static void mctp_test_route_extaddr_input(struct kunit *test) +{ + static const unsigned char haddr[] = { 0xaa, 0x55 }; + struct mctp_test_pktqueue tpq; + struct mctp_skb_cb *cb, *cb2; + const unsigned int len = 40; + struct mctp_test_dev *dev; + struct sk_buff *skb, *skb2; + struct mctp_dst dst; + struct mctp_hdr hdr; + struct socket *sock; + int rc; + + hdr.ver = 1; + hdr.src = 10; + hdr.dest = 8; + hdr.flags_seq_tag = FL_S | FL_E | FL_TO; + + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); + + skb = mctp_test_create_skb(&hdr, len); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + /* set our hardware addressing data */ + cb = mctp_cb(skb); + memcpy(cb->haddr, haddr, sizeof(haddr)); + cb->halen = sizeof(haddr); + + mctp_test_skb_set_dev(skb, dev); + + rc = mctp_dst_input(&dst, skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + mctp_test_dst_release(&dst, &tpq); + + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + KUNIT_ASSERT_EQ(test, skb2->len, len); + + cb2 = mctp_cb(skb2); + + /* Received SKB should have the hardware addressing as set above. 
+ * We're likely to have the same actual cb here (ie., cb == cb2), + * but it's the comparison that we care about + */ + KUNIT_EXPECT_EQ(test, cb2->halen, sizeof(haddr)); + KUNIT_EXPECT_MEMEQ(test, cb2->haddr, haddr, sizeof(haddr)); + + skb_free_datagram(sock->sk, skb2); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_gw_lookup(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* 8 (local) -> 10 (gateway) via 9 (direct) */ + rt1 = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_PTR_EQ(test, dst.dev, dev->mdev); + KUNIT_EXPECT_EQ(test, dst.mtu, dev->ndev->mtu); + KUNIT_EXPECT_EQ(test, dst.nexthop, 9); + KUNIT_EXPECT_EQ(test, dst.halen, 0); + + mctp_dst_release(&dst); + + mctp_test_route_destroy(test, rt2); + mctp_test_route_destroy(test, rt1); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_gw_loop(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* two routes using each other as the gw */ + rt1 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 9, 10, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + /* this should fail, rather than infinite-loop */ + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_EXPECT_NE(test, rc, 0); + + mctp_test_route_destroy(test, rt2); + mctp_test_route_destroy(test, rt1); + mctp_test_destroy_dev(dev); +} + +struct mctp_route_gw_mtu_test { + /* working away from the local stack */ + unsigned int dev, neigh, gw, dst; + unsigned int exp; +}; + +static void mctp_route_gw_mtu_to_desc(const struct mctp_route_gw_mtu_test *t, + char *desc) +{ + sprintf(desc, "dev %d, neigh %d, gw %d, dst %d -> %d", + t->dev, t->neigh, t->gw, t->dst, t->exp); +} + +static const struct mctp_route_gw_mtu_test mctp_route_gw_mtu_tests[] = { + /* no route-specific MTUs */ + { 68, 0, 0, 0, 68 }, + { 100, 0, 0, 0, 100 }, + /* one route MTU (smaller than dev mtu), others unrestricted */ + { 100, 68, 0, 0, 68 }, + { 100, 0, 68, 0, 68 }, + { 100, 0, 0, 68, 68 }, + /* smallest applied, regardless of order */ + { 100, 99, 98, 68, 68 }, + { 99, 100, 98, 68, 68 }, + { 98, 99, 100, 68, 68 }, + { 68, 98, 99, 100, 68 }, +}; + +KUNIT_ARRAY_PARAM(mctp_route_gw_mtu, mctp_route_gw_mtu_tests, + mctp_route_gw_mtu_to_desc); + +static void mctp_test_route_gw_mtu(struct kunit *test) +{ + const struct mctp_route_gw_mtu_test *mtus = test->param_value; + struct mctp_test_route *rt1, *rt2, *rt3; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + struct mctp_dev *mdev; + unsigned int netid; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + dev->ndev->mtu = mtus->dev; + mdev = dev->mdev; + netid = mdev->net; + + /* 8 (local) -> 11 (dst) via 10 (gw) via 9 (neigh) */ + rt1 = mctp_test_create_route_direct(&init_net, mdev, 9, mtus->neigh); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + + rt2 = 
mctp_test_create_route_gw(&init_net, netid, 10, 9, mtus->gw); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + rt3 = mctp_test_create_route_gw(&init_net, netid, 11, 10, mtus->dst); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt3); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 11, &dst); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, dst.mtu, mtus->exp); + + mctp_dst_release(&dst); + + mctp_test_route_destroy(test, rt3); + mctp_test_route_destroy(test, rt2); + mctp_test_route_destroy(test, rt1); + mctp_test_destroy_dev(dev); +} + +#define MCTP_TEST_LLADDR_LEN 2 +struct mctp_test_llhdr { + unsigned int magic; + unsigned char src[MCTP_TEST_LLADDR_LEN]; + unsigned char dst[MCTP_TEST_LLADDR_LEN]; +}; + +static const unsigned int mctp_test_llhdr_magic = 0x5c78339c; + +static int test_dev_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct kunit *test = current->kunit_test; + struct mctp_test_llhdr *hdr; + + hdr = skb_push(skb, sizeof(*hdr)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hdr); + skb_reset_mac_header(skb); + + hdr->magic = mctp_test_llhdr_magic; + memcpy(&hdr->src, saddr, sizeof(hdr->src)); + memcpy(&hdr->dst, daddr, sizeof(hdr->dst)); + + return 0; +} + +/* Test the dst_output path for a gateway-routed skb: we should have it + * lookup the nexthop EID in the neighbour table, and call into + * header_ops->create to resolve that to a lladdr. Our mock header_ops->create + * will just set a synthetic link-layer header, which we check after transmit. + */ +static void mctp_test_route_gw_output(struct kunit *test) +{ + const unsigned char haddr_self[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x03 }; + const unsigned char haddr_peer[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x02 }; + const struct header_ops ops = { + .create = test_dev_header_create, + }; + struct mctp_neigh neigh = { 0 }; + struct mctp_test_llhdr *ll_hdr; + struct mctp_dst dst = { 0 }; + struct mctp_hdr hdr = { 0 }; + struct mctp_test_dev *dev; + struct sk_buff *skb; + unsigned char *buf; + int i, rc; + + dev = mctp_test_create_dev_lladdr(sizeof(haddr_self), haddr_self); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + dev->ndev->header_ops = &ops; + + dst.dev = dev->mdev; + __mctp_dev_get(dst.dev->dev); + dst.mtu = 68; + dst.nexthop = 9; + + /* simple mctp_neigh_add for the gateway (not dest!) 
endpoint */ + INIT_LIST_HEAD(&neigh.list); + neigh.dev = dev->mdev; + mctp_dev_hold(dev->mdev); + neigh.eid = 9; + neigh.source = MCTP_NEIGH_STATIC; + memcpy(neigh.ha, haddr_peer, sizeof(haddr_peer)); + list_add_rcu(&neigh.list, &init_net.mctp.neighbours); + + hdr.ver = 1; + hdr.src = 8; + hdr.dest = 10; + hdr.flags_seq_tag = FL_S | FL_E | FL_TO; + + /* construct enough for a future link-layer header, the provided + * mctp header, and 4 bytes of data + */ + skb = alloc_skb(sizeof(*ll_hdr) + sizeof(hdr) + 4, GFP_KERNEL); + skb->dev = dev->ndev; + __mctp_cb(skb); + + skb_reserve(skb, sizeof(*ll_hdr)); + + memcpy(skb_put(skb, sizeof(hdr)), &hdr, sizeof(hdr)); + buf = skb_put(skb, 4); + for (i = 0; i < 4; i++) + buf[i] = i; + + /* extra ref over the dev_xmit */ + skb_get(skb); + + rc = mctp_dst_output(&dst, skb); + KUNIT_EXPECT_EQ(test, rc, 0); + + mctp_dst_release(&dst); + list_del_rcu(&neigh.list); + mctp_dev_put(dev->mdev); + + /* check that we have our header created with the correct neighbour */ + ll_hdr = (void *)skb_mac_header(skb); + KUNIT_EXPECT_EQ(test, ll_hdr->magic, mctp_test_llhdr_magic); + KUNIT_EXPECT_MEMEQ(test, ll_hdr->src, haddr_self, sizeof(haddr_self)); + KUNIT_EXPECT_MEMEQ(test, ll_hdr->dst, haddr_peer, sizeof(haddr_peer)); + kfree_skb(skb); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -1253,11 +1426,16 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), KUNIT_CASE(mctp_test_route_input_cloned_frag), + KUNIT_CASE(mctp_test_route_extaddr_input), + KUNIT_CASE(mctp_test_route_gw_lookup), + KUNIT_CASE(mctp_test_route_gw_loop), + KUNIT_CASE_PARAM(mctp_test_route_gw_mtu, mctp_route_gw_mtu_gen_params), + KUNIT_CASE(mctp_test_route_gw_output), {} }; static struct kunit_suite mctp_test_suite = { - .name = "mctp", + .name = "mctp-route", .test_cases = mctp_test_cases, }; diff --git a/net/mctp/test/sock-test.c b/net/mctp/test/sock-test.c new file mode 100644 index 000000000000..4eb3a724dca3 --- /dev/null +++ b/net/mctp/test/sock-test.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/static_stub.h> +#include <kunit/test.h> + +#include <linux/socket.h> +#include <linux/spinlock.h> + +#include "utils.h" + +static const u8 dev_default_lladdr[] = { 0x01, 0x02 }; + +/* helper for simple sock setup: single device, with dev_default_lladdr as its + * hardware address, assigned with a local EID 8, and a route to EID 9 + */ +static void __mctp_sock_test_init(struct kunit *test, + struct mctp_test_dev **devp, + struct mctp_test_route **rtp, + struct socket **sockp) +{ + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + unsigned long flags; + u8 *addrs; + int rc; + + dev = mctp_test_create_dev_lladdr(sizeof(dev_default_lladdr), + dev_default_lladdr); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + addrs = kmalloc(1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addrs); + addrs[0] = 8; + + spin_lock_irqsave(&dev->mdev->addrs_lock, flags); + dev->mdev->num_addrs = 1; + swap(addrs, dev->mdev->addrs); + spin_unlock_irqrestore(&dev->mdev->addrs_lock, flags); + + kfree(addrs); + + rt = mctp_test_create_route_direct(dev_net(dev->ndev), dev->mdev, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + *devp = dev; + *rtp = rt; + *sockp 
= sock; +} + +static void __mctp_sock_test_fini(struct kunit *test, + struct mctp_test_dev *dev, + struct mctp_test_route *rt, + struct socket *sock) +{ + sock_release(sock); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +struct mctp_test_sock_local_output_config { + struct mctp_test_dev *dev; + size_t halen; + u8 haddr[MAX_ADDR_LEN]; + bool invoked; + int rc; +}; + +static int mctp_test_sock_local_output(struct sock *sk, + struct mctp_dst *dst, + struct sk_buff *skb, + mctp_eid_t daddr, u8 req_tag) +{ + struct kunit *test = kunit_get_current_test(); + struct mctp_test_sock_local_output_config *cfg = test->priv; + + KUNIT_EXPECT_PTR_EQ(test, dst->dev, cfg->dev->mdev); + KUNIT_EXPECT_EQ(test, dst->halen, cfg->halen); + KUNIT_EXPECT_MEMEQ(test, dst->haddr, cfg->haddr, dst->halen); + + cfg->invoked = true; + + kfree_skb(skb); + + return cfg->rc; +} + +static void mctp_test_sock_sendmsg_extaddr(struct kunit *test) +{ + struct sockaddr_mctp_ext addr = { + .smctp_base = { + .smctp_family = AF_MCTP, + .smctp_tag = MCTP_TAG_OWNER, + .smctp_network = MCTP_NET_ANY, + }, + }; + struct mctp_test_sock_local_output_config cfg = { 0 }; + u8 haddr[] = { 0xaa, 0x01 }; + u8 buf[4] = { 0, 1, 2, 3 }; + struct mctp_test_route *rt; + struct msghdr msg = { 0 }; + struct mctp_test_dev *dev; + struct mctp_sock *msk; + struct socket *sock; + ssize_t send_len; + struct kvec vec = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + + __mctp_sock_test_init(test, &dev, &rt, &sock); + + /* Expect to see the dst configured up with the addressing data we + * provide in the struct sockaddr_mctp_ext + */ + cfg.dev = dev; + cfg.halen = sizeof(haddr); + memcpy(cfg.haddr, haddr, sizeof(haddr)); + + test->priv = &cfg; + + kunit_activate_static_stub(test, mctp_local_output, + mctp_test_sock_local_output); + + /* enable and configure direct addressing */ + msk = container_of(sock->sk, struct mctp_sock, sk); + msk->addr_ext = true; + + addr.smctp_ifindex = dev->ndev->ifindex; + addr.smctp_halen = sizeof(haddr); + memcpy(addr.smctp_haddr, haddr, sizeof(haddr)); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &vec, 1, sizeof(buf)); + send_len = mctp_sendmsg(sock, &msg, sizeof(buf)); + KUNIT_EXPECT_EQ(test, send_len, sizeof(buf)); + KUNIT_EXPECT_TRUE(test, cfg.invoked); + + __mctp_sock_test_fini(test, dev, rt, sock); +} + +static void mctp_test_sock_recvmsg_extaddr(struct kunit *test) +{ + struct sockaddr_mctp_ext recv_addr = { 0 }; + u8 rcv_buf[1], rcv_data[] = { 0, 1 }; + u8 haddr[] = { 0xaa, 0x02 }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_skb_cb *cb; + struct mctp_sock *msk; + struct sk_buff *skb; + struct mctp_hdr hdr; + struct socket *sock; + struct msghdr msg; + ssize_t recv_len; + int rc; + struct kvec vec = { + .iov_base = rcv_buf, + .iov_len = sizeof(rcv_buf), + }; + + __mctp_sock_test_init(test, &dev, &rt, &sock); + + /* enable extended addressing on recv */ + msk = container_of(sock->sk, struct mctp_sock, sk); + msk->addr_ext = true; + + /* base incoming header, using a nul-EID dest */ + hdr.ver = 1; + hdr.dest = 0; + hdr.src = 9; + hdr.flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | + MCTP_HDR_FLAG_TO; + + skb = mctp_test_create_skb_data(&hdr, &rcv_data); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + mctp_test_skb_set_dev(skb, dev); + + /* set incoming extended address data */ + cb = mctp_cb(skb); + cb->halen = sizeof(haddr); + cb->ifindex = dev->ndev->ifindex; + memcpy(cb->haddr, haddr, 
sizeof(haddr)); + + /* Deliver to socket. The route input path pulls the network header, + * leaving skb data at type byte onwards. recvmsg will consume the + * type for addr.smctp_type + */ + skb_pull(skb, sizeof(hdr)); + rc = sock_queue_rcv_skb(sock->sk, skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + msg.msg_name = &recv_addr; + msg.msg_namelen = sizeof(recv_addr); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(rcv_buf)); + + recv_len = mctp_recvmsg(sock, &msg, sizeof(rcv_buf), + MSG_DONTWAIT | MSG_TRUNC); + + KUNIT_EXPECT_EQ(test, recv_len, sizeof(rcv_buf)); + + /* expect our extended address to be populated from hdr and cb */ + KUNIT_EXPECT_EQ(test, msg.msg_namelen, sizeof(recv_addr)); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_base.smctp_family, AF_MCTP); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_ifindex, dev->ndev->ifindex); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_halen, sizeof(haddr)); + KUNIT_EXPECT_MEMEQ(test, recv_addr.smctp_haddr, haddr, sizeof(haddr)); + + __mctp_sock_test_fini(test, dev, rt, sock); +} + +static struct kunit_case mctp_test_cases[] = { + KUNIT_CASE(mctp_test_sock_sendmsg_extaddr), + KUNIT_CASE(mctp_test_sock_recvmsg_extaddr), + {} +}; + +static struct kunit_suite mctp_test_suite = { + .name = "mctp-sock", + .test_cases = mctp_test_cases, +}; + +kunit_test_suite(mctp_test_suite); diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 565763eb0211..01f5af416b81 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -26,19 +26,22 @@ static void mctp_test_dev_setup(struct net_device *ndev) ndev->type = ARPHRD_MCTP; ndev->mtu = MCTP_DEV_TEST_MTU; ndev->hard_header_len = 0; - ndev->addr_len = 0; ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; ndev->flags = IFF_NOARP; ndev->netdev_ops = &mctp_test_netdev_ops; ndev->needs_free_netdev = true; } -struct mctp_test_dev *mctp_test_create_dev(void) +static struct mctp_test_dev *__mctp_test_create_dev(unsigned short lladdr_len, + const unsigned char *lladdr) { struct mctp_test_dev *dev; struct net_device *ndev; int rc; + if (WARN_ON(lladdr_len > MAX_ADDR_LEN)) + return NULL; + ndev = alloc_netdev(sizeof(*dev), "mctptest%d", NET_NAME_ENUM, mctp_test_dev_setup); if (!ndev) @@ -46,6 +49,8 @@ struct mctp_test_dev *mctp_test_create_dev(void) dev = netdev_priv(ndev); dev->ndev = ndev; + ndev->addr_len = lladdr_len; + dev_addr_set(ndev, lladdr); rc = register_netdev(ndev); if (rc) { @@ -61,8 +66,195 @@ struct mctp_test_dev *mctp_test_create_dev(void) return dev; } +struct mctp_test_dev *mctp_test_create_dev(void) +{ + return __mctp_test_create_dev(0, NULL); +} + +struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, + const unsigned char *lladdr) +{ + return __mctp_test_create_dev(lladdr_len, lladdr); +} + void mctp_test_destroy_dev(struct mctp_test_dev *dev) { mctp_dev_put(dev->mdev); unregister_netdev(dev->ndev); } + +static const unsigned int test_pktqueue_magic = 0x5f713aef; + +void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq) +{ + tpq->magic = test_pktqueue_magic; + skb_queue_head_init(&tpq->pkts); +} + +static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) +{ + struct kunit *test = current->kunit_test; + struct mctp_test_pktqueue *tpq = test->priv; + + KUNIT_ASSERT_EQ(test, tpq->magic, test_pktqueue_magic); + + skb_queue_tail(&tpq->pkts, skb); + + return 0; +} + +/* local version of mctp_route_alloc() */ +static struct mctp_test_route *mctp_route_test_alloc(void) +{ + struct mctp_test_route *rt; + + rt = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!rt) + return 
NULL; + + INIT_LIST_HEAD(&rt->rt.list); + refcount_set(&rt->rt.refs, 1); + rt->rt.output = mctp_test_dst_output; + + return rt; +} + +struct mctp_test_route *mctp_test_create_route_direct(struct net *net, + struct mctp_dev *dev, + mctp_eid_t eid, + unsigned int mtu) +{ + struct mctp_test_route *rt; + + rt = mctp_route_test_alloc(); + if (!rt) + return NULL; + + rt->rt.min = eid; + rt->rt.max = eid; + rt->rt.mtu = mtu; + rt->rt.type = RTN_UNSPEC; + rt->rt.dst_type = MCTP_ROUTE_DIRECT; + if (dev) + mctp_dev_hold(dev); + rt->rt.dev = dev; + + list_add_rcu(&rt->rt.list, &net->mctp.routes); + + return rt; +} + +struct mctp_test_route *mctp_test_create_route_gw(struct net *net, + unsigned int netid, + mctp_eid_t eid, + mctp_eid_t gw, + unsigned int mtu) +{ + struct mctp_test_route *rt; + + rt = mctp_route_test_alloc(); + if (!rt) + return NULL; + + rt->rt.min = eid; + rt->rt.max = eid; + rt->rt.mtu = mtu; + rt->rt.type = RTN_UNSPEC; + rt->rt.dst_type = MCTP_ROUTE_GATEWAY; + rt->rt.gateway.eid = gw; + rt->rt.gateway.net = netid; + + list_add_rcu(&rt->rt.list, &net->mctp.routes); + + return rt; +} + +/* Convenience function for our test dst; release with mctp_test_dst_release() + */ +void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, + struct mctp_test_dev *dev, + struct mctp_test_pktqueue *tpq, unsigned int mtu) +{ + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev); + + memset(dst, 0, sizeof(*dst)); + + dst->dev = dev->mdev; + __mctp_dev_get(dst->dev->dev); + dst->mtu = mtu; + dst->output = mctp_test_dst_output; + mctp_test_pktqueue_init(tpq); + test->priv = tpq; +} + +void mctp_test_dst_release(struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq) +{ + mctp_dst_release(dst); + skb_queue_purge(&tpq->pkts); +} + +void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt) +{ + unsigned int refs; + + rtnl_lock(); + list_del_rcu(&rt->rt.list); + rtnl_unlock(); + + if (rt->rt.dst_type == MCTP_ROUTE_DIRECT && rt->rt.dev) + mctp_dev_put(rt->rt.dev); + + refs = refcount_read(&rt->rt.refs); + KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance"); + + kfree_rcu(&rt->rt, rcu); +} + +void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev) +{ + struct mctp_skb_cb *cb; + + cb = mctp_cb(skb); + cb->net = READ_ONCE(dev->mdev->net); + skb->dev = dev->ndev; +} + +struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, + unsigned int data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + unsigned int i; + u8 *buf; + + skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); + if (!skb) + return NULL; + + __mctp_cb(skb); + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + + buf = skb_put(skb, data_len); + for (i = 0; i < data_len; i++) + buf[i] = i & 0xff; + + return skb; +} + +struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, + const void *data, size_t data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + + skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); + if (!skb) + return NULL; + + __mctp_cb(skb); + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + memcpy(skb_put(skb, data_len), data, data_len); + + return skb; +} diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h index df6aa1c03440..f10d1d9066cc 100644 --- a/net/mctp/test/utils.h +++ b/net/mctp/test/utils.h @@ -3,6 +3,11 @@ #ifndef __NET_MCTP_TEST_UTILS_H #define __NET_MCTP_TEST_UTILS_H +#include <uapi/linux/netdevice.h> + +#include <net/mctp.h> +#include <net/mctpdevice.h> + #include <kunit/test.h> #define MCTP_DEV_TEST_MTU 68 @@ -10,11 +15,50 @@ struct 
mctp_test_dev { struct net_device *ndev; struct mctp_dev *mdev; + + unsigned short lladdr_len; + unsigned char lladdr[MAX_ADDR_LEN]; }; struct mctp_test_dev; +struct mctp_test_route { + struct mctp_route rt; +}; + +struct mctp_test_pktqueue { + unsigned int magic; + struct sk_buff_head pkts; +}; + struct mctp_test_dev *mctp_test_create_dev(void); +struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, + const unsigned char *lladdr); void mctp_test_destroy_dev(struct mctp_test_dev *dev); +struct mctp_test_route *mctp_test_create_route_direct(struct net *net, + struct mctp_dev *dev, + mctp_eid_t eid, + unsigned int mtu); +struct mctp_test_route *mctp_test_create_route_gw(struct net *net, + unsigned int netid, + mctp_eid_t eid, + mctp_eid_t gw, + unsigned int mtu); +void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, + struct mctp_test_dev *dev, + struct mctp_test_pktqueue *tpq, unsigned int mtu); +void mctp_test_dst_release(struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq); +void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq); +void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt); +void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev); +struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, + unsigned int data_len); +struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, + const void *data, size_t data_len); + +#define mctp_test_create_skb_data(h, d) \ + __mctp_test_create_skb_data(h, d, sizeof(*d)) + #endif /* __NET_MCTP_TEST_UTILS_H */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e8972a857e51..1c651bf3cae8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { + DECLARE_WAITQUEUE(wait, current); struct netlink_sock *nlk; + unsigned int rmem; nlk = nlk_sk(sk); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); + return 0; + } - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) + netlink_overrun(sk); sock_put(sk); + kfree_skb(skb); + return -EAGAIN; + } - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&nlk->wait, &wait); + rmem = atomic_read(&sk->sk_rmem_alloc); + + 
if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) + *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); + sock_put(sk); + + if (signal_pending(current)) { + kfree_skb(skb); + return sock_intr_errno(*timeo); } - netlink_skb_set_owner_r(skb, sk); - return 0; + + return 1; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); + unsigned int rmem, rcvbuf; - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + + if ((rmem == skb->truesize || rmem <= rcvbuf) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + return rmem > (rcvbuf >> 1); } + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); return -1; } @@ -2249,6 +2262,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) struct module *module; int err = -ENOBUFS; int alloc_min_size; + unsigned int rmem; int alloc_size; if (!lock_taken) @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user @@ -2283,6 +2294,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (!skb) goto errout_skb; + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + if (rmem >= READ_ONCE(sk->sk_rcvbuf)) { + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + goto errout_skb; + } + /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as @@ -2455,7 +2472,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, unsigned int flags = 0; size_t tlvlen; - /* Error messages get the original request appened, unless the user + /* Error messages get the original request appended, unless the user * requests to cap the error message, and get extra error data if * requested.
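
These af_netlink.c hunks all move to an optimistic reservation scheme: skb->truesize is charged to sk_rmem_alloc up front with atomic_add_return(), the new total is checked against sk_rcvbuf, and the charge is rolled back with atomic_sub() when the socket has no room. A first charge on an otherwise-empty socket is always admitted, so a single oversized skb cannot be refused forever. A standalone sketch of that reserve/check/roll-back shape, using hypothetical names rather than the socket internals:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical receive-memory accounting, mirroring the shape of the
 * netlink_attachskb()/netlink_broadcast_deliver() changes above.
 */
struct rx_budget {
	atomic_uint charged;	/* bytes currently accounted */
	unsigned int limit;	/* rcvbuf-style cap */
};

static bool rx_try_charge(struct rx_budget *b, unsigned int size)
{
	/* optimistically reserve, then validate the new total */
	unsigned int total = atomic_fetch_add(&b->charged, size) + size;

	/* admit if this was the only charge, or we are still under the cap */
	if (total == size || total < b->limit)
		return true;

	/* no room: roll the reservation back */
	atomic_fetch_sub(&b->charged, size);
	return false;
}
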
*/ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 3add108340bf..2832e0794197 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -941,8 +941,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - if (dp->user_features & - OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + if (OVS_CB(skb)->upcall_pid) + upcall.portid = OVS_CB(skb)->upcall_pid; + else if (dp->user_features & + OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b990dc83504f..d5b6e2002bc1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -267,7 +267,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + if (OVS_CB(skb)->upcall_pid) + upcall.portid = OVS_CB(skb)->upcall_pid; + else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); else @@ -651,6 +653,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) !!(hash & OVS_PACKET_HASH_L4_BIT)); } + OVS_CB(packet)->upcall_pid = + nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0); + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); @@ -719,6 +724,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, + [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 }, }; static const struct genl_small_ops dp_packet_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index cfeb817a1889..db0c3e69d66c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -121,6 +121,8 @@ struct datapath { * @cutlen: The number of bytes from the packet end to be removed. * @probability: The sampling probability that was applied to this skb; 0 means * no sampling has occurred; U32_MAX means 100% probability. + * @upcall_pid: Netlink socket PID to use for sending this packet to userspace; + * 0 means "not set" and default per-CPU or per-vport dispatch should be used. 
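The new @upcall_pid field documented above adds a third, highest-priority source for the upcall destination: a per-packet PID supplied via OVS_PACKET_ATTR_UPCALL_PID, with 0 meaning "not set" so the existing per-CPU and per-vport dispatch keep working unchanged. A condensed sketch of that selection order, with stand-in types and portid helpers (the flag value and helper bodies are illustrative, not the real openvswitch code):

#include <stdint.h>

#define DP_F_DISPATCH_UPCALL_PER_CPU 0x8u	/* illustrative flag value */

struct dp_stub { uint32_t user_features; };

/* Stand-ins for ovs_dp_get_upcall_portid()/ovs_vport_find_upcall_portid(). */
static uint32_t per_cpu_portid(const struct dp_stub *dp, int cpu)
{
	(void)dp;
	return 1000u + (uint32_t)cpu;
}

static uint32_t per_vport_portid(void)
{
	return 42u;
}

/*
 * Dispatch priority added by the patch: a non-zero per-packet PID wins;
 * otherwise fall back to per-CPU dispatch when enabled, then to the
 * per-vport portid.
 */
static uint32_t pick_upcall_portid(const struct dp_stub *dp,
				   uint32_t skb_upcall_pid, int cpu)
{
	if (skb_upcall_pid)
		return skb_upcall_pid;
	if (dp->user_features & DP_F_DISPATCH_UPCALL_PER_CPU)
		return per_cpu_portid(dp, cpu);
	return per_vport_portid();
}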
*/ struct ovs_skb_cb { struct vport *input_vport; @@ -128,6 +130,7 @@ struct ovs_skb_cb { u16 acts_origlen; u32 cutlen; u32 probability; + u32 upcall_pid; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 8732f6e51ae5..6bbbc16ab778 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -501,6 +501,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; OVS_CB(skb)->probability = 0; + OVS_CB(skb)->upcall_pid = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 2dd6bd3a3011..b72bf8a08d48 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev) t = rose_node; rose_node = rose_node->next; - for (i = 0; i < t->count; i++) { + for (i = t->count - 1; i >= 0; i--) { if (t->neighbour[i] != s) continue; t->count--; - switch (i) { - case 0: - t->neighbour[0] = t->neighbour[1]; - fallthrough; - case 1: - t->neighbour[1] = t->neighbour[2]; - break; - case 2: - break; - } + memmove(&t->neighbour[i], &t->neighbour[i + 1], + sizeof(t->neighbour[0]) * + (t->count - i)); } if (t->count <= 0) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5bd3922c310d..376e33dce8c1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -361,12 +361,15 @@ struct rxrpc_local { struct list_head new_client_calls; /* Newly created client calls need connection */ spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ - /* Provide a kvec table sufficiently large to manage either a DATA - * packet with a maximum set of jumbo subpackets or a PING ACK padded - * out to 64K with zeropages for PMTUD. - */ - struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? - 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + union { + /* Provide a kvec table sufficiently large to manage either a + * DATA packet with a maximum set of jumbo subpackets or a PING + * ACK padded out to 64K with zeropages for PMTUD. + */ + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct bio_vec bvec[3 + 16]; + }; }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a4b363b47cca..49fccee1a726 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, id_in_use: write_unlock(&rx->call_lock); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; @@ -254,6 +255,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; + if (!b) + return NULL; + /* #calls >= #conns >= #peers must hold true. 
*/ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0af19bcdc80a..ef7b3096c95e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -924,7 +924,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response { struct rxrpc_skb_priv *sp = rxrpc_skb(response); struct scatterlist sg[16]; - struct bio_vec bvec[16]; + struct bio_vec *bvec = conn->local->bvec; struct msghdr msg; size_t len = sp->resp.len; __be32 wserial; @@ -938,6 +938,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response if (ret < 0) goto fail; nr_sg = ret; + ret = -EIO; + if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec))) + goto fail; for (int i = 0; i < nr_sg; i++) bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 057e20cef375..9e468e463467 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -933,18 +933,25 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo) { struct idr *idr = &idrinfo->action_idr; + bool mutex_taken = false; struct tc_action *p; - int ret; unsigned long id = 1; unsigned long tmp; + int ret; idr_for_each_entry_ul(idr, p, tmp, id) { + if (tc_act_in_hw(p) && !mutex_taken) { + rtnl_lock(); + mutex_taken = true; + } ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return; } + if (mutex_taken) + rtnl_unlock(); idr_destroy(&idrinfo->action_idr); } EXPORT_SYMBOL(tcf_idrinfo_destroy); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c5e3673aadbe..d7c767b861a4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -336,17 +336,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -596,16 +601,6 @@ out: qdisc_skb_cb(skb)->pkt_len = pkt_len; } -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) -{ - if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); - qdisc->flags |= TCQ_F_WARN_NONWC; - } -} -EXPORT_SYMBOL(qdisc_warn_nonwc); - static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, @@ -780,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -797,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. 
- * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -816,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. + */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } @@ -1499,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1510,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1611,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 48dd8c88903f..dbcfb948c867 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1407,7 +1407,10 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_init() */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a7745170e84..d8fd35da32a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static unsigned int -qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - unsigned int len; - - skb = sch->ops->peek(sch); - if (unlikely(skb == NULL)) { - qdisc_warn_nonwc("qdisc_peek_len", sch); - return 0; - } - len = qdisc_pkt_len(skb); - - return len; -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bf1282cb22eb..bcce36608871 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -989,7 +989,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del_init(&cl->alist); - else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); 
} diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8d56e4db63e0..bdbaad17f980 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2735,8 +2735,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock, if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { /* wait till data arrives on the socket */ - timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * - MSEC_PER_SEC); + timeo = secs_to_jiffies(lsmc->sockopt_defer_accept); if (smc_sk(nsk)->use_fallback) { struct sock *clcsk = smc_sk(nsk)->clcsock->sk; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0fa244f16876..7b943fbafcc3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1724,7 +1724,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len); /* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */ while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count)) - maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len); + maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 8ee0c07d00e9..ffe577bf6b51 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 564c970d97ff..7a92733706fe 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -934,6 +934,52 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) #define unix_show_fdinfo NULL #endif +static bool unix_custom_sockopt(int optname) +{ + switch (optname) { + case SO_INQ: + return true; + default: + return false; + } +} + +static int unix_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct unix_sock *u = unix_sk(sock->sk); + struct sock *sk = sock->sk; + int val; + + if (level != SOL_SOCKET) + return -EOPNOTSUPP; + + if (!unix_custom_sockopt(optname)) + return sock_setsockopt(sock, level, optname, optval, optlen); + + if (optlen != sizeof(int)) + return -EINVAL; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + switch (optname) { + case SO_INQ: + if (sk->sk_type != SOCK_STREAM) + return -EINVAL; + + if (val > 1 || val < 0) + return -EINVAL; + + WRITE_ONCE(u->recvmsg_inq, val); + break; + default: + return -ENOPROTOOPT; + } + + return 0; +} + static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, @@ -950,6 +996,7 @@ static const struct proto_ops unix_stream_ops = { #endif .listen = unix_listen, .shutdown = unix_shutdown, + .setsockopt = unix_setsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, @@ -1116,6 +1163,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, switch (sock->type) { case SOCK_STREAM: + set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); sock->ops = &unix_stream_ops; break; /* @@ -1847,6 +1895,9 @@ static int unix_accept(struct socket *sock, struct socket *newsock, skb_free_datagram(sk, 
skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); + if (tsk->sk_type == SOCK_STREAM) + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + /* attach accepted sock to socket */ unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); @@ -2297,6 +2348,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, spin_lock(&other->sk_receive_queue.lock); WRITE_ONCE(ousk->oob_skb, skb); + WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); @@ -2319,6 +2371,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sock *other = NULL; + struct unix_sock *otheru; struct scm_cookie scm; bool fds_sent = false; int err, sent = 0; @@ -2342,14 +2395,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_namelen) { err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; - } else { - other = unix_peer(sk); - if (!other) { - err = -ENOTCONN; - goto out_err; - } } + other = unix_peer(sk); + if (!other) { + err = -ENOTCONN; + goto out_err; + } + + otheru = unix_sk(other); + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto out_pipe; @@ -2388,8 +2443,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb->ip_summed = CHECKSUM_UNNECESSARY; - err = skb_splice_from_iter(skb, &msg->msg_iter, size, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, size); if (err < 0) goto out_free; @@ -2418,7 +2472,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); + + spin_lock(&other->sk_receive_queue.lock); + WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); + unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2528,12 +2587,10 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, &err, &timeo, last)); if (!skb) { /* implies iolock unlocked */ - unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
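The af_unix hunks above introduce SO_INQ for SOCK_STREAM sockets: once enabled with a boolean setsockopt(), each recvmsg() carries an SCM_INQ control message holding the number of bytes still queued (u->inq_len). A hedged user-space sketch of consuming it; the SO_INQ/SCM_INQ constants must come from UAPI headers matching this series, and the fallback value below is only a placeholder:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef SO_INQ			/* provided by patched UAPI headers */
#define SO_INQ	0x54		/* placeholder: use the real header's value */
#define SCM_INQ	SO_INQ
#endif

/* Enable once after connect()/accept(); the option is boolean (0 or 1). */
static int enable_inq(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_SOCKET, SO_INQ, &one, sizeof(one));
}

static int read_with_inq(int fd, char *buf, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	int inq = -1;
	ssize_t n = recvmsg(fd, &msg, 0);

	if (n < 0)
		return -1;

	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
		if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_INQ)
			memcpy(&inq, CMSG_DATA(c), sizeof(inq));

	printf("read %zd bytes, %d still queued\n", n, inq);
	return 0;
}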
*/ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && - (sk->sk_shutdown & RCV_SHUTDOWN)) + (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN)) err = 0; - unix_state_unlock(sk); goto out; } @@ -2707,6 +2764,7 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + WRITE_ONCE(u->inq_len, u->inq_len - 1); if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && !unix_skb_len(oob_skb->prev)) { @@ -2789,6 +2847,7 @@ unlock: static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int err; @@ -2796,60 +2855,57 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; - mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); - mutex_unlock(&u->iolock); - if (!skb) + err = sock_error(sk); + if (err) return err; -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (unlikely(skb == READ_ONCE(u->oob_skb))) { - bool drop = false; - - unix_state_lock(sk); + mutex_lock(&u->iolock); + spin_lock(&queue->lock); - if (sock_flag(sk, SOCK_DEAD)) { - unix_state_unlock(sk); - kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE); - return -ECONNRESET; - } + skb = __skb_dequeue(queue); + if (!skb) { + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return -EAGAIN; + } - spin_lock(&sk->sk_receive_queue.lock); - if (likely(skb == u->oob_skb)) { - WRITE_ONCE(u->oob_skb, NULL); - drop = true; - } - spin_unlock(&sk->sk_receive_queue.lock); + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); - unix_state_unlock(sk); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb == u->oob_skb) { + WRITE_ONCE(u->oob_skb, NULL); + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); - if (drop) { - kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); - return -EAGAIN; - } + kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); + return -EAGAIN; } #endif + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return recv_actor(sk, skb); } static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { - struct scm_cookie scm; + int noblock = state->flags & MSG_DONTWAIT; struct socket *sock = state->socket; + struct msghdr *msg = state->msg; struct sock *sk = sock->sk; - struct unix_sock *u = unix_sk(sk); - int copied = 0; + size_t size = state->size; int flags = state->flags; - int noblock = flags & MSG_DONTWAIT; bool check_creds = false; - int target; + struct scm_cookie scm; + unsigned int last_len; + struct unix_sock *u; + int copied = 0; int err = 0; long timeo; + int target; int skip; - size_t size = state->size; - unsigned int last_len; if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; @@ -2869,6 +2925,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, memset(&scm, 0, sizeof(scm)); + u = unix_sk(sk); + /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ @@ -2960,14 +3018,12 @@ unlock: } /* Copy address just once */ - if (state->msg && state->msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, - state->msg->msg_name); - unix_copy_addr(state->msg, skb->sk); + if (msg && msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); - BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, - state->msg->msg_name, - &state->msg->msg_namelen); + unix_copy_addr(msg, skb->sk); + 
BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name, + &msg->msg_namelen); sunaddr = NULL; } @@ -2996,7 +3052,11 @@ unlock: if (unix_skb_len(skb)) break; - skb_unlink(skb, &sk->sk_receive_queue); + spin_lock(&sk->sk_receive_queue.lock); + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + __skb_unlink(skb, &sk->sk_receive_queue); + spin_unlock(&sk->sk_receive_queue.lock); + consume_skb(skb); if (scm.fp) @@ -3025,10 +3085,17 @@ unlock: } while (size); mutex_unlock(&u->iolock); - if (state->msg) - scm_recv_unix(sock, state->msg, &scm, flags); - else + if (msg) { + scm_recv_unix(sock, msg, &scm, flags); + + if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) { + msg->msg_inq = READ_ONCE(u->inq_len); + put_cmsg(msg, SOL_SOCKET, SCM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); + } + } else { scm_destroy(&scm); + } out: return copied ? : err; } @@ -3167,9 +3234,11 @@ long unix_inq_len(struct sock *sk) if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; + if (sk->sk_type == SOCK_STREAM) + return READ_ONCE(unix_sk(sk)->inq_len); + spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET) { skb_queue_walk(&sk->sk_receive_queue, skb) amount += unix_skb_len(skb); } else { diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2e7a3034e965..218d91e6b32b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. 
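This comment states the contract the surrounding vsock changes enforce: the static transport pointers can become NULL (or their module can unload) at any time, so a reader takes vsock_register_mutex, snapshots the one value it needs, and drops the lock, never caching the pointer itself. A generic pthreads sketch of that accessor shape (all names are stand-ins):

#include <pthread.h>

#define CID_ANY ((unsigned int)-1)	/* stand-in for VMADDR_CID_ANY */

struct transport_stub {
	unsigned int (*get_local_cid)(void);
};

static pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
static const struct transport_stub *transport_g2h_stub; /* may be unregistered */

/*
 * Snapshot a value derived from the registered transport while holding
 * the registration lock, so concurrent unregistration cannot free the
 * ops structure mid-call. Mirrors vsock_registered_transport_cid().
 */
static unsigned int registered_transport_cid(const struct transport_stub **tp)
{
	unsigned int cid = CID_ANY;

	pthread_mutex_lock(&register_lock);
	if (*tp)
		cid = (*tp)->get_local_cid();
	pthread_mutex_unlock(&register_lock);

	return cid;
}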
+ */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -1389,6 +1423,28 @@ static int vsock_do_ioctl(struct socket *sock, unsigned int cmd, vsk = vsock_sk(sk); switch (cmd) { + case SIOCINQ: { + ssize_t n_bytes; + + if (!vsk->transport) { + ret = -EOPNOTSUPP; + break; + } + + if (sock_type_connectible(sk->sk_type) && + sk->sk_state == TCP_LISTEN) { + ret = -EINVAL; + break; + } + + n_bytes = vsock_stream_has_data(vsk); + if (n_bytes < 0) { + ret = n_bytes; + break; + } + ret = put_user(n_bytes, arg); + break; + } case SIOCOUTQ: { ssize_t n_bytes; @@ -2536,18 +2592,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 31342ab502b4..432fcbbd14d4 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -694,15 +694,26 @@ out: static s64 hvs_stream_has_data(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; + bool need_refill; s64 ret; if (hvs->recv_data_len > 0) - return 1; + return hvs->recv_data_len; switch (hvs_channel_readable_payload(hvs->chan)) { case 1: - ret = 1; - break; + need_refill = !hvs->recv_desc; + if (!need_refill) + return -EIO; + + hvs->recv_desc = hv_pkt_iter_first(hvs->chan); + if (!hvs->recv_desc) + return -ENOBUFS; + + ret = hvs_update_recv_data(hvs); + if (ret) + return ret; + return hvs->recv_data_len; case 0: vsk->peer_shutdown |= SEND_SHUTDOWN; ret = 0; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/net/wireless/core.c b/net/wireless/core.c index f3cd70757ef2..a7e2931ffb2e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -239,7 +239,7 @@ void 
cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) @@ -1574,7 +1574,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_DOWN: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, -1); - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) diff --git a/net/wireless/core.h b/net/wireless/core.h index c56a35040caa..b6bd7f4d6385 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -21,6 +21,13 @@ #define WIPHY_IDX_INVALID -1 +struct cfg80211_scan_request_int { + struct cfg80211_scan_info info; + bool notified; + /* must be last - variable members */ + struct cfg80211_scan_request req; +}; + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -70,8 +77,8 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; u32 bss_entries; - struct cfg80211_scan_request *scan_req; /* protected by RTNL */ - struct cfg80211_scan_request *int_scan_req; + struct cfg80211_scan_request_int *scan_req; /* protected by RTNL */ + struct cfg80211_scan_request_int *int_scan_req; struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 29e1ce8aff42..bb5bc6ff09d4 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -352,8 +352,25 @@ cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a, return -EINVAL; } - if (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) != - ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b)) { + /* + * Only verify the values in Extended MLD Capabilities that are + * not reserved when transmitted by an AP (and expected to remain the + * same over time). + * The Recommended Max Simultaneous Links subfield in particular is + * reserved when included in a unicast Probe Response frame and may + * also change when the AP adds/removes links. The BTM MLD + * Recommendation For Multiple APs Support subfield is reserved when + * transmitted by an AP. All other bits are currently reserved. + * See IEEE P802.11be/D7.0, Table 9-417o. 
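The cfg80211_mlme_check_mlo_compat() change above is a mask-then-compare: only the bits of Extended MLD Capabilities that an AP must keep stable participate in the mismatch check, so reserved or legitimately varying subfields can no longer cause a spurious -EINVAL. The idiom in isolation, with illustrative bit positions rather than the real IEEE80211_EHT_ML_* layout:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative bit positions; see the real IEEE80211_EHT_ML_* defines. */
#define CAPA_OP_PARAM_UPDATE		0x0001
#define CAPA_NSTR_UPDATE		0x0002
#define CAPA_EMLSR_ENA_ON_ONE_LINK	0x0004

#define CAPA_STABLE_MASK (CAPA_OP_PARAM_UPDATE | \
			  CAPA_NSTR_UPDATE | \
			  CAPA_EMLSR_ENA_ON_ONE_LINK)

/*
 * Two advertisements are compatible when every *stable* bit agrees;
 * reserved or volatile bits are masked out before comparing.
 */
static bool ext_mld_capa_compatible(uint16_t a, uint16_t b)
{
	return (a & CAPA_STABLE_MASK) == (b & CAPA_STABLE_MASK);
}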
+ */ + if ((ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) & + (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK)) != + (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b) & + (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK))) { NL_SET_ERR_MSG(extack, "extended link MLD capabilities/ops mismatch"); return -EINVAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 70ca74a75f22..05538312bdad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9811,34 +9811,12 @@ static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag, static int nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, - void *request, struct nlattr **attrs, - bool is_sched_scan) + struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask, + u32 *flags, enum nl80211_feature_flags randomness_flag) { - u8 *mac_addr, *mac_addr_mask; - u32 *flags; - enum nl80211_feature_flags randomness_flag; - if (!attrs[NL80211_ATTR_SCAN_FLAGS]) return 0; - if (is_sched_scan) { - struct cfg80211_sched_scan_request *req = request; - - randomness_flag = wdev ? - NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : - NL80211_FEATURE_ND_RANDOM_MAC_ADDR; - flags = &req->flags; - mac_addr = req->mac_addr; - mac_addr_mask = req->mac_addr_mask; - } else { - struct cfg80211_scan_request *req = request; - - randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; - flags = &req->flags; - mac_addr = req->mac_addr; - mac_addr_mask = req->mac_addr_mask; - } - *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && @@ -9887,11 +9865,35 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, return 0; } +static int +nl80211_check_scan_flags_sched(struct wiphy *wiphy, struct wireless_dev *wdev, + struct nlattr **attrs, + struct cfg80211_sched_scan_request *req) +{ + return nl80211_check_scan_flags(wiphy, wdev, attrs, + req->mac_addr, req->mac_addr_mask, + &req->flags, + wdev ? 
NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : + NL80211_FEATURE_ND_RANDOM_MAC_ADDR); +} + +static int +nl80211_check_scan_flags_reg(struct wiphy *wiphy, struct wireless_dev *wdev, + struct nlattr **attrs, + struct cfg80211_scan_request_int *req) +{ + return nl80211_check_scan_flags(wiphy, wdev, attrs, + req->req.mac_addr, + req->req.mac_addr_mask, + &req->req.flags, + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR); +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct cfg80211_scan_request *request; + struct cfg80211_scan_request_int *request; struct nlattr *scan_freqs = NULL; bool scan_freqs_khz = false; struct nlattr *attr; @@ -9943,21 +9945,21 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; - size = struct_size(request, channels, n_channels); + size = struct_size(request, req.channels, n_channels); ssids_offset = size; - size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, array_size(sizeof(*request->req.ssids), n_ssids)); ie_offset = size; size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; - request->n_channels = n_channels; + request->req.n_channels = n_channels; if (n_ssids) - request->ssids = (void *)request + ssids_offset; - request->n_ssids = n_ssids; + request->req.ssids = (void *)request + ssids_offset; + request->req.n_ssids = n_ssids; if (ie_len) - request->ie = (void *)request + ie_offset; + request->req.ie = (void *)request + ie_offset; i = 0; if (scan_freqs) { @@ -9980,7 +9982,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) !cfg80211_wdev_channel_allowed(wdev, chan)) continue; - request->channels[i] = chan; + request->req.channels[i] = chan; i++; } } else { @@ -10001,7 +10003,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) !cfg80211_wdev_channel_allowed(wdev, chan)) continue; - request->channels[i] = chan; + request->req.channels[i] = chan; i++; } } @@ -10012,10 +10014,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_free; } - request->n_channels = i; + request->req.n_channels = i; - for (i = 0; i < request->n_channels; i++) { - struct ieee80211_channel *chan = request->channels[i]; + for (i = 0; i < request->req.n_channels; i++) { + struct ieee80211_channel *chan = request->req.channels[i]; /* if we can go off-channel to the target channel we're good */ if (cfg80211_off_channel_oper_allowed(wdev, chan)) @@ -10034,22 +10036,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } - request->ssids[i].ssid_len = nla_len(attr); - memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); + request->req.ssids[i].ssid_len = nla_len(attr); + memcpy(request->req.ssids[i].ssid, + nla_data(attr), nla_len(attr)); i++; } } if (info->attrs[NL80211_ATTR_IE]) { - request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); - memcpy((void *)request->ie, + request->req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + memcpy((void *)request->req.ie, nla_data(info->attrs[NL80211_ATTR_IE]), - request->ie_len); + request->req.ie_len); } for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) - request->rates[i] = + request->req.rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; if 
(info->attrs[NL80211_ATTR_SCAN_SUPP_RATES]) { @@ -10069,25 +10072,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), - &request->rates[band]); + &request->req.rates[band]); if (err) goto out_free; } } if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) { - request->duration = + request->req.duration = nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]); - request->duration_mandatory = + request->req.duration_mandatory = nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } - err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, - false); + err = nl80211_check_scan_flags_reg(wiphy, wdev, info->attrs, request); if (err) goto out_free; - request->no_cck = + request->req.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* Initial implementation used NL80211_ATTR_MAC to set the specific @@ -10100,19 +10102,21 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). */ if (info->attrs[NL80211_ATTR_BSSID]) - memcpy(request->bssid, + memcpy(request->req.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); - else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && + else if (!(request->req.flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && info->attrs[NL80211_ATTR_MAC]) - memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), + memcpy(request->req.bssid, + nla_data(info->attrs[NL80211_ATTR_MAC]), ETH_ALEN); else - eth_broadcast_addr(request->bssid); + eth_broadcast_addr(request->req.bssid); - request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs); - request->wdev = wdev; - request->wiphy = &rdev->wiphy; - request->scan_start = jiffies; + request->req.tsf_report_link_id = + nl80211_link_id_or_invalid(info->attrs); + request->req.wdev = wdev; + request->req.wiphy = &rdev->wiphy; + request->req.scan_start = jiffies; rdev->scan_req = request; err = cfg80211_scan(rdev); @@ -10534,7 +10538,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ie_len); } - err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true); + err = nl80211_check_scan_flags_sched(wiphy, wdev, attrs, request); if (err) goto out_free; @@ -18414,7 +18418,7 @@ void nl80211_notify_iface(struct cfg80211_registered_device *rdev, static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { - struct cfg80211_scan_request *req = rdev->scan_req; + struct cfg80211_scan_request_int *req = rdev->scan_req; struct nlattr *nest; int i; struct cfg80211_scan_info *info; @@ -18425,19 +18429,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_ssids; i++) { - if (nla_put(msg, i, req->ssids[i].ssid_len, req->ssids[i].ssid)) + for (i = 0; i < req->req.n_ssids; i++) { + if (nla_put(msg, i, req->req.ssids[i].ssid_len, + req->req.ssids[i].ssid)) goto nla_put_failure; } nla_nest_end(msg, nest); - if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) { + if (req->req.flags & NL80211_SCAN_FLAG_FREQ_KHZ) { nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_channels; i++) { + for (i = 0; i < req->req.n_channels; i++) { if (nla_put_u32(msg, i, - ieee80211_channel_to_khz(req->channels[i]))) + 
ieee80211_channel_to_khz(req->req.channels[i]))) goto nla_put_failure; } nla_nest_end(msg, nest); @@ -18446,19 +18451,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_channels; i++) { - if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + for (i = 0; i < req->req.n_channels; i++) { + if (nla_put_u32(msg, i, + req->req.channels[i]->center_freq)) goto nla_put_failure; } nla_nest_end(msg, nest); } - if (req->ie && - nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) + if (req->req.ie && + nla_put(msg, NL80211_ATTR_IE, req->req.ie_len, req->req.ie)) goto nla_put_failure; - if (req->flags && - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + if (req->req.flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->req.flags)) goto nla_put_failure; info = rdev->int_scan_req ? &rdev->int_scan_req->info : diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 803b39c26587..ac6884bacf3f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -456,15 +456,15 @@ rdev_set_monitor_channel(struct cfg80211_registered_device *rdev, } static inline int rdev_scan(struct cfg80211_registered_device *rdev, - struct cfg80211_scan_request *request) + struct cfg80211_scan_request_int *request) { int ret; - if (WARN_ON_ONCE(!request->n_ssids && request->ssids)) + if (WARN_ON_ONCE(!request->req.n_ssids && request->req.ssids)) return -EINVAL; trace_rdev_scan(&rdev->wiphy, request); - ret = rdev->ops->scan(&rdev->wiphy, request); + ret = rdev->ops->scan(&rdev->wiphy, &request->req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c1752b31734f..2524bc187a19 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -53,7 +53,7 @@ #include <linux/list.h> #include <linux/ctype.h> #include <linux/nl80211.h> -#include <linux/platform_device.h> +#include <linux/device/faux.h> #include <linux/verification.h> #include <linux/moduleparam.h> #include <linux/firmware.h> @@ -105,7 +105,7 @@ static struct regulatory_request __rcu *last_request = (void __force __rcu *)&core_request_world; /* To trigger userspace events and load firmware */ -static struct platform_device *reg_pdev; +static struct faux_device *reg_fdev; /* * Central wireless core regulatory domains, we only need two, @@ -583,7 +583,7 @@ static int call_crda(const char *alpha2) else pr_debug("Calling CRDA to update world regulatory domain\n"); - ret = kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); + ret = kobject_uevent_env(®_fdev->dev.kobj, KOBJ_CHANGE, env); if (ret) return ret; @@ -779,7 +779,7 @@ static bool regdb_has_valid_signature(const u8 *data, unsigned int size) const struct firmware *sig; bool result; - if (request_firmware(&sig, "regulatory.db.p7s", ®_pdev->dev)) + if (request_firmware(&sig, "regulatory.db.p7s", ®_fdev->dev)) return false; result = verify_pkcs7_signature(data, size, sig->data, sig->size, @@ -1061,7 +1061,7 @@ static int query_regdb_file(const char *alpha2) return -ENOMEM; err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", - ®_pdev->dev, GFP_KERNEL, + ®_fdev->dev, GFP_KERNEL, (void *)alpha2, regdb_fw_cb); if (err) kfree(alpha2); @@ -1077,7 +1077,7 @@ int reg_reload_regdb(void) const struct ieee80211_regdomain *current_regdomain; struct regulatory_request *request; - err = request_firmware(&fw, "regulatory.db", ®_pdev->dev); + err = request_firmware(&fw, "regulatory.db", ®_fdev->dev); if (err) return err; @@ 
-4300,12 +4300,12 @@ static int __init regulatory_init_db(void) * in that case, don't try to do any further work here as * it's doomed to lead to crashes. */ - if (IS_ERR_OR_NULL(reg_pdev)) + if (!reg_fdev) return -EINVAL; err = load_builtin_regdb_keys(); if (err) { - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); return err; } @@ -4313,7 +4313,7 @@ static int __init regulatory_init_db(void) err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) { - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); return err; } /* @@ -4342,9 +4342,9 @@ late_initcall(regulatory_init_db); int __init regulatory_init(void) { - reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); - if (IS_ERR(reg_pdev)) - return PTR_ERR(reg_pdev); + reg_fdev = faux_device_create("regulatory", NULL, NULL); + if (!reg_fdev) + return -ENODEV; rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); @@ -4372,9 +4372,9 @@ void regulatory_exit(void) reset_regdomains(true, NULL); rtnl_unlock(); - dev_set_uevent_suppress(®_pdev->dev, true); + dev_set_uevent_suppress(®_fdev->dev, true); - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); list_for_each_entry_safe(reg_beacon, btmp, ®_pending_beacons, list) { list_del(®_beacon->list); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e8a4fe44ec2d..a8339ed52404 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -782,9 +782,9 @@ cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, } EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap); -static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, - struct ieee80211_channel *chan, - bool add_to_6ghz) +static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, + struct ieee80211_channel *chan, + bool add_to_6ghz) { int i; u32 n_channels = request->n_channels; @@ -838,30 +838,32 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, return false; } -static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) +static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev, + bool first_part) { u8 i; struct cfg80211_colocated_ap *ap; int n_channels, count = 0, err; - struct cfg80211_scan_request *request, *rdev_req = rdev->scan_req; + struct cfg80211_scan_request_int *request, *rdev_req = rdev->scan_req; LIST_HEAD(coloc_ap_list); bool need_scan_psc = true; const struct ieee80211_sband_iftype_data *iftd; size_t size, offs_ssids, offs_6ghz_params, offs_ies; - rdev_req->scan_6ghz = true; + rdev_req->req.scan_6ghz = true; + rdev_req->req.first_part = first_part; if (!rdev->wiphy.bands[NL80211_BAND_6GHZ]) return -EOPNOTSUPP; iftd = ieee80211_get_sband_iftype_data(rdev->wiphy.bands[NL80211_BAND_6GHZ], - rdev_req->wdev->iftype); + rdev_req->req.wdev->iftype); if (!iftd || !iftd->he_cap.has_he) return -EOPNOTSUPP; n_channels = rdev->wiphy.bands[NL80211_BAND_6GHZ]->n_channels; - if (rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) { + if (rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) { struct cfg80211_internal_bss *intbss; spin_lock_bh(&rdev->bss_lock); @@ -883,8 +885,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * This is relevant for ML probe requests when the lower * band APs have not been discovered. 
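The reg.c hunks convert the "regulatory" device from the platform bus to the faux bus, which exists precisely for such firmware-loading/uevent-only devices. Note the changed error convention: faux_device_create() returns NULL on failure rather than an ERR_PTR(), hence the IS_ERR_OR_NULL() check collapsing to !reg_fdev. A minimal module-style sketch of the lifecycle under that assumption:

#include <linux/device/faux.h>
#include <linux/errno.h>
#include <linux/module.h>

static struct faux_device *demo_fdev;

static int __init demo_init(void)
{
	/* NULL ops: no probe/remove callbacks needed, as in reg.c. */
	demo_fdev = faux_device_create("demo", NULL, NULL);
	if (!demo_fdev)		/* NULL, not ERR_PTR(), signals failure */
		return -ENODEV;
	return 0;
}

static void __exit demo_exit(void)
{
	faux_device_destroy(demo_fdev);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_DESCRIPTION("faux device lifecycle sketch");
MODULE_LICENSE("GPL");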
*/ - if (is_broadcast_ether_addr(rdev_req->bssid) || - !ether_addr_equal(rdev_req->bssid, res->bssid) || + if (is_broadcast_ether_addr(rdev_req->req.bssid) || + !ether_addr_equal(rdev_req->req.bssid, res->bssid) || res->channel->band != NL80211_BAND_6GHZ) continue; @@ -911,13 +913,13 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) spin_unlock_bh(&rdev->bss_lock); } - size = struct_size(request, channels, n_channels); + size = struct_size(request, req.channels, n_channels); offs_ssids = size; - size += sizeof(*request->ssids) * rdev_req->n_ssids; + size += sizeof(*request->req.ssids) * rdev_req->req.n_ssids; offs_6ghz_params = size; - size += sizeof(*request->scan_6ghz_params) * count; + size += sizeof(*request->req.scan_6ghz_params) * count; offs_ies = size; - size += rdev_req->ie_len; + size += rdev_req->req.ie_len; request = kzalloc(size, GFP_KERNEL); if (!request) { @@ -926,26 +928,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) } *request = *rdev_req; - request->n_channels = 0; - request->n_6ghz_params = 0; - if (rdev_req->n_ssids) { + request->req.n_channels = 0; + request->req.n_6ghz_params = 0; + if (rdev_req->req.n_ssids) { /* * Add the ssids from the parent scan request to the new * scan request, so the driver would be able to use them * in its probe requests to discover hidden APs on PSC * channels. */ - request->ssids = (void *)request + offs_ssids; - memcpy(request->ssids, rdev_req->ssids, - sizeof(*request->ssids) * request->n_ssids); + request->req.ssids = (void *)request + offs_ssids; + memcpy(request->req.ssids, rdev_req->req.ssids, + sizeof(*request->req.ssids) * request->req.n_ssids); } - request->scan_6ghz_params = (void *)request + offs_6ghz_params; + request->req.scan_6ghz_params = (void *)request + offs_6ghz_params; - if (rdev_req->ie_len) { + if (rdev_req->req.ie_len) { void *ie = (void *)request + offs_ies; - memcpy(ie, rdev_req->ie, rdev_req->ie_len); - request->ie = ie; + memcpy(ie, rdev_req->req.ie, rdev_req->req.ie_len); + request->req.ie = ie; } /* @@ -953,10 +955,12 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * and at least one of the reported co-located APs with same SSID * indicating that all APs in the same ESS are co-located */ - if (count && request->n_ssids == 1 && request->ssids[0].ssid_len) { + if (count && + request->req.n_ssids == 1 && + request->req.ssids[0].ssid_len) { list_for_each_entry(ap, &coloc_ap_list, list) { if (ap->colocated_ess && - cfg80211_find_ssid_match(ap, request)) { + cfg80211_find_ssid_match(ap, &request->req)) { need_scan_psc = false; break; } @@ -968,51 +972,52 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * regardless of the collocated APs (PSC channels or all channels * in case that NL80211_SCAN_FLAG_COLOCATED_6GHZ is not set) */ - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band == NL80211_BAND_6GHZ && + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band == NL80211_BAND_6GHZ && ((need_scan_psc && - cfg80211_channel_is_psc(rdev_req->channels[i])) || - !(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) { - cfg80211_scan_req_add_chan(request, - rdev_req->channels[i], + cfg80211_channel_is_psc(rdev_req->req.channels[i])) || + !(rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) { + cfg80211_scan_req_add_chan(&request->req, + rdev_req->req.channels[i], false); } } - if (!(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)) + if 
(!(rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)) goto skip; list_for_each_entry(ap, &coloc_ap_list, list) { bool found = false; struct cfg80211_scan_6ghz_params *scan_6ghz_params = - &request->scan_6ghz_params[request->n_6ghz_params]; + &request->req.scan_6ghz_params[request->req.n_6ghz_params]; struct ieee80211_channel *chan = ieee80211_get_channel(&rdev->wiphy, ap->center_freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED || - !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan)) + !cfg80211_wdev_channel_allowed(rdev_req->req.wdev, chan)) continue; - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i] == chan) + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i] == chan) found = true; } if (!found) continue; - if (request->n_ssids > 0 && - !cfg80211_find_ssid_match(ap, request)) + if (request->req.n_ssids > 0 && + !cfg80211_find_ssid_match(ap, &request->req)) continue; - if (!is_broadcast_ether_addr(request->bssid) && - !ether_addr_equal(request->bssid, ap->bssid)) + if (!is_broadcast_ether_addr(request->req.bssid) && + !ether_addr_equal(request->req.bssid, ap->bssid)) continue; - if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) + if (!request->req.n_ssids && ap->multi_bss && + !ap->transmitted_bssid) continue; - cfg80211_scan_req_add_chan(request, chan, true); + cfg80211_scan_req_add_chan(&request->req, chan, true); memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN); scan_6ghz_params->short_ssid = ap->short_ssid; scan_6ghz_params->short_ssid_valid = ap->short_ssid_valid; @@ -1028,14 +1033,14 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) if (cfg80211_channel_is_psc(chan) && !need_scan_psc) scan_6ghz_params->psc_no_listen = true; - request->n_6ghz_params++; + request->req.n_6ghz_params++; } skip: cfg80211_free_coloc_ap_list(&coloc_ap_list); - if (request->n_channels) { - struct cfg80211_scan_request *old = rdev->int_scan_req; + if (request->req.n_channels) { + struct cfg80211_scan_request_int *old = rdev->int_scan_req; rdev->int_scan_req = request; @@ -1043,7 +1048,7 @@ skip: * If this scan follows a previous scan, save the scan start * info from the first part of the scan */ - if (old) + if (!first_part && !WARN_ON(!old)) rdev->int_scan_req->info = old->info; err = rdev_scan(rdev, request); @@ -1063,35 +1068,39 @@ skip: int cfg80211_scan(struct cfg80211_registered_device *rdev) { - struct cfg80211_scan_request *request; - struct cfg80211_scan_request *rdev_req = rdev->scan_req; + struct cfg80211_scan_request_int *request; + struct cfg80211_scan_request_int *rdev_req = rdev->scan_req; u32 n_channels = 0, idx, i; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) + if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) { + rdev_req->req.first_part = true; return rdev_scan(rdev, rdev_req); + } - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ) + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ) n_channels++; } if (!n_channels) - return cfg80211_scan_6ghz(rdev); + return cfg80211_scan_6ghz(rdev, true); - request = kzalloc(struct_size(request, channels, n_channels), + request = kzalloc(struct_size(request, req.channels, n_channels), GFP_KERNEL); if (!request) return -ENOMEM; *request = *rdev_req; - request->n_channels = n_channels; + request->req.n_channels = n_channels; - for (i = idx = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band != 
NL80211_BAND_6GHZ) - request->channels[idx++] = rdev_req->channels[i]; + for (i = idx = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ) + request->req.channels[idx++] = + rdev_req->req.channels[i]; } - rdev_req->scan_6ghz = false; + rdev_req->req.scan_6ghz = false; + rdev_req->req.first_part = true; rdev->int_scan_req = request; return rdev_scan(rdev, request); } @@ -1099,7 +1108,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev) void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { - struct cfg80211_scan_request *request, *rdev_req; + struct cfg80211_scan_request_int *request, *rdev_req; struct wireless_dev *wdev; struct sk_buff *msg; #ifdef CONFIG_CFG80211_WEXT @@ -1118,13 +1127,13 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, if (!rdev_req) return; - wdev = rdev_req->wdev; + wdev = rdev_req->req.wdev; request = rdev->int_scan_req ? rdev->int_scan_req : rdev_req; if (wdev_running(wdev) && (rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ) && - !rdev_req->scan_6ghz && !request->info.aborted && - !cfg80211_scan_6ghz(rdev)) + !rdev_req->req.scan_6ghz && !request->info.aborted && + !cfg80211_scan_6ghz(rdev, false)) return; /* @@ -1136,10 +1145,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, cfg80211_sme_scan_done(wdev->netdev); if (!request->info.aborted && - request->flags & NL80211_SCAN_FLAG_FLUSH) { + request->req.flags & NL80211_SCAN_FLAG_FLUSH) { /* flush entries from previous scans */ spin_lock_bh(&rdev->bss_lock); - __cfg80211_bss_expire(rdev, request->scan_start); + __cfg80211_bss_expire(rdev, request->req.scan_start); spin_unlock_bh(&rdev->bss_lock); } @@ -1175,13 +1184,16 @@ void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, struct cfg80211_scan_info *info) { - struct cfg80211_scan_info old_info = request->info; + struct cfg80211_scan_request_int *intreq = + container_of(request, struct cfg80211_scan_request_int, req); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(request->wiphy); + struct cfg80211_scan_info old_info = intreq->info; - trace_cfg80211_scan_done(request, info); - WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req && - request != wiphy_to_rdev(request->wiphy)->int_scan_req); + trace_cfg80211_scan_done(intreq, info); + WARN_ON(intreq != rdev->scan_req && + intreq != rdev->int_scan_req); - request->info = *info; + intreq->info = *info; /* * In case the scan is split, the scan_start_tsf and tsf_bssid should @@ -1189,14 +1201,13 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, * be non zero. 
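The split-scan bookkeeping above relies on the wrapper layout declared in core.h: cfg80211_scan_request_int embeds the driver-visible request as its *last* member (because the request ends in a flexible channels[] array), so the core can recover its private state from the pointer drivers hand back via container_of(). A reduced sketch of that layout rule with stand-in struct names (nesting a flexible array member this way is a GNU extension the kernel relies on):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pub_req {			/* what driver callbacks receive */
	unsigned int n_channels;
	int channels[];			/* flexible array: must be last */
};

struct int_req {			/* private wrapper */
	int notified;
	struct pub_req req;		/* must be last: req ends in a FAM */
};

static void driver_done(struct pub_req *req)
{
	/* Core side: climb back to the wrapper. */
	struct int_req *ireq = container_of(req, struct int_req, req);

	ireq->notified = 1;
}

int main(void)
{
	unsigned int n = 4;
	struct int_req *ireq =
		malloc(sizeof(*ireq) + n * sizeof(ireq->req.channels[0]));

	if (!ireq)
		return 1;
	ireq->notified = 0;
	ireq->req.n_channels = n;
	driver_done(&ireq->req);
	printf("notified=%d\n", ireq->notified);	/* notified=1 */
	free(ireq);
	return 0;
}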
*/ if (request->scan_6ghz && old_info.scan_start_tsf) { - request->info.scan_start_tsf = old_info.scan_start_tsf; - memcpy(request->info.tsf_bssid, old_info.tsf_bssid, - sizeof(request->info.tsf_bssid)); + intreq->info.scan_start_tsf = old_info.scan_start_tsf; + memcpy(intreq->info.tsf_bssid, old_info.tsf_bssid, + sizeof(intreq->info.tsf_bssid)); } - request->notified = true; - wiphy_work_queue(request->wiphy, - &wiphy_to_rdev(request->wiphy)->scan_done_wk); + intreq->notified = true; + wiphy_work_queue(request->wiphy, &rdev->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -2220,6 +2231,7 @@ cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len) return IEEE80211_REG_LPI_AP; case IEEE80211_6GHZ_CTRL_REG_SP_AP: case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_SP_AP; case IEEE80211_6GHZ_CTRL_REG_VLP_AP: return IEEE80211_REG_VLP_AP; @@ -3496,7 +3508,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq; + struct cfg80211_scan_request_int *creq; int i, err, n_channels = 0; enum nl80211_band band; @@ -3526,19 +3538,20 @@ int cfg80211_wext_siwscan(struct net_device *dev, n_channels = ieee80211_get_num_supported_channels(wiphy); } - creq = kzalloc(struct_size(creq, channels, n_channels) + + creq = kzalloc(struct_size(creq, req.channels, n_channels) + sizeof(struct cfg80211_ssid), GFP_ATOMIC); if (!creq) return -ENOMEM; - creq->wiphy = wiphy; - creq->wdev = dev->ieee80211_ptr; + creq->req.wiphy = wiphy; + creq->req.wdev = dev->ieee80211_ptr; /* SSIDs come after channels */ - creq->ssids = (void *)creq + struct_size(creq, channels, n_channels); - creq->n_channels = n_channels; - creq->n_ssids = 1; - creq->scan_start = jiffies; + creq->req.ssids = (void *)creq + + struct_size(creq, req.channels, n_channels); + creq->req.n_channels = n_channels; + creq->req.n_ssids = 1; + creq->req.scan_start = jiffies; /* translate "Scan on frequencies" request */ i = 0; @@ -3554,7 +3567,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* ignore disabled channels */ chan = &wiphy->bands[band]->channels[j]; if (chan->flags & IEEE80211_CHAN_DISABLED || - !cfg80211_wdev_channel_allowed(creq->wdev, chan)) + !cfg80211_wdev_channel_allowed(creq->req.wdev, chan)) continue; /* If we have a wireless request structure and the @@ -3577,7 +3590,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, } wext_freq_found: - creq->channels[i] = &wiphy->bands[band]->channels[j]; + creq->req.channels[i] = + &wiphy->bands[band]->channels[j]; i++; wext_freq_not_found: ; } @@ -3588,28 +3602,30 @@ int cfg80211_wext_siwscan(struct net_device *dev, goto out; } - /* Set real number of channels specified in creq->channels[] */ - creq->n_channels = i; + /* Set real number of channels specified in creq->req.channels[] */ + creq->req.n_channels = i; /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); - creq->ssids[0].ssid_len = wreq->essid_len; + memcpy(creq->req.ssids[0].ssid, wreq->essid, + wreq->essid_len); + creq->req.ssids[0].ssid_len = wreq->essid_len; } if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) { - creq->ssids = NULL; - creq->n_ssids = 0; + creq->req.ssids = NULL; + creq->req.n_ssids = 0; } } for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) - 
creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; + creq->req.rates[i] = + (1 << wiphy->bands[i]->n_bitrates) - 1; - eth_broadcast_addr(creq->bssid); + eth_broadcast_addr(creq->req.bssid); scoped_guard(wiphy, &rdev->wiphy) { rdev->scan_req = creq; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index cf998500a965..6d7a7e7f0fc2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009, 2020, 2022-2024 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2025 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -64,7 +64,7 @@ static void cfg80211_sme_free(struct wireless_dev *wdev) static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - struct cfg80211_scan_request *request; + struct cfg80211_scan_request_int *request; int n_channels, err; lockdep_assert_wiphy(wdev->wiphy); @@ -77,13 +77,13 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) else n_channels = ieee80211_get_num_supported_channels(wdev->wiphy); - request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) + - sizeof(request->channels[0]) * n_channels, + request = kzalloc(sizeof(*request) + sizeof(request->req.ssids[0]) + + sizeof(request->req.channels[0]) * n_channels, GFP_KERNEL); if (!request) return -ENOMEM; - request->n_channels = n_channels; + request->req.n_channels = n_channels; if (wdev->conn->params.channel) { enum nl80211_band band = wdev->conn->params.channel->band; struct ieee80211_supported_band *sband = @@ -93,8 +93,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) kfree(request); return -EINVAL; } - request->channels[0] = wdev->conn->params.channel; - request->rates[band] = (1 << sband->n_bitrates) - 1; + request->req.channels[0] = wdev->conn->params.channel; + request->req.rates[band] = (1 << sband->n_bitrates) - 1; } else { int i = 0, j; enum nl80211_band band; @@ -109,26 +109,26 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) channel = &bands->channels[j]; if (channel->flags & IEEE80211_CHAN_DISABLED) continue; - request->channels[i++] = channel; + request->req.channels[i++] = channel; } - request->rates[band] = (1 << bands->n_bitrates) - 1; + request->req.rates[band] = (1 << bands->n_bitrates) - 1; } n_channels = i; } - request->n_channels = n_channels; - request->ssids = (void *)request + - struct_size(request, channels, n_channels); - request->n_ssids = 1; + request->req.n_channels = n_channels; + request->req.ssids = (void *)request + + struct_size(request, req.channels, n_channels); + request->req.n_ssids = 1; - memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, - wdev->conn->params.ssid_len); - request->ssids[0].ssid_len = wdev->conn->params.ssid_len; + memcpy(request->req.ssids[0].ssid, wdev->conn->params.ssid, + wdev->conn->params.ssid_len); + request->req.ssids[0].ssid_len = wdev->conn->params.ssid_len; - eth_broadcast_addr(request->bssid); + eth_broadcast_addr(request->req.bssid); - request->wdev = wdev; - request->wiphy = &rdev->wiphy; - request->scan_start = jiffies; + request->req.wdev = wdev; + request->req.wiphy = &rdev->wiphy; + request->req.scan_start = jiffies; rdev->scan_req = request; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 7e43ab9de923..a07d88d61bec 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -373,7 +373,8 @@ 
TRACE_EVENT(rdev_return_int, ); TRACE_EVENT(rdev_scan, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_scan_request *request), + TP_PROTO(struct wiphy *wiphy, + struct cfg80211_scan_request_int *request), TP_ARGS(wiphy, request), TP_STRUCT__entry( WIPHY_ENTRY @@ -3716,12 +3717,12 @@ TRACE_EVENT(cfg80211_tdls_oper_request, ); TRACE_EVENT(cfg80211_scan_done, - TP_PROTO(struct cfg80211_scan_request *request, + TP_PROTO(struct cfg80211_scan_request_int *request, struct cfg80211_scan_info *info), TP_ARGS(request, info), TP_STRUCT__entry( __field(u32, n_channels) - __dynamic_array(u8, ie, request ? request->ie_len : 0) + __dynamic_array(u8, ie, request ? request->req.ie_len : 0) __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) @@ -3732,16 +3733,16 @@ TRACE_EVENT(cfg80211_scan_done, ), TP_fast_assign( if (request) { - memcpy(__get_dynamic_array(ie), request->ie, - request->ie_len); - memcpy(__entry->rates, request->rates, + memcpy(__get_dynamic_array(ie), request->req.ie, + request->req.ie_len); + memcpy(__entry->rates, request->req.rates, NUM_NL80211_BANDS); - __entry->wdev_id = request->wdev ? - request->wdev->identifier : 0; - if (request->wiphy) + __entry->wdev_id = request->req.wdev ? + request->req.wdev->identifier : 0; + if (request->req.wiphy) MAC_ASSIGN(wiphy_mac, - request->wiphy->perm_addr); - __entry->no_cck = request->no_cck; + request->req.wiphy->perm_addr); + __entry->no_cck = request->req.no_cck; } if (info) { __entry->aborted = info->aborted; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72c000c0ae5f..9c3acecc14b1 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -34,7 +34,7 @@ #include "xsk.h" #define TX_BATCH_SIZE 32 -#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) +#define MAX_PER_SOCKET_BUDGET 32 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { @@ -300,6 +300,13 @@ static bool xsk_tx_writeable(struct xdp_sock *xs) return true; } +static void __xsk_tx_release(struct xdp_sock *xs) +{ + __xskq_cons_release(xs->tx); + if (xsk_tx_writeable(xs)) + xs->sk.sk_write_space(&xs->sk); +} + static bool xsk_is_bound(struct xdp_sock *xs) { if (READ_ONCE(xs->state) == XSK_BOUND) { @@ -407,11 +414,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool) struct xdp_sock *xs; rcu_read_lock(); - list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { - __xskq_cons_release(xs->tx); - if (xsk_tx_writeable(xs)) - xs->sk.sk_write_space(&xs->sk); - } + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) + __xsk_tx_release(xs); rcu_read_unlock(); } EXPORT_SYMBOL(xsk_tx_release); @@ -779,10 +783,10 @@ free_err: static int __xsk_generic_xmit(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); - u32 max_batch = TX_BATCH_SIZE; bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; + u32 max_batch; int err = 0; mutex_lock(&xs->mutex); @@ -796,6 +800,7 @@ static int __xsk_generic_xmit(struct sock *sk) if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; + max_batch = READ_ONCE(xs->max_tx_budget); while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) { if (max_batch-- == 0) { err = -EAGAIN; @@ -858,8 +863,7 @@ static int __xsk_generic_xmit(struct sock *sk) out: if (sent_frame) - if (xsk_tx_writeable(xs)) - sk->sk_write_space(sk); + __xsk_tx_release(xs); mutex_unlock(&xs->mutex); return err; @@ -1437,6 +1441,21 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, mutex_unlock(&xs->mutex); return err; } + case XDP_MAX_TX_SKB_BUDGET: + { + unsigned int budget; + + if (optlen != sizeof(budget)) + return -EINVAL; + if 
(copy_from_sockptr(&budget, optval, sizeof(budget))) + return -EFAULT; + if (!xs->tx || + budget < TX_BATCH_SIZE || budget > xs->tx->nentries) + return -EACCES; + + WRITE_ONCE(xs->max_tx_budget, budget); + return 0; + } default: break; } @@ -1734,6 +1753,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); xs->state = XSK_READY; + xs->max_tx_budget = TX_BATCH_SIZE; mutex_init(&xs->mutex); INIT_LIST_HEAD(&xs->map_list); diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py index c77b9ce75f6d..b5fbb18ccb77 100644 --- a/scripts/gdb/linux/vfs.py +++ b/scripts/gdb/linux/vfs.py @@ -22,7 +22,7 @@ def dentry_name(d): if parent == d or parent == 0: return "" p = dentry_name(d['d_parent']) + "/" - return p + d['d_iname'].string() + return p + d['d_shortname']['string'].string() class DentryName(gdb.Function): """Return string of the full path of a dentry. diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 062453eefc7a..3115558925ac 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -666,6 +666,7 @@ class KernelDoc: (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), ] # Regexes here are guaranteed to have the end limiter matching diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 99006dc4777e..5c9e2d41d900 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -98,7 +98,7 @@ static int snd_card_ad1816a_pnp(int dev, struct pnp_card_link *card, pdev = pnp_request_card_device(card, id->devs[1].id, NULL); if (pdev == NULL) { mpu_port[dev] = -1; - dev_warn(&pdev->dev, "MPU401 device busy, skipping.\n"); + pr_warn("MPU401 device busy, skipping.\n"); return 0; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 08308231b4ed..9a7793eb16e9 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4551,7 +4551,9 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0033, "SoC 33 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0035, "SoC 35 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi), @@ -4590,15 +4592,32 @@ HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009b, "GPU 9b HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009c, "GPU 9c HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), 
HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a1, "GPU a1 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a8, "GPU a8 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a9, "GPU a9 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00aa, "GPU aa HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ab, "GPU ab HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ad, "GPU ad HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ae, "GPU ae HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00af, "GPU af HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b0, "GPU b0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b1, "GPU b1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c0, "GPU c0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c1, "GPU c1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c3, "GPU c3 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c4, "GPU c4 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c5, "GPU c5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2e1618494c20..060db37eab83 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2656,6 +2656,7 @@ static const struct hda_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x5802, "Clevo X58[05]WN[RST]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -6609,6 +6610,7 @@ static void alc294_fixup_bass_speaker_15(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { static const hda_nid_t conn[] = { 0x02, 0x03 }; snd_hda_override_conn_list(codec, 0x15, ARRAY_SIZE(conn), conn); + snd_hda_gen_add_micmute_led_cdev(codec, NULL); } } @@ -10737,6 +10739,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), + SND_PCI_QUIRK(0x103c, 0x898a, "HP Pavilion 15-eg100", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), @@ -10878,6 +10881,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", 
ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10907,7 +10911,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfb, "HP EliteBook 6 G1a 14", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfd, "HP EliteBook 6 G1a 16", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e12, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), @@ -11026,6 +11032,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606WA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1264, "ASUS UM5606KA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1e10, "ASUS VivoBook X507UAR", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), @@ -11034,6 +11041,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), @@ -11135,6 +11143,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x35a1, "Clevo V3[56]0EN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x35b1, "Clevo V3[57]0WN[MNP]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11162,6 +11172,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51b3, 
"Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5700, "Clevo X560WN[RST]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11201,6 +11212,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa741, "Clevo V54x_6x_TNE", ALC245_FIXUP_CLEVO_NOISY_MIC), + SND_PCI_QUIRK(0x1558, 0xa743, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11414,6 +11426,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x1409, "Positivo K116J", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c index 5f1d4b3e9688..34217ce9f28e 100644 --- a/sound/pci/hda/tas2781_hda.c +++ b/sound/pci/hda/tas2781_hda.c @@ -44,7 +44,7 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) TASDEVICE_REG(0, 0x13, 0x70), TASDEVICE_REG(0, 0x18, 0x7c), }; - unsigned int crc, oft; + unsigned int crc, oft, node_num; unsigned char *buf; int i, j, k, l; @@ -80,8 +80,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) dev_err(p->dev, "%s: CRC error\n", __func__); return; } + node_num = tmp_val[1]; - for (j = 0, k = 0; j < tmp_val[1]; j++) { + for (j = 0, k = 0; j < node_num; j++) { oft = j * 6 + 3; if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) { for (i = 0; i < TASDEV_CALIB_N; i++) { @@ -99,8 +100,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) } data[l] = k; + oft++; for (i = 0; i < TASDEV_CALIB_N * 4; i++) - data[l + i] = data[4 * oft + i]; + data[l + i + 1] = data[4 * oft + i]; k++; } } diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index 85feae45c44c..d7c994e26e4d 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -334,6 +334,8 @@ struct acp_hw_ops { * @addr: pci ioremap address * @reg_range: ACP reigister range * @acp_rev: ACP PCI revision id + * @acp_sw_pad_keeper_en: store acp SoundWire pad keeper enable register value + * @acp_pad_pulldown_ctrl: store acp pad pulldown control register value * @acp63_sdw0-dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire * manager-SW0 instance * @acp63_sdw_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire @@ -367,6 +369,8 @@ struct acp63_dev_data { u32 addr; 
u32 reg_range; u32 acp_rev; + u32 acp_sw_pad_keeper_en; + u32 acp_pad_pulldown_ctrl; u16 acp63_sdw0_dma_intr_stat[ACP63_SDW0_DMA_MAX_STREAMS]; u16 acp63_sdw1_dma_intr_stat[ACP63_SDW1_DMA_MAX_STREAMS]; u16 acp70_sdw0_dma_intr_stat[ACP70_SDW0_DMA_MAX_STREAMS]; diff --git a/sound/soc/amd/ps/ps-common.c b/sound/soc/amd/ps/ps-common.c index 1c89fb5fe1da..7b4966b75dc6 100644 --- a/sound/soc/amd/ps/ps-common.c +++ b/sound/soc/amd/ps/ps-common.c @@ -160,6 +160,8 @@ static int __maybe_unused snd_acp63_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -197,6 +199,7 @@ static int __maybe_unused snd_acp63_runtime_resume(struct device *dev) static int __maybe_unused snd_acp63_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -209,6 +212,12 @@ static int __maybe_unused snd_acp63_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } @@ -408,6 +417,8 @@ static int __maybe_unused snd_acp70_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -445,6 +456,7 @@ static int __maybe_unused snd_acp70_runtime_resume(struct device *dev) static int __maybe_unused snd_acp70_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -459,6 +471,12 @@ static int __maybe_unused snd_acp70_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 98022e5fd428..97e340140d0c 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -356,6 +356,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_PRODUCT_NAME, "Nitro ANV15-41"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "83J2"), } @@ -363,6 +370,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83J3"), + } + }, + { + 
.driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index d0831d609584..ba653f6ccfae 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -980,7 +980,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) break; default: dev_err(cs35l56_base->dev, "Unknown device %x\n", devid); - return ret; + return -ENODEV; } cs35l56_base->type = devid & 0xFF; diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index 1c9f32e405cf..ba080957e933 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -430,6 +430,7 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, unsigned int read_l, read_r, ctl_l = 0, ctl_r = 0; unsigned int adc_vol_flag = 0; const unsigned int interval_offset = 0xc0; + const unsigned int tendA = 0x200; const unsigned int tendB = 0xa00; if (strstr(ucontrol->id.name, "FU1E Capture Volume") || @@ -439,9 +440,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, regmap_read(rt721->mbq_regmap, mc->reg, &read_l); regmap_read(rt721->mbq_regmap, mc->rreg, &read_r); - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_l = read_l / tendB; - else { + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_l == 0x8000 || read_l == 0xfe00) + ctl_l = 0; + else + ctl_l = read_l / tendA + 1; + } else { if (adc_vol_flag) ctl_l = mc->max - (((0x1e00 - read_l) & 0xffff) / interval_offset); else @@ -449,9 +457,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, } if (read_l != read_r) { - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_r = read_r / tendB; - else { /* ADC/DAC gain */ + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_r == 0x8000 || read_r == 0xfe00) + ctl_r = 0; + else + ctl_r = read_r / tendA + 1; + } else { /* ADC/DAC gain */ if (adc_vol_flag) ctl_r = mc->max - (((0x1e00 - read_r) & 0xffff) / interval_offset); else diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 677529916dc0..745532ccbdba 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -517,7 +517,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) regmap_update_bits(asrc->regmap, REG_ASRCTR, ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index)); regmap_update_bits(asrc->regmap, REG_ASRCTR, - ASRCTR_USRi_MASK(index), 0); + ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index), + ASRCTR_USR(index)); /* Set the input and output clock sources */ regmap_update_bits(asrc->regmap, REG_ASRCSR, diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index af1a168d35e3..50af6b725670 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -803,13 +803,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * anymore. Add software reset to fix this issue. * This is a hardware bug, and will be fix in the * next sai version. + * + * In consumer mode, this can happen even after a + * single open/close, especially if both tx and rx + * are running concurrently. 
*/ - if (!sai->is_consumer_mode[tx]) { - /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); - /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); - } + /* Software Reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + /* Clear SR bit to finish the reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 2df7afa2f469..128b6876af83 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -42,6 +42,7 @@ config SND_SOC_INTEL_SOF_NUVOTON_COMMON tristate config SND_SOC_INTEL_SOF_BOARD_HELPERS + select SND_SOC_ACPI_INTEL_MATCH tristate if SND_SOC_INTEL_CATPT diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 81a914bd7ec2..504887505e68 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -783,6 +783,9 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2349, "Lenovo P1", SOC_SDW_CODEC_MIC), {} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index 73e581e93755..1ad704ca2c5f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -468,17 +468,17 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2), - .links = arl_cs42l43_l2, + .link_mask = BIT(2) | BIT(3), + .links = arl_cs42l43_l2_cs35l56_l3, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2) | BIT(3), - .links = arl_cs42l43_l2_cs35l56_l3, + .link_mask = BIT(2), + .links = arl_cs42l43_l2, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index e86b4a03dd61..3d9ba13ee1e5 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -186,6 +186,7 @@ config SND_SOC_SM8250 tristate "SoC Machine driver for SM8250 boards" depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK + depends on SND_SOC_QCOM_OFFLOAD_UTILS || !SND_SOC_QCOM_OFFLOAD_UTILS select SND_SOC_QDSP6 select SND_SOC_QCOM_COMMON select SND_SOC_QCOM_SDW diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index bdfe388da198..3b47191ea7a5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1257,11 +1257,11 @@ static int check_tplg_quirk_mask(struct snd_soc_acpi_mach *mach) return 0; } -static char *remove_file_ext(const char *tplg_filename) +static char *remove_file_ext(struct device *dev, const char *tplg_filename) { char *filename, *tmp; - filename = kstrdup(tplg_filename, GFP_KERNEL); + filename = devm_kstrdup(dev, tplg_filename, 
GFP_KERNEL); if (!filename) return NULL; @@ -1345,7 +1345,7 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) */ if (!sof_pdata->tplg_filename) { /* remove file extension if it exists */ - tplg_filename = remove_file_ext(mach->sof_tplg_filename); + tplg_filename = remove_file_ext(sdev->dev, mach->sof_tplg_filename); if (!tplg_filename) return NULL; diff --git a/sound/usb/format.c b/sound/usb/format.c index 8cd54f7bf33a..0ee532acbb60 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -310,16 +310,14 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { - struct usb_interface *iface; struct usb_host_interface *alts; unsigned char *fmt; unsigned int max_rate; - iface = usb_ifnum_to_if(chip->dev, fp->iface); - if (!iface) + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) return true; - alts = &iface->altsetting[fp->altset_idx]; fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) @@ -328,20 +326,20 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); - /* Validate max rate */ - if (max_rate != 48000 && - max_rate != 96000 && - max_rate != 192000 && - max_rate != 384000) { - + switch (max_rate) { + case 48000: + return (rate == 44100 || rate == 48000); + case 96000: + return (rate == 88200 || rate == 96000); + case 192000: + return (rate == 176400 || rate == 192000); + default: usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); return true; } - - return rate <= max_rate; } return true; diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 5bc27c82e0af..3543b5a53592 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -759,7 +759,7 @@ static void qmi_stop_session(void) subs = find_substream(pcm_card_num, info->pcm_dev_num, info->direction); if (!subs || !chip || atomic_read(&chip->shutdown)) { - dev_err(&subs->dev->dev, + dev_err(&uadev[idx].udev->dev, "no sub for c#%u dev#%u dir%u\n", info->pcm_card_num, info->pcm_dev_num, @@ -1360,20 +1360,21 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, if (!uadev[card_num].ctrl_intf) { dev_err(&subs->dev->dev, "audio ctrl intf info not cached\n"); - ret = -ENODEV; - goto err; + return -ENODEV; } ret = uaudio_populate_uac_desc(subs, resp); if (ret < 0) - goto err; + return ret; resp->slot_id = subs->dev->slot_id; resp->slot_id_valid = 1; data = snd_soc_usb_find_priv_data(uaudio_qdev->auxdev->dev.parent); - if (!data) - goto err; + if (!data) { + dev_err(&subs->dev->dev, "No private data found\n"); + return -ENODEV; + } uaudio_qdev->data = data; @@ -1382,7 +1383,7 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, &resp->xhci_mem_info.tr_data, &resp->std_as_data_ep_desc); if (ret < 0) - goto err; + return ret; resp->std_as_data_ep_desc_valid = 1; @@ -1500,7 +1501,6 @@ drop_data_ep: xhci_sideband_remove_endpoint(uadev[card_num].sb, usb_pipe_endpoint(subs->dev, subs->data_endpoint->pipe)); -err: return ret; } diff --git a/sound/usb/stream.c b/sound/usb/stream.c index c1ea8844a46f..aa91d63749f2 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -987,6 +987,8 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); + if (wLength < sizeof(cluster)) + return NULL; cluster = 
kzalloc(wLength, GFP_KERNEL); if (!cluster) return ERR_PTR(-ENOMEM); diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/tools/arch/loongarch/include/asm/orc_types.h +++ b/tools/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 44f2bb93e7e6..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 61deb5923067..7529bce174ff 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -762,6 +762,8 @@ class YnlFamily(SpecFamily): decoded = True elif attr_spec.is_auto_scalar: decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) + if 'enum' in attr_spec: + decoded = self._decode_enum(decoded, attr_spec) elif attr_spec["type"] in NlAttr.type_formats: decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) if 'enum' in attr_spec: diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f23bdda737aa..d967ac001498 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2318,6 +2318,7 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + type = bswap_if_needed(file->elf, type); offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 0ced4026ee44..a29de0713f19 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -109,6 +109,8 @@ #include <network_helpers.h> +#define MAX_TX_BUDGET_DEFAULT 32 + static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; @@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return true; } +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, 
__ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exactly @max_budget descs in one + * send here, since hitting the max budget limit in the while loop + * of __xsk_generic_xmit() is triggered. Make sure that the number + * of descs to be sent is larger than @max_budget; otherwise the + * tx.consumer will be updated in xskq_cons_peek_desc() in time, + * which hides the issue we try to verify. + */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + static int kick_tx(struct xsk_socket_info *xsk) { int ret; - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } if (ret >= 0) return TEST_PASS; if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { @@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test) XSK_UMEM__LARGE_FRAME_SIZE * 2); } +static int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE); + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = { {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 67fc44b2813b..4df3a5d329ac 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -95,6 +95,7 @@ struct xsk_socket_info { u32 batch_size; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; + bool check_consumer; }; struct pkt { diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..68f8e479ac36 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client) _exit(EXIT_FAILURE); } + ret = read(fd_coredump, &c, 1); + close(fd_coredump); close(fd_server); close(fd_peer_pidfd); close(fd_core_file); + + if (ret < 1) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } self->pid_coredump_server = pid_coredump_server; diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py index db0f723a674b..6ae908bed1a4 100755 ---  a/tools/testing/selftests/drivers/net/hw/rss_api.py +++
b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -20,6 +20,38 @@ def _ethtool_create(cfg, act, opts): return int(output.split()[-1]) +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + def test_rxfh_indir_ntf(cfg): """ Check that Netlink notifications are generated when RSS indirection @@ -77,6 +109,21 @@ def test_rxfh_indir_ctx_ntf(cfg): ksft_eq(set(ntf["msg"]["indir"]), {1}) +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + def main() -> None: """ Ksft boiler plate main """ diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3fcf85a34596..258af805497b 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later -SRCIP=192.0.2.1 +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" DSTIF="" # to be populated later -DSTIP=192.0.2.2 +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" PORT="6666" MSG="netconsole selftest" @@ -80,7 +82,23 @@ function configure_ip() { ip link set "${SRCIF}" up } +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + function set_network() { + local IP_VERSION=${1:-"ipv4"} + # setup_ns function is coming from lib.sh setup_ns NAMESPACE @@ -91,6 +109,7 @@ function set_network() { # Link both interfaces back to back link_ifaces + select_ipv4_or_ipv6 "${IP_VERSION}" configure_ip } @@ -119,6 +138,11 @@ function create_dynamic_target() { fi echo 1 > "${NETCONS_PATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. 
+ sleep 1 } # Generate the command line argument for netconsole following: @@ -179,9 +203,18 @@ function set_user_data() { function listen_port_and_save_to() { local OUTPUT=${1} + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + # Just wait for 2 seconds timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" } # Only validate that the message arrived properly @@ -263,8 +296,15 @@ function check_for_dependencies() { exit "${ksft_skip}" fi - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. Skipping it" >&2 exit "${ksft_skip}" fi } @@ -278,11 +318,13 @@ function check_for_taskset() { # This is necessary if running multiple tests in a row function pkill_socat() { - PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" # socat runs under timeout(1), kill it if it is still alive # do not fail if socat doesn't exist anymore set +e - pkill -f "${PROCESS_NAME}" + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" set -e } @@ -294,3 +336,23 @@ function check_netconsole_module() { exit "${ksft_skip}" fi } + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 40a6ac6191b8..a3446b569976 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -36,30 +36,37 @@ trap cleanup EXIT # Run the test twice, with different format modes for FORMAT in "basic" "extended" do - echo "Running with target mode: ${FORMAT}" - # Create one namespace and two interfaces - set_network - # Create a dynamic target for netconsole - create_dynamic_target "${FORMAT}" - # Only set userdata for extended format - if [ "$FORMAT" == "extended" ] - then - # Set userdata "key" with the "value" value - set_user_data - fi - # Listed for netconsole port inside the namespace and destination interface - listen_port_and_save_to "${OUTPUT_FILE}" & - # Wait for socat to start and listen to the port. 
- wait_local_port_listen "${NAMESPACE}" "${PORT}" udp - # Send the message - echo "${MSG}: ${TARGET}" > /dev/kmsg - # Wait until socat saves the file to disk - busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - # Make sure the message was received in the dst part - # and exit - validate_result "${OUTPUT_FILE}" "${FORMAT}" - cleanup + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done done trap - EXIT diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) { @@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; int rc; @@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } -
mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index b380e102b22f..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c6dd2a335cf4..47c293c2962f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -34,6 +34,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello 
+scm_inq scm_pidfd scm_rights sk_bind_sendto_listen diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..a4b61c6d0290 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..9d22561e7b8f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "../../kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +struct scm_inq { + struct cmsghdr cmsghdr; + int inq; +}; + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + struct msghdr msg = {}; + struct iovec iov = {}; + struct scm_inq cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg; + msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(&cmsg, 0, sizeof(cmsg)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); + ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(cmsg.inq, inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q 
net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..5c6851e8d311 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -635,5 +635,42 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] } ] diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index 6eef282d569f..3ed4c9b2d8c0 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,22 +32,23 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & 
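
Returning to the iommufd_dirty_tracking setup above: the alignment comment is the key reasoning step. With MAP_HUGETLB, mmap() rounds the mapping length up to a multiple of the huge page size, so a MAP_FIXED remap over a posix_memalign() allocation smaller than one huge page would scribble past the end of the allocation. A compact sketch of that rounding rule, assuming a 2 MiB huge page size (the selftest computes its own HUGEPAGE_SIZE) and a hypothetical demo_hugepage_fixed_map() name:

#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/mman.h>

#define DEMO_HUGEPAGE_SIZE (2UL * 1024 * 1024)	/* assumed; query the system in real code */

static void *demo_hugepage_fixed_map(size_t buffer_size)
{
	void *buf, *vrc;
	size_t len = buffer_size;

	/* Reserve at least one full huge page before remapping in place,
	 * mirroring the mmap_buffer_size adjustment in the patch. */
	if (len < DEMO_HUGEPAGE_SIZE)
		len = DEMO_HUGEPAGE_SIZE;

	if (posix_memalign(&buf, DEMO_HUGEPAGE_SIZE, len) || !buf)
		return NULL;

	vrc = mmap(buf, len, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB | MAP_POPULATE,
		   -1, 0);
	return vrc == MAP_FAILED ? NULL : vrc;
}
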
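Likewise for the new scm_inq selftest above: the essence is that once SO_INQ is enabled on an AF_UNIX socket, each recvmsg() carries an SCM_INQ control message whose payload matches what SIOCINQ would report at that moment. A minimal sketch of that flow outside the kselftest harness; it assumes kernel headers new enough to define SO_INQ/SCM_INQ for unix sockets, and demo_unix_inq() is an illustrative name:

#include <string.h>
#include <sys/socket.h>

static int demo_unix_inq(void)
{
	struct { struct cmsghdr hdr; int inq; } cmsg = { 0 };
	char data[64] = { 0 };
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = &cmsg,
		.msg_controllen = CMSG_SPACE(sizeof(int)),
	};
	struct cmsghdr *c;
	int sk[2], one = 1, inq = -1;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk))
		return -1;
	if (setsockopt(sk[1], SOL_SOCKET, SO_INQ, &one, sizeof(one)))
		return -1;	/* kernel without AF_UNIX SO_INQ support */

	/* Queue two chunks, read one: the cmsg should report 64 bytes left. */
	send(sk[0], data, sizeof(data), 0);
	send(sk[0], data, sizeof(data), 0);
	if (recvmsg(sk[1], &msg, 0) != sizeof(data))
		return -1;

	c = CMSG_FIRSTHDR(&msg);
	if (c && c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_INQ)
		memcpy(&inq, CMSG_DATA(c), sizeof(inq));
	return inq;
}
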
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc & ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi -wait if _have_feature "PER_IO_DAEMON"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 803f1e075b62..1e65c5abd85b 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -17,6 +17,7 @@ #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <linux/sockios.h> @@ -101,28 +102,39 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Wait until transport reports no data left to be sent. - * Return false if transport does not implement the unsent_bytes() callback. +/* Wait until ioctl gives an expected int value. + * Return false if the op is not supported. */ -bool vsock_wait_sent(int fd) +bool vsock_ioctl_int(int fd, unsigned long op, int expected) { - int ret, sock_bytes_unsent; + int actual, ret; + char name[32]; + + snprintf(name, sizeof(name), "ioctl(%lu)", op); timeout_begin(TIMEOUT); do { - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + ret = ioctl(fd, op, &actual); if (ret < 0) { if (errno == EOPNOTSUPP) break; - perror("ioctl(SIOCOUTQ)"); + perror(name); exit(EXIT_FAILURE); } - timeout_check("SIOCOUTQ"); - } while (sock_bytes_unsent != 0); + timeout_check(name); + } while (actual != expected); timeout_end(); - return !ret; + return ret >= 0; +} + +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. + */ +bool vsock_wait_sent(int fd) +{ + return vsock_ioctl_int(fd, SIOCOUTQ, 0); } /* Create socket <type>, bind to <cid, port>. 
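
A usage sketch for the generalized helper above: vsock_wait_sent() is now just vsock_ioctl_int(fd, SIOCOUTQ, 0), and the same polling loop works for any socket ioctl that reports an int. demo_wait_idle() below is hypothetical, not part of the patch; it relies only on the helper's declared signature:

#include <stdbool.h>
#include <linux/sockios.h>

bool vsock_ioctl_int(int fd, unsigned long op, int expected);	/* from util.h */

/* Wait until the transport has flushed our writes and this socket's
 * receive queue has been fully consumed. */
static bool demo_wait_idle(int fd)
{
	if (!vsock_ioctl_int(fd, SIOCOUTQ, 0))	/* TX: unsent bytes == 0 */
		return false;			/* unsent_bytes() unsupported */
	return vsock_ioctl_int(fd, SIOCINQ, 0);	/* RX: unread bytes == 0 */
}
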
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index fdd4649fe2d4..142c02a6834a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -87,6 +87,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port); int vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_ioctl_int(int fd, unsigned long op, int expected); bool vsock_wait_sent(int fd); void send_buf(int fd, const void *buf, size_t len, int flags, ssize_t expected_ret); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index be6ce764f694..d4517386e551 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -24,6 +24,7 @@ #include <linux/time64.h> #include <pthread.h> #include <fcntl.h> +#include <linux/sockios.h> #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -1307,6 +1308,54 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) close(fd); } +static void test_unread_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("SENT"); + + close(client_fd); +} + +static void test_unread_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int fd; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SENT"); + /* The data has arrived but has not been read. The expected value is + * MSG_BUF_IOCTL_LEN. + */ + if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) { + fprintf(stderr, "Test skipped, SIOCINQ not supported.\n"); + goto out; + } + + recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + /* All data has been consumed, so the expected value is 0. 
 */ + vsock_ioctl_int(fd, SIOCINQ, 0); + +out: + close(fd); +} + static void test_stream_unsent_bytes_client(const struct test_opts *opts) { test_unsent_bytes_client(opts, SOCK_STREAM); @@ -1327,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) test_unsent_bytes_server(opts, SOCK_SEQPACKET); } +static void test_stream_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1937,6 +2006,7 @@ static void test_stream_transport_change_client(const struct test_opts *opts) .svm_cid = opts->peer_cid, .svm_port = opts->peer_port, }; + bool send_control = false; int s; s = socket(AF_VSOCK, SOCK_STREAM, 0); @@ -1957,19 +2027,29 @@ static void test_stream_transport_change_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + /* Notify the server if the connect() is successful or the + * receiver connection queue is full, so that it will call + * accept() to drain it. + */ + if (!ret || errno == ECONNRESET) + send_control = true; + /* Set CID to 0 to cause a transport change. */ sa.svm_cid = 0; - /* Ignore return value since it can fail or not. - * If the previous connect is interrupted while the - * connection request is already sent, the second - * connect() will wait for the response. - */ - connect(s, (struct sockaddr *)&sa, sizeof(sa)); + /* There is a case where this will not fail: + * if the previous connect() is interrupted while the + * connection request is already sent, this second + * connect() will wait for the response. + */ + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + if (!ret || errno == ECONNRESET) + send_control = true; close(s); - control_writeulong(CONTROL_CONTINUE); + if (send_control) + control_writeulong(CONTROL_CONTINUE); } while (current_nsec() < tout); @@ -2276,6 +2356,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_transport_change_client, .run_server = test_stream_transport_change_server, }, + { + .name = "SOCK_STREAM ioctl(SIOCINQ) functionality", + .run_client = test_stream_unread_bytes_client, + .run_server = test_stream_unread_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality", + .run_client = test_seqpacket_unread_bytes_client, + .run_server = test_seqpacket_unread_bytes_server, + }, {}, }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eec82775c5bf..222f0e894a0c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); if (r) goto out_unlock; + + cond_resched(); } kvm_handle_gfn_range(kvm, &pre_set_range); @@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, GFP_KERNEL_ACCOUNT)); KVM_BUG_ON(r, kvm); + cond_resched(); } kvm_handle_gfn_range(kvm, &post_set_range);
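
One more editorial note, on the kvm_main.c hunks that close this section: both loops walk a gfn range one xarray entry at a time, so a very large range could otherwise hog the CPU between scheduling points. Dropping a cond_resched() into each iteration bounds that latency without changing the result. A kernel-style sketch of the pattern, with a hypothetical process_one() standing in for the per-entry work:

static int demo_process_range(unsigned long start, unsigned long end)
{
	unsigned long i;
	int r;

	for (i = start; i < end; i++) {
		r = process_one(i);	/* hypothetical per-entry work */
		if (r)
			return r;

		/* Voluntarily yield so a long range cannot starve other
		 * tasks; cheap when nothing else is runnable. */
		cond_resched();
	}
	return 0;
}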
